%root:main.tex %!TEX root=./main.tex \section{Hardness of Exact Computation} \label{sec:hard} In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific family of hard instances $(\qhard^k,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a $1$-\abbrTIDB. Note that this implies hardness for \abbrCTIDB\xplural $\inparen{\bound\geq1}$ %; \Cref{prob:bag-pdb-poly-expected} cannot be done in $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$ runtime. The results also apply to as well as \abbrOneBIDB. % and other \abbrPDB\xplural. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\subsection{Preliminaries}\label{sec:hard:sub:pre} Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We think of $H$ as being of constant size and $G$ as growing. In particular, we will consider computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). We use $\kmatchtime$ to denote the optimal runtime of computing $\numocc{G}{\kmatch}$ exactly. Our results in \Cref{sec:multiple-p} are based on the following known (conditional) hardness results: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Theorem}[\cite{k-match}] \label{thm:k-match-hard} Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$ with no self-loops or parallel edges, $\kmatchtime\ge \littleomega{f(k)\cdot |\edgeSet|^c}$ for any function $f$ and any constant $c$ independent of $\abs{\edgeSet}$ and $k$ (assuming $\sharpwzero\ne\sharpwone$). 
\end{Theorem} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\begin{hypo}\label{conj:known-algo-kmatch} %There exists an absolute constant $c_0>0$ such that for every $G=(\vset,\edgeSet)$, we have $\kmatchtime \ge \Omega\inparen{|E|^{c_0\cdot k}}$ for large enough $k$. %\end{hypo} %<<<<<<< HEAD %\begin{hypo}[~\cite{10.1109/FOCS.2014.22}]\label{conj:known-algo-kmatch} %For every $G=\inparen{\vset, \edgeSet}$, $\kmatchtime\ge n^{\Omega\inparen{k/\log{k}}}$. %\end{hypo} %======= \begin{Theorem}[\cite{10.1109/FOCS.2014.22}]\label{conj:known-algo-kmatch} Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$, $\kmatchtime\ge |\vset|^{\Omega\inparen{k/\log{k}}}$ (assuming ETH). \end{Theorem} %We note that the above conjecture is somewhat non-standard. In particular, the best known algorithm to compute $\numocc{G}{\kmatch}$ takes time $\Omega\inparen{|V|^{k/2}}$ %(i.e. if this is the best algorithm then $c_0=\frac 14$) %~\cite{k-match}. The above result says that, assuming the Exponential Time Hypothesis (ETH), one can only hope for a slightly super-polynomial improvement over the trivial algorithm to compute $\numocc{G}{\kmatch}$. % Our hardness result in Section~\ref{sec:single-p} is based on the following conjectured hardness result: % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{hypo} \label{conj:graph} There exists a constant $\eps_0>0$ such that given an undirected graph $G=(\vset,\edgeSet)$, computing $\numocc{G}{\tri}$ exactly cannot be done in time $o\inparen{|\edgeSet|^{1+\eps_0}}$. \end{hypo} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % The so-called \emph{Triangle detection hypothesis} (cf.~\cite{triang-hard}), which states that detecting the presence of triangles in $G$ takes time $\Omega\inparen{|\edgeSet|^{4/3}}$, implies that in Conjecture~\ref{conj:graph} we can take $\eps_0\ge \frac 13$. 
All of our hardness results rely on a simple lineage polynomial encoding of the edges of a graph. To prove our hardness result, consider a graph $G=(\vset, \edgeSet)$, where $|\edgeSet| = \numedge$, $\vset = [\numvar]$. Our lineage polynomial has a variable $X_i$ for every $i$ in $[\numvar]$. Consider the polynomial $\poly_{G}(\vct{X}) = \sum\limits_{(i, j) \in \edgeSet} X_i \cdot X_j.$ The hard polynomial for our problem will be a suitable power $k\ge 3$ of the polynomial above: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}\label{def:qk} For any graph $G=(\vset,\edgeSet)$ and $\kElem\ge 1$, define \[\poly_{G}^\kElem(X_1,\dots,X_n) = \left(\sum\limits_{(i, j) \in \edgeSet} X_i \cdot X_j\right)^\kElem.\] \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \noindent Returning to \Cref{fig:two-step}, it can be seen that $\poly_{G}^\kElem(\vct{X})$ is the lineage polynomial from query $\qhard^k$, which we define next. %Let us alias %\begin{lstlisting} %SELECT DISTINCT 1 FROM T $t_1$, R r, T $t_2$ %WHERE $t_1$.Point = r.Point$_1$ AND $t_2$.Point = %r.Point$_2$ %\end{lstlisting} %as $Q^1$. %The query $\qhard^k$ then becomes \mdfdefinestyle{underbrace}{topline=false, rightline=false, bottomline=false, leftline=false, backgroundcolor=black!15!white, innerbottommargin=0pt} \begin{mdframed}[style=underbrace] \begin{lstlisting} SELECT COUNT(*) FROM $\underbrace{Q_1\text{ JOIN }Q_1\text{ JOIN}\cdots\text{JOIN }Q_1}_{k\rm\ times}$ \end{lstlisting} \end{mdframed} In the above, $\query_1$ is as defined in \Cref{sec:intro}, which is the same as $\qhard^1$. % %\noindent %Consider again the \abbrCTIDB instance $\pdb$ of~\Cref{fig:two-step} and, for our hard instance, let $\bound = 1$. $\pdb$ generalizes to one compatible We next define the instances for $T$ and $R$ that lead to the lineage polynomial in~\Cref{def:qk} as follows. 
Relation $T$ has $n$ tuples, one for each vertex $i$ in $[n]$, each with probability $\prob$, and $R$ has tuples corresponding to the edges in $\edgeSet$ (each with a probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $R$ as well, but since they are always present with probability $1$, we drop those. Our argument also works when all the tuples in $R$ are present with probability $\prob$, but to simplify notation we assign probability $1$ to edges.} In other words, the \dbbaseName $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$). Note that this implies that $\poly_{G}^\kElem$ is indeed a $1$-\abbrTIDB lineage polynomial. Next, we note that the runtime for answering $\qhard^k$ on deterministic database $\tupset$, as defined above, is $O_k\inparen{\numedge}$ (i.e., deterministic query processing is `easy' for this query): \begin{Lemma}\label{lem:tdet-om} For $\qhard^k,\tupset$ as above, $\qruntimenoopt{\qhard^k, \tupset, \bound}$ is $O_k\inparen{\numedge}$. \end{Lemma} \subsection{Multiple Distinct $\prob$ Values} \label{sec:multiple-p} We are now ready to present one of our main hardness results. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Theorem}\label{thm:mult-p-hard-result} Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$. Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (for all $i\in \{0,\ldots,2k\}$) for arbitrary $G=(\vset,\edgeSet)$ needs time $\bigOmega{\kmatchtime}$, if $\kmatchtime\ge \omega\inparen{\abs{\edgeSet}}$. \end{Theorem} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Note that the second (and third) row(s) of \Cref{tab:lbs} follow from %\Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{thm:k-match-hard} (\Cref{conj:known-algo-kmatch} resp.). 
%\textcolor{red}{Need to put in a proof overview here-- Atri} \Cref{thm:mult-p-hard-result} follows by observing that $\rpoly_G^\kElem(\prob,\dots,\prob)=\prob^{2k}\cdot \numocc{G}{\kmatch} +r(\prob)$, where $r(\prob)$ is a polynomial of degree at most $2k-1$ (with coefficients that just depend on $G$). By polynomial interpolation, knowing the values $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (over all $i\in \{0,\ldots,2k\}$) allows us to compute all the coefficients, including $\numocc{G}{\kmatch}$. %while the third row is proved by %\Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{conj:known-algo-kmatch}. %Since \Cref{conj:known-algo-kmatch} is non-standard, the latter hardness result should be interpreted as follows. Any substantial polynomial improvement for \Cref{prob:bag-pdb-poly-expected} (over the trivial algorithm that converts $\poly$ into SMB and then uses \Cref{cor:expct-sop} for \abbrStepTwo) would lead to an improvement over the state of the art {\em upper} bounds on $\kmatchtime$. Finally, note that \Cref{thm:mult-p-hard-result} needs one to be able to compute the expected multiplicities over $(2k+1)$ distinct values of $\prob_i$, each of which corresponds to a distinct $\bpd$ (for the same $\tupset$), which explains the `Multiple' entries in the second column of the second and third rows in \Cref{tab:lbs}. Next, we argue how to get rid of this latter requirement. %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: