Finished rearranging S3.
parent
6edbd88852
commit
d3081e80d1
|
@ -2,15 +2,33 @@
|
|||
|
||||
We use \Cref{lem:qEk-multi-p} to prove \Cref{thm:mult-p-hard-result}:
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{\Cref{lem:pdb-for-def-qk}}
|
||||
\begin{Lemma}\label{lem:pdb-for-def-qk}
|
||||
Assuming that each $v \in \vset$ has degree $\geq 1$,\footnote{We argue that this is a reasonable assumption, since any vertex with degree $= 0$ can be dropped without affecting the result of our hard query.} the \abbrPDB relations encoding the edges for the hard query of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
|
||||
\end{Lemma}
|
||||
\begin{proof}[Proof of \Cref{lem:pdb-for-def-qk}]
|
||||
Only two relations need to be constructed, one for the set $\vset$ and one for the set $\edgeSet$. By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$. Given that the degree of each $v \in \vset$ is at least $1$, every vertex is an endpoint of some edge, and since each edge has two endpoints we have $\abs{\vset} \leq 2\abs{\edgeSet}$, i.e., $\numvar = \bigO{\numedge}$. This yields the claimed runtime.
|
||||
\end{proof}
|
||||
|
||||
\subsection{Proof of \Cref{lem:tdet-om}}
|
||||
\begin{proof}
|
||||
By the recursive definition of $\qruntime{\cdot, \cdot}$ (see \Cref{sec:gen}), we have the following equation for our hard query $\query$ when $k = 1$.
|
||||
\begin{equation*}
|
||||
\qruntime{\query^1, \dbbase} = \abs{\dbbase.\vset} + \abs{\dbbase.\edgeSet} + \abs{\dbbase.\vset} + \abs{\dbbase.\vset \join \dbbase.\edgeSet \join \dbbase.\vset}
|
||||
\end{equation*}
|
||||
The quantity $\abs{\dbbase.\vset \join \dbbase.\edgeSet \join \dbbase.\vset}$ (the fourth term above) is $\bigO{\numedge}$ by \Cref{def:qk}. Then by the assumption of \Cref{lem:pdb-for-def-qk} (each $v \in \vset$ has degree $\geq 1$), the sum of the first three terms is $\bigO{\numedge}$. We then obtain that $\qruntime{\query^1, \dbbase} = \bigO{\numedge} + \bigO{\numedge} = \bigO{\numedge}$. For $\query^k = \query_1^1 \times\cdots\times\query_k^1$, we have the recurrence $\qruntime{\query^k, \dbbase} = \qruntime{\query_1^1, \dbbase} + \cdots +\qruntime{\query_k^1, \dbbase} + \abs{\query_1^1\join\cdots\join\query_k^1}$. The dominating term in the sum of the recursive definition is $\abs{\query_1^1\join\cdots\join\query_k^1} = \bigO{\numedge^k} = O_k(\numedge)$.
|
||||
%Since by definition, $\dbbase = \cup_{\db \in \idb}\db$, it follows that $\dbbase$ consists of the relations that contain all possible $v \in \vset$ and $e \in \edgeSet$. Because the result for $\query^1$ cannot be any larger than the relation encoding $\edgeSet$ (i.e., $\abs{\edgeSet}$), it follows that (using an efficient query evaluation strategy such as indexing) the runtime of $\qruntime{\query^1, \dbbase}$ is indeed $O(\numedge)$. When $k > 1$, since by \Cref{def:qk} $\query^k$ is simply a cross product of the original query $\query^1$, we arrive at the desired runtime of $O_k(\numedge)$.
|
||||
\end{proof}
|
||||
\subsection{Proof of \Cref{thm:mult-p-hard-result}}
|
||||
\begin{proof}
|
||||
For the sake of contradiction, let us assume we can solve our problem in $f(\kElem)\cdot m^c$ time for some absolute constant $c$. Given a graph $G$ by \Cref{lem:pdb-for-def-qk} we can compute the \abbrPDB encoding in $\bigO{\numedge}$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $\inparen{2k + 1}f(\kElem)\cdot m^c$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Thus, for $f'(\cdot) = \inparen{2k + 1}\cdot f(\cdot)$, overall we have an algorithm for computing the number of $k$-matchings in time
|
||||
\begin{align*}
|
||||
O(km) + \inparen{2k + 1}\cdot f(\kElem)\cdot m^c + O(\kElem^3)
|
||||
&\le \inparen{O(\kElem^3) + f'(\kElem)}\cdot m^{c+1} \\
|
||||
&\le \inparen{O(\kElem^3) + f'(\kElem)}\cdot n^{2c+2},
|
||||
\end{align*}
|
||||
which together with \Cref{thm:k-match-hard} contradicts the conjecture that $\sharpwone$ problems cannot be solved in $f(k)\cdot \numvar^c$ time.
|
||||
For the sake of contradiction, assume we can solve our problem in $\littleo{\kmatchtime + \numedge}$ time. Given a graph $G$, by \Cref{lem:pdb-for-def-qk} we can compute the \abbrPDB encoding in $\bigO{\numedge}$ time. Then, after we run our algorithm on $\rpoly_G^\kElem$ once for each of the $2\kElem + 1$ values $\prob_i$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $\bigO{k}\cdot \littleo{\kmatchtime + \numedge}$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. We thus have an algorithm for computing the number of $k$-matchings in time
|
||||
\begin{align}
|
||||
&\bigO{\numedge} + \bigO{k}\cdot \littleo{\kmatchtime + \numedge} + O(\kElem^3)\label{eq:proof-omega-kmatch1}\\
|
||||
= &\bigO{\numedge} + \bigO{k}\cdot \littleo{\kmatchtime} + O(\kElem^3)\label{eq:proof-omega-kmatch2}\\
|
||||
= &\bigO{k}\cdot \littleo{\kmatchtime} + O(\kElem^3)\label{eq:proof-omega-kmatch3}\\
|
||||
= &\littleo{\kmatchtime}\label{eq:proof-omega-kmatch4}.
|
||||
\end{align}
|
||||
We obtain \Cref{eq:proof-omega-kmatch2} from the fact that $\kmatchtime \in \littleomega{\numedge}$, which is upheld by the assumption in \Cref{thm:k-match-hard} that $\#W[0]\neq\sharpwone$ and the further assumption that the best runtime to compute $k$-matchings is $\bigOmega{\numedge^{k/2}}$~\cite{k-match}. \Cref{eq:proof-omega-kmatch3} then follows from the fact that $\bigO{\numedge}\subseteq \littleo{\kmatchtime}$, which again holds because $\kmatchtime \in \littleomega{\numedge}$. We then obtain \Cref{eq:proof-omega-kmatch4} by the observation that $\bigO{k}\subseteq\bigO{k^3}$ and $\bigO{k^3} \subseteq \littleo{\numedge}$ since by definition of parameterized complexity we have $\frac{\numedge}{k}\rightarrow \infty$. Thus we obtain the contradiction that we can achieve a runtime $\littleo{\kmatchtime}$ that is better than the actual time $\kmatchtime$ required to compute $k$-matchings.
|
||||
\qed
|
||||
\end{proof}
|
||||
|
||||
|
|
|
@ -266,7 +266,6 @@
|
|||
\newcommand{\subgraph}{\vari{S}_{\equivtree(\circuit)}}
|
||||
%-----
|
||||
\newcommand{\cost}{\func{Cost}}
|
||||
\newcommand{\qruntime}[1]{T_{det}(#1)}
|
||||
\newcommand{\nullval}{NULL}
|
||||
|
||||
|
||||
|
@ -288,6 +287,9 @@
|
|||
% COMPLEXITY
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\newcommand{\bigO}[1]{O\inparen{#1}}
|
||||
\newcommand{\littleo}[1]{o\inparen{#1}}
|
||||
\newcommand{\bigOmega}[1]{\Omega\inparen{#1}}
|
||||
\newcommand{\littleomega}[1]{\omega\inparen{#1}}
|
||||
\newcommand{\np}{{\sf NP}\xspace}
|
||||
\newcommand{\polytime}{{\sf P}\xspace}
|
||||
\newcommand{\sharpp}{\#{\sf P}\xspace}
|
||||
|
@ -297,6 +299,8 @@
|
|||
\newcommand{\sharpwonehard}{\#{\sf W}[1]-hard\xspace}
|
||||
\newcommand{\ptime}{{\sf PTIME}\xspace}
|
||||
\newcommand{\timeOf}[1]{T_{#1}}
|
||||
\newcommand{\qruntime}[1]{T_{det}(#1)}
|
||||
\newcommand{\kmatchtime}{T_{match}\inparen{k, G}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
|
@ -8,16 +8,18 @@ In this section, we will prove that computing $\expct\pbox{\poly(\vct{W})}$ exac
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Preliminaries}
|
||||
Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We can think of $H$ as being of constant size and $G$ as growing. %In query processing, $H$ can be viewed as the query while $G$ as the database instance.
|
||||
In particular, we will consider the problems of computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). Our hardness result in \Cref{sec:multiple-p} is based on the following result:
|
||||
In particular, we will consider the problems of computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). We use $\kmatchtime$ to denote the runtime of computing $\numocc{G}{\kmatch}$. Our hardness result in \Cref{sec:multiple-p} is based on the following result:
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Theorem}[\cite{k-match}]
|
||||
\label{thm:k-match-hard}
|
||||
Given positive integer $k$ and undirected graph $G$ with no self-loops or parallel edges, computing $\numocc{G}{\kmatch}$ exactly is %counting the number of $k$-matchings in $G$ is
|
||||
\sharpwonehard (parameterization is in $k$).
|
||||
Given positive integer $k$ and undirected graph $G$ with no self-loops or parallel edges, the time $\kmatchtime$ to compute $\numocc{G}{\kmatch}$ exactly is $\littleomega{f(k)\cdot \numedge^c}$ for any function $f$ and fixed constant $c$ independent of $\numedge$ and $k$. %counting the number of $k$-matchings in $G$ is\sharpwonehard (parameterization is in $k$).
|
||||
\end{Theorem}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
The above result means that we cannot hope to count the number of $k$-matchings in $G=(\vset,\edgeSet)$ in time $f(k)\cdot |\vset|^{c}$ for any function $f$ and constant $c$ independent of $k$. In fact, all known algorithms to solve this problem take time $|\vset|^{\Omega(k)}$.
|
||||
%The above result means that we cannot hope to count the number of $k$-matchings in $G=(\vset,\edgeSet)$ in time $f(k)\cdot |\vset|^{c}$ for any function $f$ and constant $c$ independent of $k$.
|
||||
\begin{hypo}[\cite{k-match}]\label{conj:known-algo-kmatch}
|
||||
The best known algorithms to compute $\numocc{G}{\kmatch}$ take time $\kmatchtime = |\vset|^{\Omega(k)}$.
|
||||
\end{hypo}
|
||||
%
|
||||
Our hardness result in \Cref{sec:single-p} is based on the following conjectured hardness result:
|
||||
%
|
||||
|
@ -69,24 +71,9 @@ SELECT 1 FROM $R_1$ JOIN $R_2$ JOIN$\cdots$JOIN $R_k$
|
|||
\noindent where adapting the PDB instance in \Cref{fig:two-step}, relation $OnTime$ has $4$ tuples corresponding to each vertex for $i$ in $[4]$, each with probability $\prob_i$ and $Route$ has tuples corresponding to the edges $\edgeSet$ (each with probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $Route$ as well, but since they always are present with probability $1$, we drop those. Our argument also works when all the tuples in $Route$ also are present with probability $\prob$ but to simplify notation we assign probability $1$ to edges.}
|
||||
Note that this implies that our hard lineage polynomial can be represented as an expression tree produced by a project-join query with same probability value for each input tuple $\prob_i$, and hence is indeed a lineage polynomial for a \abbrTIDB \abbrPDB.
|
||||
|
||||
\begin{Lemma}\label{lem:pdb-for-def-qk}
|
||||
Assuming that each $v \in \vset$ has degree $\geq 1$,\footnote{We argue that this is a reasonable assumption, since any vertex with degree $= 0$ can be dropped without affecting the result of our hard query.} the \abbrPDB relations encoding the edges for the hard query of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
|
||||
\end{Lemma}
|
||||
\begin{proof}[Proof of \Cref{lem:pdb-for-def-qk}]
|
||||
Only two relations need be constructed, one for the set $\vset$ and one for the set $\edgeSet$. By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$. Given that the degree of each $v \in \vset$ is at least $1$, we have that $\abs{\edgeSet}$ is at least within a constant factor of $\abs{\vset}$, and this yields the claimed runtime.
|
||||
\end{proof}
|
||||
|
||||
\begin{Lemma}\label{lem:tdet-om}
|
||||
For the $\query^k$ of \Cref{def:qk}, the runtime $\qruntime{\query^k, \dbbase}$ is $O_k(\numedge)$.
|
||||
\end{Lemma}
|
||||
\begin{proof}[Proof of \Cref{lem:tdet-om}]
|
||||
By the recursive definition of $\qruntime{\cdot, \cdot}$ (see \Cref{sec:gen}), we have the following equation for our hard query $\query$ when $k = 1$.
|
||||
\begin{equation*}
|
||||
\qruntime{\query^1, \dbbase} = \abs{\dbbase.\vset} + \abs{\dbbase.\edgeSet} + \abs{\dbbase.\vset} + \abs{\dbbase.\vset \join \dbbase.\edgeSet \join \dbbase.\vset}
|
||||
\end{equation*}
|
||||
The quantity $\abs{\dbbase.\vset \join \dbbase.\edgeSet}$ is $\bigO{\numedge}$ by \Cref{def:qk}. Then by the assumption of \Cref{lem:pdb-for-def-qk} (each $v \in \vset$ has degree $\geq 1$), the sum of the first three terms is $\bigO{\numedge}$. We then obtain that $\qruntime{\query^1, \dbbase} = \bigO{\numedge} + \bigO{\numedge} = \bigO{\numedge}$. For $\query^k = \query_1^1 \times\cdots\times\query_k^1$, we have the recurrence $\qruntime{\query^k, \dbbase} = \qruntime{\query_1^1, \dbbase} + \cdots +\qruntime{\query_k^1, \dbbase} + \abs{\query_1^1\join\cdots\join\query_k^1}$. The dominating term in the sum of the recursive definition is $\abs{\query_1^1\join\cdots\join\query_k^1} = \bigO{\numedge^k} = O_k(\numedge)$.
|
||||
%Since by definition, $\dbbase = \cup_{\db \in \idb}\db$, it follows that $\dbbase$ consists of the relations that contain all possible $v \in \vset$ and $e \in \edgeSet$. Because the result for $\query^1$ cannot be any larger than the relation encoding $\edgeSet$ (i.e., $\abs{\edgeSet}$), it follows that (using an efficient query evaluation strategy such as indexing) the runtime of $\qruntime{\query^1, \dbbase}$ is indeed $O(\numedge)$. When $k > 1$, since by \Cref{def:qk} $\query^k$ is simply a cross product of the original query $\query^1$, we arrive at the desired runtime of $O_k(\numedge)$.
|
||||
\end{proof}
|
||||
|
||||
%\begin{Corollary}\label{cor:at-least-kmatch}
|
||||
%\end{Corollary}
|
||||
|
@ -103,16 +90,16 @@ The quantity $\abs{\dbbase.\vset \join \dbbase.\edgeSet}$ is $\bigO{\numedge}$ b
|
|||
%Unless otherwise noted, all proofs for this section are in \Cref{app:single-mult-p}.
|
||||
We are now ready to present our main hardness result.
|
||||
%
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Theorem}\label{thm:mult-p-hard-result}
|
||||
Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$. Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ for arbitrary $G$
|
||||
%and any $(2k+1)$ distinct values $\prob_i$ ($0\le i \le 2k$)
|
||||
is \sharpwonehard (parameterization is in $k$).
|
||||
requires time $\bigOmega{\kmatchtime}$.
|
||||
\end{Theorem}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%
|
||||
We will prove the above result by reducing from the problem of computing the number of $k$-matchings in $G$. Given the current best-known algorithm for this counting problem, our results imply that unless the state-of-the-art $k$-matching algorithms are improved, we cannot hope to solve our problem in time better than $\Omega_k\inparen{m^{k/2}}$ where $m=\abs{\edgeSet}$, which is only quadratically faster than expanding $\poly_{G}^\kElem(\vct{X})$ into its \abbrSMB form and then using \Cref{cor:expct-sop}. The approximation algorithm we present in \Cref{sec:algo} has runtime $O_k\inparen{m}$ for this query. % (since it runs in linear-time on all lineage polynomials).
|
||||
|
||||
The second row of \Cref{tab:lbs} is upheld by \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{thm:k-match-hard}. The third row is proved by \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{conj:known-algo-kmatch}.
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%NEEDS to be moved to appendix
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
Loading…
Reference in New Issue