paper-BagRelationalPDBsAreHard/app_hardness-results.tex

158 lines
16 KiB
TeX

%root: main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\Cref{lem:pdb-for-def-qk}}
\begin{Lemma}\label{lem:pdb-for-def-qk}
Assuming that each $v \in \vset$ has degree $\geq 1$,\footnote{This is WLOG, since any vertex with degree $0$ can be dropped without affecting the result of our hard query.} the \abbrPDB relations encoding the edges for $\poly_G^\kElem$ of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:pdb-for-def-qk}]
Only two relations need be constructed, one for the set $\vset$ and one for the set $\edgeSet$. By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$. Given that the degree of each $v \in \vset$ is at least $1$, we have that $m\ge \Omega(n)$,
and this yields the claimed runtime.
\qed
\end{proof}
\subsection{Proof of \Cref{lem:tdet-om}}
\begin{proof}
By the recursive defintion of $\qruntimenoopt{\cdot, \cdot}$ (see \Cref{sec:gen}), we have the following equation for our hard query $\query$ when $k = 1$, (we denote this as $\query^1$).
\begin{equation*}
\qruntimenoopt{\query^1, \tupset} = \abs{\tupset.\vset} + \abs{\tupset.\edgeSet} + \abs{\tupset.\vset} + \jointime{\tupset.\vset , \tupset.\edgeSet , \tupset.\vset}.
\end{equation*}
We argue that $\jointime{\tupset.\vset , \tupset.\edgeSet , \tupset.\vset}$ is at most $O(\numedge)$ by noting that there exists an algorithm that computes $\tupset.\vset\join\tupset.\edgeSet\join\tupset.\vset$ in the same runtime\footnote{Indeed the trivial algorithm that computes the obvious pair-wise joins has the claimed runtime. That is, we first compute $\tupset.\vset\join\tupset.\edgeSet$, which takes $O(m)$ (assuming $\tupset.\vset$ is stored in hash map) since tuples in $\tupset.\vset$ can only filter tuples in $\tupset.\edgeSet$. The resulting subset of tuples in $\tupset.\edgeSet$ are then again joined (on the right) with $\tupset.\vset$, which by the same argument as before also takes $O(m)$ time, as desried.}. Then by the assumption of \Cref{lem:pdb-for-def-qk} (each $v \in \vset$ has degree $\geq 1$), the sum of the first three terms is $\bigO{\numedge}$. We then obtain that $\qruntimenoopt{\query^1, \tupset} = \bigO{\numedge} + \bigO{\numedge} = \bigO{\numedge}$. For $\query^k = \query_1^1 \times\cdots\times\query_k^1$, we have the recurrence $\qruntimenoopt{\query^k, \tupset} = \qruntimenoopt{\query_1^1, \tupset} + \cdots +\qruntimenoopt{\query_k^1, \tupset} + \jointime{\query_1^1,\cdots,\query_k^1}$. Since $\query^1$ outputs a count, computing the join $\query_1^1\join\cdots\join\query_k^1$ is just multiplying $k$ numbers, which takes $O(k)$ time. Thus, we have
\[\qruntimenoopt{\query^k, \tupset} \le k\cdot O(m)+O(k)\le O(km),\]
as desired.
\qed
\end{proof}
\subsection{\Cref{lem:qEk-multi-p}}
\noindent The following lemma reduces the problem of counting $\kElem$-matchings in a graph to our problem (and proves \Cref{thm:mult-p-hard-result}):
\begin{Lemma}\label{lem:qEk-multi-p}
Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$. Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $\bigO{\kElem^3}$ time.
\end{Lemma}
\subsection{Proof of Lemma~\ref{lem:qEk-multi-p}}\label{subsec:c2k-proportional}
\input{app_hard_lem-mult-p}
\subsection{Proof of Theorem~\ref{thm:mult-p-hard-result}}
\begin{proof}
For the sake of contradiction, assume we can solve our problem in $\littleo{\kmatchtime}$ time. Given a graph $G$ by \Cref{lem:pdb-for-def-qk} we can compute the \abbrPDB encoding in $\bigO{\numedge}$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for every $0\leq i\leq 2\kElem$ in additional $\bigO{k}\cdot \littleo{\kmatchtime}$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Adding the runtime of all of these steps, we have an algorithm for computing the number of $k$-matchings that runs in time
\begin{align}
&\bigO{\numedge} + \bigO{k}\cdot \littleo{\kmatchtime} + O(\kElem^3)\label{eq:proof-omega-kmatch2}\\
&\le \littleo{\kmatchtime}\label{eq:proof-omega-kmatch4}.
\end{align}
We obtain \Cref{eq:proof-omega-kmatch4} from the facts that $k$ is fixed (related to $m$) and the assumption that $\kmatchtime\ge\omega(m)$.
Thus we obtain the contradiction that we can achieve a runtime $\littleo{\kmatchtime}$ that is better than the optimal time $\kmatchtime$ required to compute $k$-matchings.
\qed
\end{proof}
\subsection{Subgraph Notation and $O(1)$ Closed Formulas}
\input{app_hard_notation-easy-counts}
\subsection{Proofs of \Cref{eq:1e}-\Cref{eq:3p-3tri}}
\label{app:easy-counts}
The proofs for \Cref{eq:1e}, \Cref{eq:2p} and \Cref{eq:3s} are immediate.
\begin{proof}[Proof of \Cref{eq:2m}]
For edge $(i, j)$ connecting arbitrary vertices $i$ and $j$, finding all other edges in $G$ disjoint to $(i, j)$ is equivalent to finding all edges that are not connected to either vertex $i$ or $j$. The number of such edges is $m - d_i - d_j + 1$, where we add $1$ since edge $(i, j)$ is removed twice when subtracting both $d_i$ and $d_j$. Since the summation is iterating over all edges such that a pair $\left((i, j), (k, \ell)\right)$ will also be counted as $\left((k, \ell), (i, j)\right)$, division by $2$ then eliminates this double counting. Note that $m$ and $d_i$ for all $i \in V$ can be computed in one pass over the set of edges by simply maintaining counts for each quantity. Finally, the summation is also one traversal through the set of edges where each operation is either a lookup ($O(1)$ time) or an addition operation (also $O(1)$) time.
\qed
\end{proof}
\begin{proof}[Proof of \Cref{eq:2pd-3d}]
\Cref{eq:2pd-3d} is true for similar reasons. For edge $(i, j)$, it is necessary to find two additional edges, disjoint or connected. As in our argument for \Cref{eq:2m}, once the number of edges disjoint to $(i, j)$ have been computed, then we only need to consider all possible combinations of two edges from the set of disjoint edges, since it doesn't matter if the two edges are connected or not. Note, the factor $3$ of $\threedis$ is necessary to account for the triple counting of $3$-matchings, since it is indistinguishable to the closed form expression which of the remaining edges are either disjoint or connected to each of the edges in the {\emph{initial}} set of edges disjoint to the edge under consideration. Observe that the disjoint case will be counted $3$ times since each edge of a $3$-path is visited once, and the same $3$-path counted in each visitation. For the latter case however, it is true that since the two path in $\twopathdis$ is connected, there will be no multiple counting by the fact that the summation automatically disconnects the current edge, meaning that a two matching at the current vertex under consideration will not be counted. Thus, $\twopathdis$ will only be counted once, precisely when the single disjoint edge is visited in the summation. The sum over all such edge combinations is precisely then $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$. Note that all factorials can be computed in $O(m)$ time, and then the remaining operations in computing each combination $\binom{n}{2}$ are constant, thus yielding the claimed $O(m)$ run time.
\qed
\end{proof}
\begin{proof}[Proof of \Cref{eq:3p-3tri}]
To compute $\numocc{G}{\threepath}$, note that for an arbitrary edge $(i, j)$, a 3-path exists for edge pair $(i, \ell)$ and $(j, k)$ where $i, j, k, \ell$ are distinct. Further, the quantity $(d_i - 1) \cdot (d_j - 1)$ represents the number of 3-edge subgraphs with middle edge $(i, j)$ and outer edges $(i, \ell), (j, k)$ such that $\ell \neq j$ and $k \neq i$. When $k = \ell$, the resulting subgraph is a triangle, and when $k \neq \ell$, the subgraph is a 3-path. Summing over all edges (i, j) gives \Cref{eq:3p-3tri} by observing that each triangle is counted thrice, while each 3-path is counted just once. For reasons similar to \Cref{eq:2m}, all $d_i$ can be computed in $O(m)$ time and each summand can then be computed in $O(1)$ time, yielding an overall $O(m)$ run time.
\qed
\end{proof}
\input{app_hard_single-p-proof-defs}
\subsection{Proofs for \Cref{lem:3m-G2}, \Cref{lem:tri}, and \Cref{lem:lin-sys}}\label{subsec:proofs-struc-lemmas}
Before proceeding, let us introduce a few more helpful definitions.
\begin{Definition}[$\esetType{\ell}$]\label{def:ed-nota}
For $\ell > 1$, we use $\esetType{\ell}$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, its edges are denoted by the a pair $(e, b)$, such that $b \in \{0,\ldots, \ell-1\}$ where $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$ for $e\in \esetType{1}$.
\end{Definition}
\begin{Definition}[$\eset{\ell}$]
Given an arbitrary subgraph $\sg{1}$ of $\graph{1}$, let $\eset{1}$ denote the set of edges in $\sg{1}$. Define then $\eset{\ell}$ for $\ell > 1$ as the set of edges in the generated subgraph $\sg{\ell}$ (i.e. when we apply \Cref{def:Gk} to $S$ to generate $\sg{\ell}$).
\end{Definition}
For example, consider $\sg{1}$ with edges $\eset{1} = \{e_1\}$. Then the edge set of $\sg{2}$ is defined as $\eset{2} = \{(e_1, 0), (e_1, 1)\}$.
\begin{Definition}[$\binom{\edgeSet}{t}$ and $\binom{\edgeSet}{\leq t}$]\label{def:ed-sub}
Let $\binom{E}{t}$ denote the set of subsets in $E$ with exactly $t$ edges. In a similar manner, $\binom{E}{\leq t}$ is used to mean the subsets of $E$ with $t$ or fewer edges.
\end{Definition}
The following function $f_\ell$ is a mapping from every $3$-edge shape in $\graph{\ell}$ to its `projection' in $\graph{1}$.
\begin{Definition}\label{def:fk}
Let $f_\ell: \binom{\esetType{\ell}}{3} \rightarrow \binom{\esetType{1}}{\leq3}$ be defined as follows. For any element $s \in \binom{\esetType{\ell}}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
\[ f_\ell\left(\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}\right) = \pbrace{e_1, e_2, e_3}.\]
\end{Definition}
\begin{Definition}[$f_\ell^{-1}$]\label{def:fk-inv}
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with at most $m \leq 3$ edges, the inverse function $f_\ell^{-1}: \binom{\esetType{1}}{\leq 3}\rightarrow 2^{\binom{\esetType{\ell}}{3}}$ takes $\eset{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that
$f_\ell(s) = \eset{1}$.
\end{Definition}
Note, importantly, that when we discuss $f_\ell^{-1}$, that each \textit{edge} present in $\eset{1}$ must have an edge in each $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(\eset{1})$ consists of the following set of edges: $\{ (e_i, b_1), (e_j, b_2), (e_m, b_3) \}$, where $i,j$ and $m$ are distinct.
We are now ready to prove the structural lemmas.
To prove the structural lemmas, we will
count the number of occurrences of $\tri$ and $\threedis$ in $\graph{\ell}$. This is accomplished by computing how many $\threedis$ and $\tri$ subgraphs appear in $f_\ell^{-1}(\eset{1})$ for each $S\in\binom{E_1}{\le 3}$.
\subsubsection{Proof of Lemma \ref{lem:3m-G2}}
\begin{proof}
For each subset $\eset{1}\in \binom{E_1}{\le 3}$, we count the number of {\emph{$3$-matchings }}in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. Denote the set of $3$-matchings in $f_2^{-1}(\eset{1})$ as $f_{2,\threedis}^{-1}\inparen{\eset{1}}$ and its size as $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}}$. We first consider the case of $\eset{1} \in \binom{E_1}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set $s$ of all $3$-edge subsets of edges $e_i \in \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_\ell(s) = \{e_1, e_2, e_3\}$. For the case that $f_2^{-1}\inparen{\eset{1}} = f_{2, \threedis}^{-1}\inparen{\eset{1}}$ we have that $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}} = 8$.%where each set of edges of the form $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ for $b_i \in [2], i \in [3]$ is present, we have $\abs{f_{2, \threedis}^{-1}(\esetType{1})} = 8$. We count the number of $3$-matchings from the set $f_2^{-1}(\eset{1})$.
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$.
\begin{itemize}
\item $3$-matching ($\threedis$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threedis$, it is the case that edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. By definition, each set of edges in $f_2^{-1}\inparen{\eset{1}}$ is a three matching and $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}} = 8$.% possible 3-matchings.
\begin{itemize}
\item Disjoint Two-Path ($\twopathdis$)
\end{itemize}
For $\sg{1}$ isomorphic to $\twopathdis$ edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that in $\sg{2}$ edges $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can pick either $(e_1, 0)$ or $(e_1, 1)$ for the first edge in the $3$-matching, while it is necessary to have a $2$-matching from path $(e_2, 0),\ldots(e_3, 1)$. Note that the $4$-path allows for three possible $2$-matchings, specifically,
\begin{equation*}
\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}.
\end{equation*}
Since these two selections can be made independently, $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}} = 2 \cdot 3 = 6$.% \emph{distinct} $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-star ($\oneint$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\sg{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid $3$-matching it must be the case that at most one inner edge can be part of the set of disjoint edges. For the case of when exactly one inner edge is chosen, there exist $3$ possiblities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible 3-matching occurs when all 3 outer edges are chosen, and $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}} = 4$.
\begin{itemize}
\item $3$-path ($\threepath$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected. This means that the edges of $\eset{2}$ form a $6$-path. For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$ or both $(e_i, 1)$ and $(e_j, 0)$ where $j = i + 1$.
There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$ $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$ and $\abs{f^{-1}_{2, \threedis}\inparen{\eset{1}}} = 4.$
\begin{itemize}
\item Triangle ($\tri$)
\end{itemize}
For $\sg{1}$ isomorphic to $\tri$, note that it is the case that the edges in $\eset{2}$ are connected in a successive manner, but this time in a cycle, such that $(e_1, 0)$ and $(e_3, 1)$ are also connected. While this is similar to the discussion of the three path above, the first and last edges are not disjoint.
This rules out both subsets of $(e_1, 0), (e_2, 0), (e_3, 1)$ and $(e_1, 0), (e_2, 1), (e_3, 1)$, so that $\abs{f_{2, \threedis}^{-1}\inparen{\eset{1}}} = 2$.
\noindent Let us now consider when $\eset{1} \in \binom{E_1}{\leq 2}$, i.e. fixed subgraphs among
\begin{itemize}
\item $2$-matching ($\twodis$), $2$-path ($\twopath$), $1$ edge ($\ed$)
\end{itemize}
When $|\eset{1}| = 2$, we can only pick one from each of two pairs, $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. The third edge choice in $\eset{2}$ will break the disjoint property of a $3$-matching. Thus, a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. A similar argument holds for $|\eset{1}| = 1$, where the output of $f_2^{-1}$ is $\{\emptyset\}$ since there are not enough edges in the input to produce any other output.
Observe that all of the arguments above focused solely on the property of subgraph $\sg{1}$ being isomorphmic. In other words, all $\eset{1}$ of a given ``shape'' yield the same number of $3$-matchings in $f_2^{-1}(\eset{1})$, and this is why we get the required identity using the above case analysis.
\qed
\end{proof}
\subsubsection{Proof of \Cref{lem:tri}}
\begin{proof}
The number of triangles in $\graph{\ell}$ for $\ell \geq 2$ will always be $0$ for the simple fact that all cycles in $\graph{\ell}$ will have at least six edges.
\qed
\end{proof}
\subsubsection{Proof of \Cref{lem:lin-sys}}
\input{app_hard_linsys}