paper-BagRelationalPDBsAreHard/single_p.tex

210 lines
15 KiB
TeX

%root: main.tex
%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Single $\prob$ value}
\label{sec:single-p}
%In this discussion, let us fix $\kElem = 3$.
While \Cref{thm:mult-p-hard-result} shows that computing $\rpoly(\prob,\dots,\prob)$ in general is hard it does not rule out the possibility that one can compute this value exactly for a {\em fixed} value of $\prob$. Indeed, it is easy to check that one can compute $\rpoly(\prob,\dots,\prob)$ exactly in linear time for $\prob\in \inset{0,1}$. In this section, we show that these two are the only possibilities:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}\label{th:single-p-hard}
Fix $\prob\in (0,1)$. Then assuming \Cref{conj:graph} is true, any algorithm that computes $\rpoly_{G}^3(\prob,\dots,\prob)$ from $G$ exactly has to run in time $\Omega\inparen{\abs{E(G)}^{1+\eps_0}}$, where $\eps_0$ is as defined in \Cref{conj:graph}.
\end{Theorem}
%\begin{proof}[Proof of Corollary ~\ref{th:single-p-gen-k}]
%Consider $\poly^3_{G}$ and $\poly' = 1$ such that $\poly'' = \poly^3_{G} \cdot \poly'$. By \Cref{th:single-p}, query $\poly''$ with $\kElem = 4$ has $\Omega(\numvar^{\frac{4}{3}})$ complexity.
%\end{proof}
The above shows the hardness for a very specific query polynomial but it is easy to come up with an infinite family of hard query polynomials by `embedding' $\rpoly_{G}^3$ into an infinite family of trivial query polynomials.
Unlike \Cref{thm:mult-p-hard-result} the above result does not show that computing $\rpoly_{G}^3(\prob,\dots,\prob)$ for a fixed $\prob\in (0,1)$ is \sharpwonehard.
However, in \Cref{sec:algo} we show that if we are willing to compute an approximation that this problem (and indeed solving our problem for a much more general setting) is in linear time.
%\AH{@atri needs to put in the result for triangles of $\numvar^{\frac{4}{3}}$ runtime.}
We will prove the above result by the following reduction:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}\label{th:single-p}
Fix $\prob\in (0,1)$. Let $G$ be a graph on $\numedge$ edges.
If we can compute $\rpoly_{G}^3(\prob,\dots,\prob)$ exactly in $T(\numedge)$ time, then we can exactly compute $\numocc{G}{\tri}$, $\numocc{G}{\threepath}$ and $\numocc{G}{\threedis}$ %count the number of triangles, 3-paths, and 3-matchings in $G$
in $O\inparen{T(\numedge) + \numedge}$ time.
\end{Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[Proof of \Cref{th:single-p-hard}]
For the sake of contradiction, assume that for any $G$, we can compute $\rpoly_{G}^3(\prob,\dots,\prob)$ in $o\inparen{m^{1+\eps_0}}$ time.
Let $G$ be the input graph. It is easy to see that one can compute the expression tree for $\poly_{G}^3(\vct{X})$ in $O(m)$ time. Then by \Cref{th:single-p} we can compute $\numocc{G}{\tri}$, $\numocc{G}{\threepath}$ and $\numocc{G}{\threedis}$ in further time $o\inparen{m^{1+\eps_0}}+O(m)$. Thus, the overall, reduction takes $o\inparen{m^{1+\eps_0}}+O(m)= o\inparen{m^{1+\eps_0}}$ time, which violates \Cref{conj:graph}.
\end{proof}
\qed
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In other words, if \Cref{th:single-p} holds, then so must \Cref{th:single-p-hard}.
%Before we move on to the proof itself, we state the results, lemmas, and defintions that will be useful in the proof.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Preliminaries and Notation}
We need all the possible edge patterns in an arbitrary $G$ with at most three distinct edges. We have already seen $\tri,\threepath$ and $\threedis$, so we define the remaining patterns:
\begin{itemize}
\item Single Edge $\left(\ed\right)$
\item 2-path ($\twopath$)
\item 2-matching ($\twodis$)
%\item Triangle ($\tri$)
%\item 3-path ($\threepath$)
\item 3-star ($\oneint$)--this is the graph that results when all three edges share exactly one common endpoint. The remaining endpoint for each edge is disconnected from any endpoint of the remaining two edges.
\item Disjoint Two-Path ($\twopathdis$)--this subgraph consists of a two-path and a remaining disjoint edge.
%\item 3-matching ($\threedis$)--this subgraph is composed of three disjoint edges.
\end{itemize}
%Let $\numocc{G}{H}$ denote the number of occurrences of pattern $H$ in graph $G$, where, for example, $\numocc{G}{\ed}$ means the number of single edges in $G$.
For any graph $G$, the following formulas for $\numocc{G}{H}$ compute their respective patterns exactly in $O(\numedge)$ time, with $d_i$ representing the degree of vertex $i$ (proofs are in \Cref{app:easy-counts}):
\begin{align}
&\numocc{G}{\ed} = \numedge, \label{eq:1e}\\
&\numocc{G}{\twopath} = \sum_{i \in V} \binom{d_i}{2} \label{eq:2p}\\
&\numocc{G}{\twodis} = \sum_{(i, j) \in E} \frac{\numedge - d_i - d_j + 1}{2}\label{eq:2m}\\%\binom{\numedge - d_i - d_j + 1}{2}\label{eq:2m}\\
&\numocc{G}{\oneint} = \sum_{i \in V} \binom{d_i}{3}\label{eq:3s}\\
&\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis} = \sum_{(i, j) \in E} \binom{\numedge - d_i - d_j + 1}{2}\label{eq:2pd-3d}
\end{align}
%A quick argument to why \Cref{eq:2m} is true. Note that for edge $(i, j)$ connecting arbitrary vertices $i$ and $j$, finding all other edges in $G$ disjoint to $(i, j)$ is equivalent to finding all edges that are not connected to either vertex $i$ or $j$. The number of such edges is $m - d_i - d_j + 1$, where we add $1$ since edge $(i, j)$ is removed twice when subtracting both $d_i$ and $d_j$. Since the summation is iterating over all edges such that a pair $\left((i, j), (k, \ell)\right)$ will also be counted as $\left((k, \ell), (i, j)\right)$, division by $2$ then eliminates this double counting.
%\Cref{eq:2pd-3d} is true for similar reasons. For edge $(i, j)$, it is necessary to find two additional edges, disjoint or connected. As in \Cref{eq:2m}, once the number of edges disjoint to $(i, j)$ have been computed, then we only need to consider all possible combinations of two edges from the set of disjoint edges, since it doesn't matter if the two edges are connected or not. Note, the factor $3$ of $\threedis$ is necessary to account for the triple counting of $3$-matchings. It is also the case that, since the two path in $\twopathdis$ is connected, that there will be no double counting by the fact that the summation automatically 'disconnects' the current edge, meaning that a two matching at the current vertex will not be counted. The sum over all such edge combinations is precisely then $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$.
%Original lemma proving the exact coefficient terms in qE3
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{The proofs}
Note that $\rpoly_{G}^3(\prob,\ldots, \prob)$ as a polynomial in $\prob$ has degree at most six. Next, we figure out the exact coefficients since this would be useful in our arguments:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:qE3-exp}
%When we expand $\poly_{G}^3(\vct{X})$ out and assign all exponents $e \geq 1$ a value of $1$, we have the following result,
For any $\prob$, we have:
{\small
\begin{align}
\rpoly_{G}^3(\prob,\ldots, \prob) &= \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3\nonumber\\
&+ 6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6.\label{claim:four-one}
\end{align}}
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:qE3-exp}]
By definition we have that
\[\poly_{G}^3(\vct{X}) = \sum_{\substack{(i_1, j_1), (i_2, j_2), (i_3, j_3) \in E}}~\; \prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}.\]
Hence $\rpoly_{G}^3(\vct{X})$ has degree six. Note that the monomial $\prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}$ will contribute to the coefficient of $\prob^\nu$ in $\rpoly_{G}^3(\vct{X})$, where $\nu$ is the number of distinct variables in the monomial.
%Rather than list all the expressions in full detail, let us make some observations regarding the sum.
Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2), e_3 = (i_3, j_3)$.
We compute $\rpoly_{G}^3(\vct{X})$ by considering each of the three forms that the triple $(e_1, e_2, e_3)$ can take.
\textsc{case 1:} $e_1 = e_2 = e_3$ (all edges are the same). There are exactly $\numedge=\numocc{G}{\ed}$ such triples, each with a $\prob^2$ factor in $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\textsc{case 2:} This case occurs when there are two distinct edges of the three, call them $e$ and $e'$. When there are two distinct edges, there is then the occurence when $2$ variables in the triple $(e_1, e_2, e_3)$ are bound to $e$. There are three combinations for this occurrence in $\poly_{G}^3(\vct{X})$. Analogusly, there are three such occurrences in $\poly_{G}^3(\vct{X})$ when there is only one occurrence of $e$, i.e. $2$ of the variables in $(e_1, e_2, e_3)$ are $e'$. %Again, there are three combinations for this.
This implies that all $3 + 3 = 6$ combinations of two distinct edges $e$ and $e'$ contribute to the same monomial in $\rpoly_{G}^3$. % consist of the same monomial in $\rpoly$, i.e. $(e_1, e_1, e_2)$ is the same as $(e_2, e_1, e_2)$.
Since $e\ne e'$, this case produces the following edge patterns: $\twopath, \twodis$, which contribute $6\prob^3$ and $6\prob^4$ respectively to $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\textsc{case 3:} All $e_1,e_2$ and $e_3$ are distinct. For this case, we have $3! = 6$ permutations of $(e_1, e_2, e_3)$, each of which contribute to a different monomial in the SMP representation of $\poly_{G}^3(\vct{X})$. This case consists of the following edge patterns: $\tri, \oneint, \threepath, \twopathdis, \threedis$, which contribute $6\prob^3, 6\prob^4, 6\prob^4, 6\prob^5$ and $6\prob^6$ respectively to $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\end{proof}
\qed
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Since $\prob$ is fixed, \Cref{lem:qE3-exp} gives us one linear equation in $\numocc{G}{\tri},$ $\numocc{G}{\threepath}$ and $\numocc{G}{\threedis}$ (we can handle the other counts due to \Cref{eq:1e}-\Cref{eq:2pd-3d}). However, we need to generate two more independent linear equations in these three variables. Towards, this end we generate more graphs that are related to $G$:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}\label{def:Gk}
For $\ell > 1$, let graph $\graph{\ell}$ be a graph generated from an arbitrary graph $\graph{1}$, by replacing every edge $e$ of $\graph{1}$ with a $\ell$-path, such that all $\ell$-path replacement edges are disjoint. % in the sense that they only intersect at the original intersection endpoints as seen in $\graph{1}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Next, we relate the various sub-graph counts in $\graph{2}$ and $\graph{3}$ to their counterparts in $\graph{1}=G$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:3m-G2}
The $3$-matchings in graph $\graph{2}$ satisfy the identity:
\begin{align*}
\numocc{\graph{2}}{\threedis} &= 8 \cdot \numocc{\graph{1}}{\threedis} + 6 \cdot \numocc{\graph{1}}{\twopathdis}\\
&+ 4 \cdot \numocc{\graph{1}}{\oneint} + 4 \cdot \numocc{\graph{1}}{\threepath} + 2 \cdot \numocc{\graph{1}}{\tri}.
\end{align*}
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:3m-G3}
The 3-matchings in $\graph{3}$ satisfy the identity:
\begin{align*}
\numocc{\graph{3}}{\threedis} &= 4\numocc{\graph{1}}{\twopath} + 6\numocc{\graph{1}}{\twodis} + 18\numocc{\graph{1}}{\tri}\\
&+ 21\numocc{\graph{1}}{\threepath}+ 24\numocc{\graph{1}}{\twopathdis} + 20\numocc{\graph{1}}{\oneint}\\
&+ 27\numocc{\graph{1}}{\threedis}.
\end{align*}
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:3p-G2}
The $3$-paths in $\graph{2}$ satisfy the identity:
\[\numocc{\graph{2}}{\threepath} = 2 \cdot \numocc{\graph{1}}{\twopath}.\]
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:3p-G3}
The $3$-paths in $\graph{3}$ satisfy the identity:
\[\numocc{\graph{3}}{\threepath} = \numocc{\graph{1}}{\ed} + 2 \cdot \numocc{\graph{1}}{\twopath}.\]
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:tri}
For $\ell > 1$ and any graph $\graph{\ell}$, $\numocc{\graph{\ell}}{\tri} = 0$.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Using the results we have obtained so far, we will prove the following reduction result:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:lin-sys}
%Using the identities of lemmas [\ref{lem:3m-G2}, \ref{lem:3m-G3}, \ref{lem:3p-G2}, \ref{lem:3p-G3}, \ref{lem:tri}] to compute $\numocc{G}{\threedis}, \numocc{G}{\threepath}, \numocc{G}{\tri}$ for $G \in \{\graph{2}, \graph{3}\}$, there exists a linear system $\mtrix{\rpoly}\cdot (x~y~z~)^T = \vct{b}$ which can then be solved to determine the unknown quantities of $\numocc{\graph{1}}{\threedis}, \numocc{\graph{1}}{\threepath}$, and $\numocc{\graph{1}}{\tri}$.
Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [3]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^3$ such that
\[ \begin{pmatrix}
1 & \prob & -(3\prob^2 - \prob^3)\\
-2(3\prob^2 - \prob^3) & -4(3\prob^2 - \prob^3) & 10(3\prob^2 - \prob^3)\\
-18(3\prob^2 - \prob^3) & -21(3\prob^2 - \prob^3) & 45(3\prob^2 - \prob^3)
\end{pmatrix}
\cdot
\begin{pmatrix}
\numocc{G}{\tri}]\\
\numocc{G}{\threepath}\\
\numocc{G}{\threedis}
\end{pmatrix}
=\vct{b},
\]
giving $\numocc{G}{\tri}, \numocc{G}{\threepath}$ and $\numocc{G}{\threedis}$ in $O(1)$ time.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Due to lack of space we defer the proof of the above results to \Cref{subsec:proofs-struc-lemmas}.
%
This result immediately implies \Cref{th:single-p-hard}:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[Proof of \Cref{th:single-p-hard}]
We can compute $\graph{2}$ and $\graph{3}$ from $\graph{1}=G$ in $O(m)$ time (also note that these graphs also have $O(m)$ edges). Thus,
in time $O(T(m))$, we have $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [3]$ and \Cref{lem:lin-sys} completes the proof.
\end{proof}
\qed
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \AH{I didn't think of a more appropriate name for $\vct{b}$, so I have just stuck with what Atri called it on chat.}
%Using \Cref{def:Gk} we construct graphs $\graph{2}$ and $\graph{3}$ from arbitrary graph $\graph{1}$.
%We then show that for any of the patterns $\threedis, \threepath, \tri$ which are all known to be hard to compute, we can use linear combinations in terms of $\graph{1}$ from Lemmas \ref{lem:3m-G2}, \ref{lem:3m-G3}, \ref{lem:3p-G2}, \ref{lem:3p-G3}, \ref{lem:tri} to compute $\numocc{\graph{i}}{S}$, where $i$ in $\{2, 3\}$ and $S \in \{\threedis, \threepath, \tri\}$. Then, using \Cref{claim:four-two}, \Cref{lem:qE3-exp} and \Cref{lem:lin-sys}, we can combine all three linear combinations into a linear system, solving for $\numocc{\graph{1}}{S}$.
%$%^&*(
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: