paper-BagRelationalPDBsAreHard/mult_distinct_p.tex

%root:main.tex

\subsection{When $\poly$ is not in sum of monomials form}

We would like to argue that, for a compressed version of $\poly(\vct{w})$, in general $\expct_{\vct{w}}\pbox{\poly(\vct{w})}$ cannot be computed in linear time.

To this end, consider the following graph $G(V, E)$, where $|E| = \numedge$, $|V| = \numvar$, and $i, j \in [\numvar]$.

Consider the query $\poly_{G}(\vct{X}) = q_E(X_1,\ldots, X_\numvar) = \sum\limits_{(i, j) \in E} X_i \cdot X_j$.

%Original lemma proving the exact coefficient terms in qE3
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\begin{Lemma}\label{lem:qE3-exp}
%When we expand $\poly_{G}(\vct{X}) = \left(q_E(X_1,\ldots, X_\numvar)\right)^3$ out and assign all exponents $e \geq 1$ a value of $1$, we have the following,
%	\begin{align}
%		&\rpoly_{G}(\prob,\ldots, \prob) = \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3 + 6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6.\label{claim:four-one}
%	\end{align}
%\end{Lemma}
%
%\begin{proof}[Proof of \cref{lem:qE3-exp}]
%By definition we have that
%		\[\poly_{G}(\vct{X}) = \sum_{\substack{(i_1, j_1),\\ (i_2, j_2),\\ (i_3, j_3) \in E}} \prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}.\]
%		Rather than list all the expressions in full detail, let us make some observations regarding the sum.  Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2), e_3 = (i_3, j_3)$.  Notice that each expression in the sum consists of a triple $(e_1, e_2, e_3)$.  There are three forms the triple $(e_1, e_2, e_3)$ can take.
%
%\textsc{case 1:} $e_1 = e_2 = e_3$, where all edges are the same.  There are exactly $\numedge$ such triples, each with a $\prob^2$ factor in $\rpoly_{G}\left(\prob_1,\ldots, \prob_\numvar\right)$.
%
%\textsc{case 2:}  This case occurs when there are two distinct edges of the three, call them $e$ and $e'$.  When there are two distinct edges, there is then the occurence when $2$ variables in the triple $(e_1, e_2, e_3)$ are bound to $e$.  There are three combinations for this occurrence.  It is the analogue for when there is only one occurrence of $e$, i.e. $2$ of the variables in $(e_1, e_2, e_3)$ are $e'$.  Again, there are three combinations for this.  All $3 + 3 = 6$ combinations of two distinct values consist of the same monomial in $\rpoly$, i.e. $(e_1, e_1, e_2)$ is the same as $(e_2, e_1, e_2)$.  This case produces the following edge patterns: $\twopath, \twodis$.
%
%\textsc{case 3:} $e_1 \neq e_2 \neq e_3$, i.e., when all edges are distinct.  For this case, we have $3! = 6$ permutations of $(e_1, e_2, e_3)$.  This case consists of the following edge patterns: $\tri, \oneint, \threepath, \twopathdis, \threedis$.
%\end{proof}
%\qed
\subsubsection{Multiple Distinct $\prob$ values}
For the following discussion, set $\poly_{G}^\kElem(\vct{X}) = \left(q_E(X_1,\ldots, X_\numvar)\right)^\kElem$.
\begin{Lemma}\label{lem:qEk-multi-p}
Given polynomial $\poly_{G}^\kElem(\prob,\ldots, \prob)$, we can write $\rpoly_{G}^\kElem$ as $\rpoly_{G}^\kElem(\prob,\ldots, \prob) = \sum\limits_{i = 0}^{2\kElem} c_i \cdot \prob^i$ for some fixed coefficients $\vct{c} = (c_0,\ldots, c_{2\kElem})$.  Moreover, given the value of $\rpoly_{G}^\kElem(\prob,\ldots, \prob)$ at $2\kElem + 1$ distinct values of $\prob$, one can compute each $c_i$ in $\vct{c}$ exactly.
\end{Lemma}

\begin{proof}[Proof of~\cref{lem:qEk-multi-p}]
One can expand $\poly_{G}^\kElem(\vct{X})$ by performing the $\numedge^\kElem$ products of $\kElem$ edge terms, one taken from each factor, which yields a polynomial in sum of monomials form.  By definition, $\rpoly_{G}^\kElem$ reduces every variable exponent greater than $1$ to $1$.  Thus, a monomial such as $X_i^\kElem X_j^\kElem$ becomes $X_iX_j$ in $\rpoly_{G}^\kElem$, and its value after substitution is $\prob_i\cdot \prob_j = \prob^2$.  Collecting the monomials by the number of distinct variables they contain gives the form $\sum_{i = 0}^{2\kElem} c_i \cdot \prob^i$ of the lemma statement.  The sum has no more than $2\kElem + 1$ terms because each edge has two endpoints, so a product of $\kElem$ edges contains at most $2\kElem$ distinct variables; this maximum is attained exactly when the $\kElem$ edges are distinct and pairwise disjoint, which corresponds to the term $\prob^{2\kElem}$.

Given the $2\kElem + 1$ distinct values of $\prob$ from the lemma statement, evaluating $\rpoly_{G}^\kElem(\prob,\ldots, \prob)$ at each of them yields $2\kElem + 1$ linear equations in the unknowns $c_0,\ldots, c_{2\kElem}$.  By construction of the summation, the coefficient matrix of this system is a Vandermonde matrix, which has full rank because the $\prob$ values are distinct, and so we can solve the linear system to determine $\vct{c}$ exactly.
\end{proof}

\qed

\begin{Lemma}\label{lem:qEk-multi-p-k-match}
The number of $\kElem$-matchings in $\poly_{G}^\kElem(\vct{X})$ is exactly $\kElem!\cdot\numocc{G}{\threedis}$.
\end{Lemma}

\begin{proof}[Proof of Lemma~\ref{lem:qEk-multi-p-k-match}]
A $\kElem$-matching consists of $\kElem$ edges, $e_1, e_2,\ldots, e_\kElem$, that are pairwise disjoint, i.e., $e_i \cap e_j = \emptyset$ for all $i \neq j$.  Across the $\kElem$ factors of $\poly_{G}^\kElem(\vct{X})$, there are $\kElem$ choices in the first factor to select an edge of a given $\kElem$-matching, $\kElem - 1$ choices in the second factor, and so on through all the factors, yielding $\kElem!$ duplicate terms for each $\kElem$-matching in the expansion of $\poly_{G}^\kElem(\vct{X})$.

Thus, the product $\kElem!\cdot\numocc{G}{\threedis}$ is the exact number of $\kElem$-matchings in $\poly_{G}^\kElem(\vct{X})$.
\end{proof}

\qed

\begin{Corollary}\label{cor:lem-qEk}
One can compute $\numocc{G}{\threedis}$ exactly from $\poly_{G}^\kElem(\vct{X})$.
\end{Corollary}

\begin{proof}[Proof for Corollary~\ref{cor:lem-qEk}]
By~\cref{lem:qEk-multi-p}, the coefficient $c_{2\kElem}$ can be computed exactly.  Only a $\kElem$-matching has $2\kElem$ distinct endpoints, so only the terms arising from $\kElem$-matchings contribute to $c_{2\kElem}$; hence, by~\cref{lem:qEk-multi-p-k-match}, $c_{2\kElem} = \kElem!\cdot\numocc{G}{\threedis}$.  Dividing $c_{2\kElem}$ by the factor $\kElem!$, it follows that the resulting value is indeed $\numocc{G}{\threedis}$.
\end{proof}

\qed

%\begin{Lemma}\label{lem:alt-qEk}
%Given $k$ distinct $\prob$ values and $\poly_{G}^k(\prob,\ldots, \prob)$, one can solve the number of $3$-matchings exactly.
%\end{Lemma}
%
%\begin{proof}[Proof for Lemma ~\ref{lem:alt-qEk}]
%By the same logic as ~\cref{lem:qEk-multi-p} it is the case that there are $k$ $\prob^i$ values for $i$ in $[0, k - 1]$.  This, combined with $k$ distinct $\prob$ values yields the Vandermonde matrix with full rank, and thus all the values $c_i$ in $\vct{c}$ can be computed exactly.  Finally, along the same lines as ~\cref{lem:qEk-multi-p-k-match}, dividing by $k!$ yields the desired result, $\numocc{G}{k-matchings}$.  This can be seen, since it is the case that only a $k-matching$ can have a $\prob^{2k}$ factor, and, secondly, for a $k-product$, there are $k$ choices in the first product, $k - 1$ choices in the second factor, and so on, yielding $k!$ copies of each $k-matching$.
%
%
%\AH{Any suggestions for a better notation/representation of k-matching??}
%\end{proof}
%
%\qed

\begin{Corollary}\label{cor:reduct}
By~\cref{lem:qEk-multi-p}, \cref{lem:qEk-multi-p-k-match}, and~\cref{cor:lem-qEk}, it follows that computing $\rpoly(\vct{X})$ is hard.
\end{Corollary}

%Old proof
%%%%%%%%%%%%%%%%%%%%%
%Notice that ~\cref{lem:qE3-exp} is an example of a query that reduces to the hard problems in graph theory of counting triangles, three-matchings, three-paths, etc.  Thus, in general, computing $\expct_{\vct{w}}\pbox{\poly(\vct{w})} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ is a hard problem.
%
%\begin{Claim}\label{claim:four-two}
% If one can compute $\rpoly_{G}(\prob,\ldots, \prob)$ in time T(\numedge), then we can compute the following in O(T(\numedge) + \numedge):
%\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(3\prob^2 - \prob^3).\]
%\end{Claim}
%\begin{proof}[Proof of Claim \ref{claim:four-two}]
%%We have shown that the following subgraph cardinalities can be computed in $O(\numedge)$ time:
%%\[\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis}, \numocc{G}{\oneint}, \numocc{G}{\twopathdis} + \numocc{G}{\threedis}.\]
%It has already been shown previously that $\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis},$ and $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$ can be computed in $O(\numedge)$ time.
%
%Using the result of \cref{lem:qE3-exp}, let us show a derivation to the identity of the consequent in \cref{claim:four-two}.
%
%All of \cref{eq:1e}, \cref{eq:2p}, \cref{eq:2m}, \cref{eq:3s}, \cref{eq:2pd-3d} show that we can compute the respective edge patterns in $O(\numedge)$ time.  Rearrange ~\cref{claim:four-one}, $\rpoly_{G}$, with all linear time computations on one side, leaving only the hard computations,
%\begin{align}
%&\rpoly_{G}(\prob,\ldots, \prob) = \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\tri}\prob^3 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6\nonumber\\
%&\rpoly_{G}(\prob,\ldots, \prob) - \numocc{G}{\ed}\prob^2 - 6\numocc{G}{\twopath}\prob^3 - 6\numocc{G}{\twodis}\prob^4 - 6\numocc{G}{\oneint}\prob^4 = 6\numocc{G}{\tri}\prob^3 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6\label{eq:LS-rearrange}\\
%&\frac{\rpoly_{G}(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath} - \numocc{G}{\twodis}\prob - \numocc{G}{\oneint}\prob = \numocc{G}{\tri} + \numocc{G}{\threepath}\prob + \numocc{G}{\twopathdis}\prob^2 + \numocc{G}{\threedis}\prob^3\label{eq:LS-reduce}\\
%&\frac{\rpoly_{G}(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath} - \numocc{G}{\twodis}\prob - \numocc{G}{\oneint}\prob - \big(\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}\big)\prob^2 = \numocc{G}{\tri} + \numocc{G}{\threepath}\prob - \numocc{G}{\threedis}\left(3\prob^2 - \prob^3\right)\label{eq:LS-subtract}
%\end{align}
%
%\cref{eq:LS-rearrange} is the result of simply subtracting from both sides terms that have $O(\numedge)$ complexity.  Dividing all terms by the common factor of $6\prob^3$ gives \cref{eq:LS-reduce}.  Equation ~\ref{eq:LS-subtract}, is the result of subtracting the $O(\numedge)$ computable term $\left(\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}\right)\prob^2$ from both sides.
%
%%\begin{equation}
%%\frac{\rpoly_{G}(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath} - \numocc{G}{\twodis}\prob - \numocc{G}{\oneint}\prob - \big(\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}\big)\prob^2 = \numocc{G}{\tri} + \numocc{G}{\threepath}\prob - \numocc{G}{\threedis}\left(3\prob^2 - \prob^3\right)
%%\end{equation}
%
%
%The implication in \cref{claim:four-two} follows by the above and \cref{lem:qE3-exp}.
%\end{proof}
%\qed
%
%\begin{Lemma}\label{lem:gen-p}
%If we can compute $\rpoly_{G}(\vct{X})$ in $T(\numedge)$ time for $O(1)$ distinct values $\vct{\prob}$ such that all $\prob_i = \prob$ for all $i \in [\numvar], \prob_i \in \vct{\prob}$, then we can count the number of  triangles, 3-paths, and 3-matchings in $G$ in $T(\numedge) + O(\numedge)$ time.
%\end{Lemma}
%
%\begin{proof}[Proof of \cref{lem:gen-p}]
%
%\cref{claim:four-two} says that if we know $\rpoly_{G}(\prob,\ldots, \prob)$, then we can know in O(\numedge) additional time
%\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(3\prob^2 - \prob^3).\]  We can think of each term in the above equation as a variable, where one can solve a linear system given 3 distinct $\prob$ values, assuming independence of the three linear equations.  In the worst case, without independence, 4 distinct values of $\prob$ would suffice.  This follows from the fact that the corresponding coefficient matrix is the so called Vandermonde matrix, which has full rank
%\end{proof}
%\qed