216 lines
15 KiB
TeX
216 lines
15 KiB
TeX
%root: main.tex
|
|
%!TEX root = ./main.tex
|
|
|
|
\section{Polynomial Formulation}
|
|
|
|
We can think of $\poly(\vct{w})$ as a function whose inputs are the variables $X_1,\ldots, X_\numTup$, as in $\poly(X_1,\ldots, X_\numTup)$. Denote the sum of products expansion of $\poly(X_1,\ldots, X_\numTup)$ as $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$.
|
|
|
|
\begin{Definition}\label{def:qtilde}
|
|
Define $\rpoly(X_1,\ldots, X_\numTup)$ as the reduced version of $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$, of the form
|
|
$\rpoly(X_1,\ldots, X_\numTup) = $
|
|
|
|
\[\poly(X_1,\ldots, X_\numTup)_{\Sigma} \mod X_1^2-X_1\cdots\mod X_\numTup^2 - X_\numTup.\]
|
|
\end{Definition}
|
|
|
|
|
|
Intuitively, $\rpoly(\textbf{X})$ is the expanded sum of products form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
|
|
Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting sum of product expansion of $\poly$ when $\poly$ is in a factorized form such that none of its terms have an exponent $e > 1$, with an idempotent product operator.
|
|
|
|
The usefulness of this reduction will be seen shortly.
|
|
|
|
\begin{Lemma}\label{lem:pre-poly-rpoly}
|
|
When $\poly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i^{d_i}$, we have then that $\rpoly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i$.
|
|
\end{Lemma}
|
|
\begin{proof}
|
|
Follows by the construction of $\rpoly$ in \cref{def:qtilde}.
|
|
\end{proof}
|
|
|
|
\qed
|
|
|
|
Note the following fact:
|
|
\begin{Proposition}
|
|
\[\text{For all } (\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}^\numTup, \poly(\wbit_1,\ldots, \wbit_\numTup) = \rpoly(\wbit_1,\ldots, \wbit_\numTup).\]
|
|
\end{Proposition}
|
|
|
|
\begin{proof}
|
|
Note that any $\poly$ in factorized form is equivalent to its sum of product expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
|
|
\end{proof}
|
|
|
|
\qed
|
|
|
|
Define all variables $X_i$ in $\poly$ to be independent.
|
|
\begin{Lemma}\label{lem:exp-poly-rpoly}
|
|
The expectation of $\poly$ over the possible worlds $\wVec$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numTup)$, i.e.,
|
|
\begin{equation*}
|
|
\expct_{\wVec}\pbox{\poly(\wVec)} = \rpoly(\prob_1,\ldots, \prob_\numTup).
|
|
\end{equation*}
|
|
\end{Lemma}
|
|
|
|
\begin{proof}
|
|
%Using the fact above, we need to compute \[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup)\]. We therefore argue that
|
|
%\[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup) = 2^\numTup \cdot \rpoly(\frac{1}{2},\ldots, \frac{1}{2}).\]
|
|
|
|
Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numTup$ variables with highest degree $= D$: %, in which every possible monomial permutation appears,
|
|
\[\poly(X_1,\ldots, X_\numTup) = \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup X_i^{d_i}.\]
|
|
|
|
|
|
Then for expectation we have
|
|
\begin{align}
|
|
\expct_{\wVec}\pbox{\poly(\wVec)} &= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \expct_{\wVec}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup w_i^{d_i}}\label{p1-s1}\\
|
|
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i^{d_i}}\label{p1-s2}\\
|
|
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i}\label{p1-s3}\\
|
|
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \prob_i\label{p1-s4}\\
|
|
&= \rpoly(\prob_1,\ldots, \prob_\numTup)\label{p1-s5}
|
|
\end{align}
|
|
|
|
In \cref{p1-s1}, linearity of expectation lets us move the expectation inside the sum, and in \cref{p1-s2}, independence of the variables lets us push the expectation all the way inside of the product. In \cref{p1-s3}, note that $w_i \in \{0, 1\}$ which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.
|
|
|
|
%\OK{
|
|
% You don't need to tie this to TI-DBs if you define the variables ($X_i$) to be independent.
|
|
% Annotations
|
|
% Boolean expressions over uncorrelated boolean variables are sufficient to model TI-, BI-, and
|
|
% PC-Tables. This should still hold for arithmetic over the naturals.
|
|
%}
|
|
|
|
|
|
Finally, \cref{p1-s5} follows from \cref{lem:pre-poly-rpoly}: $\rpoly(\prob_1,\ldots, \prob_\numTup)$ is exactly the sum, over all monomials, of the monomial's coefficient times the product of the probabilities of the variables appearing in that monomial.
|
|
|
|
\qed
|
|
\end{proof}
|
|
|
|
\begin{Corollary}
|
|
If $\poly$ is given to us in a sum of monomials form, the expectation of $\poly$ ($\ex{\poly}$) can be computed in $O(|\poly|)$ time, where $|\poly|$ denotes the total number of multiplication/addition operators.
|
|
\end{Corollary}
|
|
|
|
\begin{proof}
|
|
Note that \cref{lem:exp-poly-rpoly} shows that $\ex{\poly} = \rpoly(\prob_1,\ldots, \prob_\numTup)$. Therefore, if $\poly$ is already in sum of products form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numTup)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numTup)$), which indeed takes $O(|\poly|)$ computations.\qed
|
|
\end{proof}
|
|
|
|
\subsection{When $\poly$ is not in sum of monomials form}
|
|
|
|
|
|
We would like to argue that in the general case there is no computation of expectation in linear time.
|
|
|
|
To this end, consider the following graph $G(V, E)$, where $|E| = m$, $|V| = \numTup$, and $i, j \in [\numTup]$. Consider the query $q_E(X_1,\ldots, X_\numTup) = \sum\limits_{(i, j) \in E} X_i \cdot X_j$.
|
|
\AR{The two lemmas need to be re-written once notation for representing a query is finalized in Section 1.}
|
|
\AH{I think that we are okay with this now. We can use both polynomial and query notation interchangeably. Does it matter which we use in the lemmas, i.e., $\poly(\vct{w})$ vs. $\poly(\wElem_1,\ldots, \wElem_N)$? Please let me know.}
|
|
\begin{Lemma}\label{lem:const-p}
|
|
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\dots, \wElem_\numTup)^3$ in $T(m)$ time for $\wElem_1 =\cdots= \wElem_\numTup = \prob$, then we can count the number of 3-matchings in $G$ in $T(m) + O(m)$ time.
|
|
\end{Lemma}
|
|
|
|
\begin{Lemma}\label{lem:gen-p}
|
|
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ in $T(m)$ time for $O(1)$ distinct values of $\prob$, then we can count the number of triangles (and the number of 3-paths, the number of 3-matchings) in $G$ in $O(T(m) + m)$ time.
|
|
\end{Lemma}
|
|
|
|
\begin{Lemma}\label{lem:qE3-exp}
|
|
When we expand $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ out and assign all exponents $e \geq 1$ a value of $1$, we have the following,
|
|
\begin{align}
|
|
&\rpoly(\prob,\ldots, \prob) = \numocc{\ed}\prob^2 + 6\numocc{\twopath}\prob^3 + 6\numocc{\twodis}\prob^4 + 6\numocc{\tri}\prob^3 +\nonumber\\
|
|
&\qquad\qquad6\numocc{\oneint}\prob^4 + 6\numocc{\threepath}\prob^4 + 6\numocc{\twopathdis}\prob^5 + 6\numocc{\threedis}\prob^6.\label{claim:four-one}
|
|
\end{align}
|
|
\end{Lemma}
|
|
|
|
\AH{The warm-up below is fine for now, but will need to be removed for the final draft}
|
|
First, let us do a warm-up by computing $\rpoly(\wElem_1,\dots, \wElem_\numTup)$ when $\poly = q_E(\wElem_1,\ldots, \wElem_\numTup)$. Before doing so, we introduce some notation. Let $\numocc{H}$ denote the number of occurrences of $H$ in $G$. So, e.g., $\numocc{\ed}$ is the number of edges ($m$) in $G$.
|
|
|
|
\AH{We need to make a decision on subgraph notation, and number of occurrences notation. Waiting to hear back from Oliver before making a decision.}
|
|
|
|
\OK{
|
|
I'm not sure what I can add. The existing notation is fine (for now). I would suggest adding
|
|
a definition table.
|
|
}
|
|
\AH{UPDATE: I did a quick google, and it \textit{appears} that there is a bit of a learning curve to implement node/edge symbols in LaTeX. So, maybe, if time is of the essence, we go with another notation.}
|
|
|
|
\begin{Claim}
|
|
We can compute $\rpoly_2(\prob,\ldots, \prob)$ in $O(m)$ time.
|
|
\end{Claim}
|
|
\begin{proof}
|
|
The proof basically follows by definition. When we expand $\poly^2$, and make all exponents $e = 1$, substituting $\prob$ for all $\wElem_i$ we get $\rpoly_2(\prob,\ldots, \prob) = \numocc{\ed} \cdot \prob^2 + 2\cdot \numocc{\twopath}\cdot \prob^3 + 2\cdot \numocc{\twodis}\cdot \prob^4$.
|
|
\begin{enumerate}
|
|
\item First note that
|
|
\begin{align*}
|
|
\poly^2(\wVec) &= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. (i, j) \neq (k, \ell)}} \wElem_i\wElem_j\wElem_k\wElem_\ell\\
&= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i\wElem_j^2\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell
|
|
\end{align*}
|
|
By definition of $\rpoly$,
|
|
\begin{equation}
\rpoly^2(\wVec) = \sum_{(i, j) \in E} \wElem_i\wElem_j + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i\wElem_j\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell.\label{eq:part-1}
\end{equation}
|
|
Notice that the first term is $\numocc{\ed}\cdot \prob^2$, the second $\numocc{\twopath}\cdot \prob^3$, and the third $\numocc{\twodis}\cdot \prob^4.$
|
|
\item Note that
|
|
\AH{We need the correct formula for two-matchings below.}
|
|
\begin{align*}
|
|
&\numocc{\ed} = m,\\
|
|
&\numocc{\twopath} = \sum_{u \in V} \binom{d_u}{2} \text{ where $d_u$ is the degree of vertex $u$,}\\
&\numocc{\twodis} = \sum_{(u, v) \in E} \frac{m - d_u - d_v + 1}{2}.
|
|
\end{align*}
|
|
\end{enumerate}
|
|
Thus, since each of the summations can be computed in $O(m)$ time, this implies that by \cref{eq:part-1} $\rpoly(\prob,\ldots, \prob)$ can be computed in $O(m)$ time.\qed
|
|
\end{proof}
|
|
\AH{END of the 'warm-up'}
|
|
We are now ready to state the claim we need to prove \cref{lem:const-p} and \cref{lem:gen-p}.
|
|
|
|
Let $\poly(\wVec) = q_E(\wVec)^3$.
|
|
\begin{Claim}\label{claim:four-two}
|
|
If one can compute $\rpoly(\prob,\ldots, \prob)$ in time $T(m)$, then we can compute the following in $O(T(m) + m)$ time:
|
|
\[\numocc{\tri} + \numocc{\threepath} \cdot \prob - \numocc{\threedis}\cdot(\prob^2 - \prob^3).\]
|
|
\end{Claim}
|
|
|
|
\begin{proof}
|
|
|
|
We have either shown or will show that the following subgraph cardinalities can be computed in $O(m)$ time:
|
|
\[\numocc{\ed}, \numocc{\twopath}, \numocc{\twodis}, \numocc{\oneint}, \numocc{\twopathdis} + \numocc{\threedis}.\]
|
|
|
|
By definition we have that
|
|
\[\poly(\wElem_1,\ldots, \wElem_\numTup) = \sum_{\substack{(i_1, j_1),\\ (i_2, j_2),\\ (i_3, j_3) \in E}} \prod_{\ell = 1}^{3}\wElem_{i_\ell}\wElem_{j_\ell}.\]
|
|
Rather than list all the expressions in full detail, let us make some observations regarding the sum. Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2), e_3 = (i_3, j_3)$. Notice that each expression in the sum consists of a triple $(e_1, e_2, e_3)$. There are three forms the triple $(e_1, e_2, e_3)$ can take.
|
|
|
|
\underline{case 1:} $e_1 = e_2 = e_3$, where all edges are the same. There are exactly $m$ such triples, each with a $\prob^2$ factor.
|
|
|
|
\underline{case 2:} This case occurs when there are two distinct edges of the three. All 6 combinations of two distinct values consist of the same monomial in $\rpoly$, i.e. $(e_1, e_1, e_2)$ is the same as $(e_2, e_1, e_2)$. This case produces the following edge patterns: $\twodis, \twopath$.
|
|
|
|
\underline{case 3:} $e_1 \neq e_2 \neq e_3$, i.e., when all edges are distinct. This case consists of the following edge patterns: $\threedis, \twopathdis, \threepath, \oneint, \tri$.
|
|
|
|
\AH{This proof I think could use some reorganization. We really don't need the warm-up anymore, but we can use the formulas for case 1 and case 2.}
|
|
|
|
It has already been shown previously that $\numocc{\ed}, \numocc{\twopath}, \numocc{\twodis}$ can be computed in $O(m)$ time. Here are the arguments for the rest.
|
|
\[\numocc{\oneint} = \sum_{u \in V} \binom{d_u}{3}\]
|
|
$\numocc{\twopathdis} + \numocc{\threedis} = $ the number of occurrences of three distinct edges with five or six vertices. This can be counted in the following manner. For every edge $(u, v) \in E$, throw away all neighbors of $u$ and $v$ and pick two more distinct edges.
|
|
\[\numocc{\twopathdis} + \numocc{\threedis} = \sum_{(u, v) \in E} \binom{m - d_u - d_v + 1}{2}\] The implication in \cref{claim:four-two} follows by the above and \cref{lem:qE3-exp}.
|
|
|
|
\AH{Justify the last sentence.}
|
|
\end{proof}
|
|
|
|
\qed
|
|
|
|
\begin{proof}[Proof of \cref{lem:gen-p}]
|
|
|
|
%\AR{Also you can modify the text of \textsc{Proof} by using the following latex command \texttt{\\begin\{proof\}[Proof of Lemma 2]} and Latex will typeset this as \textsc{Proof of Lemma 2}, which is what you really want.}
|
|
|
|
\cref{claim:four-two} says that if we know $\rpoly_3(\prob,\ldots, \prob)$, then we can know in $O(m)$ additional time
|
|
\[\numocc{\tri} + \numocc{\threepath} \cdot \prob - \numocc{\threedis}\cdot(\prob^2 - \prob^3).\] We can think of each term in the above equation as a variable, where one can solve a linear system given 3 distinct $\prob$ values, assuming independence of the three linear equations. In the worst case, without independence, 4 distinct values of $\prob$ would suffice...because Atri said so, and I need to ask him for understanding why this is the case, of which I suspect that it has to do with basic result(s) in linear algebra.\AR{Follows from the fact that the corresponding coefficient matrix is the so called Vandermonde matrix, which has full rank.}
|
|
\AH{This Vandermonde matrix I need to research.}
|
|
\end{proof}
|
|
|
|
\qed
|
|
|
|
\AH{Below is only a transcription of the notes. The claims need to be verified and further worked out.}
|
|
|
|
\begin{proof}[Proof of \cref{lem:const-p}]
|
|
|
|
The argument for \cref{lem:gen-p} cannot be applied to \cref{lem:const-p} since we have that $\prob$ is fixed. We have hope in the following: we assume that we can solve this problem for all graphs, and the hope would be to solve the problem for say $G_1, G_2, G_3$, where $G_1$ is arbitrary, and relate the values of $\numocc{H}$, where $H$ is a placeholder for the relevant edge combination. The hope is that these relations would result in three independent linear equations, and then we would be done.
|
|
|
|
The following is an option.
|
|
\begin{enumerate}
|
|
\item Let $G_1$ be an arbitrary graph
|
|
\item Build $G_2$ from $G_1$, where each edge in $G_1$ gets replaced by a 2-path.
|
|
\end{enumerate}
|
|
|
|
Then $\numocc{\tri}_2 = 0$, and if we can prove that
|
|
\begin{itemize}
|
|
\item $\numocc{\threepath}_2 = 2 \cdot \numocc{\twopath}_1$
|
|
\item $\numocc{\threedis}_2 = 8 \cdot \numocc{\threedis}_1$
|
|
\end{itemize}
|
|
we solve our problem for $q_E^3$ based on $G_2$ and we can compute $\numocc{\threedis}$, a hard problem.
|
|
\end{proof}
|