%root: main.tex %!TEX root = ./main.tex %\onecolumn \subsection{Polynomial Formulation and Equivalences} Since we have shown that computing the expected multiplicity of a result tuple is equivalent to computing the expectation of a polynomial (for that tuple) given a probability distribution over all possible assignments of variables in the polynomial to $\{0,1\}$, we from now on focus on this problem exclusively. Before proceeding, note that the following is assuming \bis (which subsume \tis as a special case). Thus, variables are independent of each other and each variable $X$ is associated with a probability $\vct{p}(X)$. Let us use the expression $(x + y)^2$ for a running example in the following definitions. \begin{Definition}[Monomial]\label{def:monomial} A monomial is a product of a fixed set of variables, each raised to a non-negative integer power. \end{Definition} For the term $2xy$, by ~\cref{def:monomial} the monomial is $xy$. \begin{Definition}[Standard Monomial Basis]\label{def:smb} A polynomial is in standard monomial basis when it is fully expanded out such that no product of sums exist and where each unique monomial appears exactly once. \end{Definition} The standard monomial basis for the running example is $x^2 +2xy + y^2$. While $x^2 + xy + xy + y^2$ is an expanded form of the expression, it is not the standard monomial basis since $xy$ appears more than once. Throughout this paper, we also make the following \textit{assumption}. \begin{Assumption}\label{assump:poly-smb} All polynomials considered are in standard monomial basis, i.e., $\poly(\vct{X}) = \sum\limits_{\vct{d} \in \mathbb{N}^\numvar}q_d \cdot \prod\limits_{i = 1, d_i \geq 1}^{\numvar}X_i^{d_i}$, where $q_d$ is the coefficient for the monomial encoded in $\vct{d}$ and $d_i$ is the $i^{th}$ element of $\vct{d}$. \end{Assumption} While the definition of polynomial $\poly(\vct{X})$ over a $\bi$ input doesn't change, we introduce an alternative notation which will come in handy. Given $\ell$ blocks, we write $\poly(\vct{X})$ = $\poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$. The number of tuples in the $\bi$ instance can be (trivially) computed as $\numvar = \sum\limits_{i = 1}^{\ell}\abs{\block_i}$ . \begin{Definition}[Degree]\label{def:degree} The degree of polynomial $\poly(\vct{X})$ is the maximum sum of the exponents of a monomial, over all monomials when $\poly(\vct{X})$ is in SOP form. \end{Definition} The degree of the running example is $2$. In this paper we consider only finite degree polynomials. \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde} Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form $\rpoly(X_1,\ldots, X_\numvar) = $ \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\] \end{Definition} \begin{Example}\label{example:qtilde} Consider when $\poly(x, y) = (x + y)(x + y)$. Then the expanded derivation for $\rpoly(x, y)$ is \begin{align*} (&x^2 + 2xy + y^2 \mod x^2 - x) \mod y^2 - y\\ = ~&x + 2xy + y^2 \mod y^2 - y\\ = ~& x + 2xy + y \end{align*} \end{Example} Intuitively, $\rpoly(\textbf{X})$ is the SOP form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$. Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting SOP of $\poly(\vct{X})$ with an idemptent product operator. When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$. \begin{Definition}[$\rpoly$ $\bi$ Redefinition] A polynomial $\poly(\vct{X})$ over a $\bi$ instance is reduced to $\rpoly(\vct{X})$ with the following criteria. First, all exponents $e > 1$ are reduced to $e = 1$. Second, all monomials sharing the same $\block$ are dropped. Formally this is expressed as \begin{equation*} \rpoly(\vct{X}) = \poly(\vct{X}) \mod X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u} \end{equation*} for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{block_s}]$, $t \neq u$. \end{Definition} The usefulness of this reduction will be seen in ~\cref{lem:exp-poly-rpoly}. \begin{Lemma}\label{lem:pre-poly-rpoly} When $\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$, we have then that $\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$. \end{Lemma} \begin{proof} Follows by the construction of $\rpoly$ in \cref{def:qtilde}. \end{proof} \qed Note the following fact: \begin{Proposition}\label{proposition:q-qtilde} \[\text{For all } (X_1,\ldots, X_\numvar) \in \{0, 1\}^\numvar, \poly(X_1,\ldots, X_\numvar) = \rpoly(X_1,\ldots, X_\numvar).\] \end{Proposition} \begin{proof}[Proof for Proposition ~\ref{proposition:q-qtilde}] Note that any $\poly$ in factorized form is equivalent to its sum of product expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \end{proof} \qed Define all variables $X_i$ in $\poly$ to be independent. \begin{Lemma}\label{lem:exp-poly-rpoly} The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. \begin{equation*} \expct_{\vct{w}}\pbox{\poly(\vct{w})} = \rpoly(\prob_1,\ldots, \prob_\numvar). \end{equation*} \end{Lemma} Note that in the preceding lemma, we have assigned $\vct{p}$ (introduced in ~\cref{subsec:def-data}) to the variables $\vct{X}$. \begin{proof}[Proof for Lemma ~\ref{lem:exp-poly-rpoly}] %Using the fact above, we need to compute \[\sum_{(\wbit_1,\ldots, \wbit_\numvar) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numvar)\]. We therefore argue that %\[\sum_{(\wbit_1,\ldots, \wbit_\numvar) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numvar) = 2^\numvar \cdot \rpoly(\frac{1}{2},\ldots, \frac{1}{2}).\] Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numvar$ variables with highest degree $= B$: %, in which every possible monomial permutation appears, \[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}\]. Then, assigning $\vct{w}$ to $\vct{X}$, for expectation we have \begin{align} \expct_{\vct{w}}\pbox{\poly(\vct{w})} &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\ &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\ &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\ &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\ &= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5} \end{align} In steps \cref{p1-s1} and \cref{p1-s2}, by linearity of expectation (recall the variables are independent), the expecation can be pushed all the way inside of the product. In \cref{p1-s3}, note that $w_i \in \{0, 1\}$ which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability. %\OK{ % You don't need to tie this to TI-DBs if you define the variables ($X_i$) to be independent. % Annotations % Boolean expressions over uncorrelated boolean variables are sufficient to model TI-, BI-, and % PC-Tables. This should still hold for arithmetic over the naturals. %} Finally, observe \cref{p1-s5} by construction in \cref{lem:pre-poly-rpoly}, that $\rpoly(\prob_1,\ldots, \prob_\numvar)$ is exactly the product of probabilities of each variable in each monomial across the entire sum. \qed \end{proof} \begin{Corollary}\label{cor:expct-sop} If $\poly$ is given as a sum of monomials, the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $O(|\poly|)$, where $|\poly|$ denotes the total number of multiplication/addition operators. \end{Corollary} \begin{proof}[Proof For Corollary ~\ref{cor:expct-sop}] Note that \cref{lem:exp-poly-rpoly} shows that $\expct\pbox{\poly} =$ $\rpoly(\prob_1,\ldots, \prob_\numvar)$. Therefore, if $\poly$ is already in sum of products form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numvar)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numvar)$), which indeed has $O(|\poly|)$ compututations.\qed \end{proof}