Just about finished 1st subsubsection of subsection 3.

master
Aaron Huber 2022-01-26 18:11:14 -05:00
parent a1c86633ac
commit f5567e5e94
2 changed files with 16 additions and 16 deletions

View File

@ -143,20 +143,21 @@ Finally, note that there are exactly three cases where the expectation of a mono
\subsection{Proof for Lemma~\ref{lem:exp-poly-rpoly}}\label{subsec:proof-exp-poly-rpoly}
\begin{proof}
Let $\poly$ be a polynomial of $\numvar$ variables with highest degree $= B$, defined as follows: %, in which every possible monomial permutation appears,
Let $\poly$ be a polynomial of $\abs{\tupset}$ variables with highest degree $= B$, defined as follows: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Let the boolean function $\isInd{\cdot}$ take $\vct{d}$ as input and return true if there does not exist any dependent variables in $\vct{d}$, i.e., $\not\exists ~\block, i\neq j\suchthat d_{\block, i}, d_{\block, j} \geq 1$.\footnote{This \abbrBIDB notation is used and discussed in \cref{subsec:tidbs-and-bidbs}}.
Then in expectation we have
%Let the boolean function $\isInd{\cdot}$ take $\vct{d}$ as input and return true if there does not exist any dependent variables in $\vct{d}$, i.e., $\not\exists ~\block, i\neq j\suchthat d_{\block, i}, d_{\block, j} \geq 1$.\footnote{This \abbrBIDB notation is used and discussed in \cref{subsec:tidbs-and-bidbs}}.
In expectation we have
\begin{align}
\expct_{\vct{\randWorld}}\pbox{\poly(\vct{\randWorld})} &= \expct_{\vct{\randWorld}}\pbox{\sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i} + \sum_{\substack{\vct{d} \in \{0,\ldots, B\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1a}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}} + \sum_{\substack{\vct{d} \in \{0,\ldots, B\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1b}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\~\wedge\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}}\label{p1-s1c}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i^{d_i}}\label{p1-s2}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i}\label{p1-s3}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
\expct_{\vct{\randWorld}\sim\bpd}\pbox{\poly(\vct{\randWorld})} &= \expct_{\vct{\randWorld\sim\bpd}}\pbox{\sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}} \label{p1-s1a}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld\sim\bpd}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}}\label{p1-s1c}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}\sim\bpd}\pbox{\randWorld_i^{d_i}}\label{p1-s2}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}\sim\bpd}\pbox{\randWorld_i}\label{p1-s3}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar).\label{p1-s5}
\end{align}
\Cref{p1-s1a} is the result of substituting in the definition of $\poly$ given above. Then we arrive at \cref{p1-s1b} by linearity of expectation. Next, \cref{p1-s1c} is the result of the independence constraint of \abbrBIDB\xplural, specifically that any monomial composed of dependent variables, i.e., variables from the same block $\block$, has a probability of $0$. \Cref{p1-s2} is obtained by the fact that all variables in each monomial are independent, which allows for the expectation to be pushed through the product. In \cref{p1-s3}, since $\randWorld_i \in \{0, 1\}$ it is the case that for any exponent $e \geq 1$, $\randWorld_i^e = \randWorld_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.
\Cref{p1-s1a} is the result of substituting in the definition of $\poly$ given above. Then we arrive at \cref{p1-s1c} by linearity of expectation.
%Next, is the result of the independence constraint of \abbrBIDB\xplural, specifically that any monomial composed of dependent variables, i.e., variables from the same block $\block$, has a probability of $0$.
\Cref{p1-s2} is obtained by the fact that all variables in each surviving monomial are independent, which allows for the expectation to be pushed through the product. In \cref{p1-s3}, since $\randWorld_i \in \{0, 1\}$ it is the case that for any exponent $e \geq 1$, $\randWorld_i^e = \randWorld_i$. Next, in \cref{p1-s4} the expectation of a $1$-\abbrBIDB tuple is indeed its probability, since the only worlds considered are all those (and only those) which contribute to its marginal probability.
Finally, it can be verified that \Cref{p1-s5} follows since \cref{p1-s4} satisfies the construction of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ in \Cref{def:reduced-bi-poly}.
\qed

View File

@ -224,7 +224,7 @@ The lineage polynomial for $Q^2$ is given by $\poly^2\inparen{A, B, C, E, X, Y,
$$
=A^2X^2B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AXB^2YE + 2AXB^2ZC + 2B^2YEZC.
$$
By exploiting linearity of expectation, further pushing expectation through independent \abbrTIDB variables and observing that for any $\randWorld\in\{0, 1\}$, we have $\randWorld^2=\randWorld$, the expectation is
By exploiting linearity of expectation, further pushing expectation through independent variables and observing that for any $\randWorld\in\{0, 1\}$, we have $\randWorld^2=\randWorld$, the expectation is
$\expct\limits_{\vct{\randWorld}\sim\pdassign}\pbox{\poly^2\inparen{\vct{\randWorld}}}$ (where $\randWorld_A$ is the random variable corresponding to $A$, distributed by $\pdassign$).
%Atri: Combined the the first step below with the next one to save space.
@ -246,10 +246,10 @@ $\expct\limits_{\vct{\randWorld}\sim\pdassign}\pbox{\poly^2\inparen{\vct{\randWo
\end{footnotesize}
\noindent This property leads us to consider a structure related to the lineage polynomial.
\begin{Definition}\label{def:reduced-poly}
For any polynomial $\poly(\vct{X})$ corresponding to a \abbrTIDB (henceforth, \abbrTIDB-lineage polynomial),
For any polynomial $\poly(\vct{X})$ corresponding to a \abbrCTIDB (henceforth, \abbrCTIDB-lineage polynomial),
%\BG{Better introduce the notion of TIDB lin poly before here, then it iis more clear?},
%Atri: Done
define the \emph{reduced polynomial} $\rpoly(\vct{X})$ to be the polynomial obtained by setting all exponents $e > 1$ in the \abbrSMB form of $\poly(\vct{X})$ to $1$.
define the \emph{reduced polynomial} $\rpoly(\vct{X})$ to be the polynomial obtained by setting all exponents $e > 1$ in the standard monomial basis (\abbrSMB) form of $\poly(\vct{X})$ to $1$.
\end{Definition}
With $\poly^2\inparen{A, B, C, E, X, Y, Z}$ as an example, we have:
\begin{align*}
@ -260,9 +260,8 @@ Note that we have argued that for our specific example the expectation that we w
%It can be verified that the reduced polynomial parameterized with each variable's respective marginal probability is a closed form of the expected count (i.e., $\expct\limits_{\vct{\randWorld}\sim\pd}\pbox{\Phi^2\inparen{\vct{X}}} = \widetilde{\Phi^2}(\probOf\pbox{A=1},$ $\probOf\pbox{B=1}, \probOf\pbox{C=1}), \probOf\pbox{D=1}, \probOf\pbox{X=1}, \probOf\pbox{Y=1}, \probOf\pbox{Z=1})$).
\Cref{lem:tidb-reduce-poly} generalizes the equivalence to {\em all} $\raPlus$ queries on \abbrTIDB\xplural (proof in \Cref{subsec:proof-exp-poly-rpoly}).
\begin{Lemma}\label{lem:tidb-reduce-poly}
Let $\pdb$ be a \abbrTIDB over $n$ input tuples
such that the probability distribution $\pdassign$ over $\vct{W}\in\{0,1\}^\numvar$ (the set of possible worlds) is induced by the probability vector $\probAllTup = \inparen{\prob_1,\ldots,\prob_\numvar}$ where $\prob_i=\probOf\inparen{W_i=1}$.
For any \abbrTIDB-lineage polynomial
Let $\pdb$ be a $1$-\abbrBIDB such that the probability distribution $\pdassign$ over $\vct{W}\in\{0,1\}^{\abs{\tupset}}$ (the set of all worlds) is induced by the disjoint condition and the probability vector $\probAllTup = \inparen{\prob_1,\ldots,\prob_{\abs{\tupset}}}$ where $\prob_i=\probOf\inparen{W_i=1}$.
For any $1$-\abbrBIDB-lineage polynomial
%\BG{Term has not been introduced yet.}
%Atri: fixed
$\poly\inparen{\vct{X}}=\apolyqdt(\vct{X})$, it holds that $