%root: main.tex %!TEX root = ./main.tex %\onecolumn %\subsection{Reduced Polynomials and Equivalences} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\AH{\Cref{def:reduced-poly} replaces this.} %\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly} % Let $\poly(\vct{X})$ be a \bi-lineage polynomial. % The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is the same as \Cref{def:reduced-poly} with the added constraint that all monomials with variables $X_{\block, i}, X_{\block, j}, i\neq j$ from the same block $\block$ are omitted. %\end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % %Consider a $\abbrBIDB$ polynomial $\poly\inparen{\vct{X}} = X_{1, 1}X_{1, 2} + X_{1, 2}X_{2, 1}^2$. Then by \Cref{def:reduced-bi-poly}, we have that $\rpoly\inparen{\vct{X}} = X_{1, 2}X_{2, 1}$. Next, we show why the reduced form is useful for our purposes. %%Removing this example to save space \iffalse \begin{Example}\label{example:qtilde} Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks. The expanded derivation for $\rpoly(X, Y)$ is \begin{align*} (&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\ = ~&X + 2XY + Y^2 \mod Y^2 - Y\\ = ~& X + 2XY + Y \end{align*} \end{Example} \fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\begin{Lemma}\label{lem:exp-poly-rpoly} %Let $\pdb$ be a \abbrBIDB over $\numvar$ input tuples such that the probability distribution $\pdassign$ over $\{0,1\}^\numvar$ (the all worlds set) is induced by the probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$. As in \Cref{lem:tidb-reduce-poly} for \abbrTIDB, any \abbrBIDB-lineage polynomial $\poly(\vct{X})$ based on $\pdb$ and query $\query$ we have: % % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. %\begin{equation*} %\expct_{\vct{W}\sim \pdassign}\pbox{\poly(\vct{W})} = \rpoly(\probAllTup). %\end{equation*} %\end{Lemma} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Let $\abs{\poly}$ be the number of operators in $\poly$. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Corollary}\label{cor:expct-sop} If $\poly$ is a $1$-\abbrBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $\bigO{\abs{\poly}}$ time. \end{Corollary} \secrev{ Queries over probabilistic databases are traditionally viewed as being evaluated using the so-called possible world semantics. A general bag-\abbrPDB can be defined as the pair $\bpd = \inparen{\Omega, \bpd}$ where $\Omega$ is the set of possible worlds represented by $\pdb$. Under the possible world semantics, the result of a query $\query$ over an incomplete database $\Omega$ is the set of query answers produced by evaluating $\query$ over each possible world $\omega\in\Omega$: $\inset{\query\inparen{\omega}: \omega\in\Omega}$. The result of a query is the pair $\inparen{\query\inparen{\omega}, \bpd'}$ where $\bpd'$ is a probability distribution that assigns to each possible query result the sum of the probabilites of the worlds that produce this answer: $\probOf\pbox{\omega\in\Omega} = \sum_{\omega'\in\Omega,\\\query\inparen{\omega'}=\query\inparen{\omega}}\probOf\pbox{\omega'}$. } Suppose that $\bpd$ is a $1$-\abbrBIDB. Instead of looking only at the possible worlds of $\pdb$, one can consider all worlds, including those that cannot exist due to, e.g., disjointness. The all worlds set can be modeled by $\worldvec\in \{0, 1\}^{\bound\numvar}$, such that $\worldvec_{\tup, j} \in \worldvec$ represents whether or not the multiplicity of $\tup$ is $j$ (\emph{here and later, especially in \Cref{sec:algo}, we will rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{b_i}$}).%(where $k = \sum_{\ell = 1}^{i - 1} \abs{b_\ell} + j$). We can denote a probability distribution over all $\worldvec \in \{0, 1\}^{\bound\numvar}$ as $\bpd'$. When $\bpd'$ is the one induced from each $\prob_{\tup, j}$ while assigning $\probOf\pbox{\worldvec} = 0$ for any $\worldvec$ with $\worldvec_{\tup, j}, \worldvec_{\tup, j'} \geq 1$ for $j\neq j'$, we end up with a bijective mapping from $\bpd$ to $\bpd'$, such that each mapping is equivalent, implying the distributions are equivalent. \Cref{subsec:supp-mat-ti-bi-def} has more details. Recall \Cref{fig:nxDBSemantics} again, which defines the lineage polynomial $\apolyqdt$ for any $\raPlus$ query. We now make a meaningful connection between possible world semantics and world assignments on the lineage polynomial. \begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom} Given a \abbrBPDB $\pdb = (\Omega,\bpd)$, $\raPlus$ query $\query$, and lineage polynomial $\apolyqdt$ for arbitrary result tuple $\tup$, %$\semNX$-\abbrPDB $\pxdb = (\idb_{\semNX}',\pd')$ where $\rmod(\pxdb) = \pdb$, we have (denoting $\randDB$ as the random variable over $\Omega$): $ \expct_{\randDB \sim \bpd}[\query(\randDB)(t)] = \expct_{\vct{\randWorld}\sim \pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}. $ \end{Proposition} \noindent A formal proof of \Cref{prop:expection-of-polynom} is given in \Cref{subsec:expectation-of-polynom-proof}.\footnote{Although \Cref{prop:expection-of-polynom} follows, e.g., as an obvious consequence of~\cite{IL84a}'s Theorem 7.1, we are unaware of any formal proof for bag-probabilistic databases.} We focus on the problem of computing $\expct_\pdassign\pbox{\apolyqdt\inparen{\vct{\randWorld}}}$ from now on, assume implicit $\query, \tupset, \tup$, and drop them from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: