paper-BagRelationalPDBsAreHard/poly-form.tex

%root: main.tex
%!TEX root = ./main.tex
%\onecolumn
%\subsection{Reduced Polynomials and Equivalences}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\AH{\Cref{def:reduced-poly} replaces this.}
%\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
%  Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
%  The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is the same as \Cref{def:reduced-poly} with the added constraint that all monomials with variables $X_{\block, i}, X_{\block, j}, i\neq j$ from the same block $\block$ are omitted.
%\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%
%Consider a $\abbrBIDB$ polynomial $\poly\inparen{\vct{X}} = X_{1, 1}X_{1, 2} + X_{1, 2}X_{2, 1}^2$.  Then by \Cref{def:reduced-bi-poly}, we have that $\rpoly\inparen{\vct{X}} = X_{1, 2}X_{2, 1}$.  Next, we show why the reduced form is useful for our purposes.
%%Removing this example to save space
\iffalse
\begin{Example}\label{example:qtilde}
Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks.  The expanded derivation for $\rpoly(X, Y)$ is
\begin{align*}
(&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\
= ~&X + 2XY + Y^2 \mod Y^2 - Y\\
= ~& X + 2XY + Y
\end{align*}
\end{Example}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{Lemma}\label{lem:exp-poly-rpoly}
%Let $\pdb$ be a \abbrBIDB over $\numvar$ input tuples such that the probability distribution $\pdassign$ over $\{0,1\}^\numvar$ (the all worlds set) is induced by the probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$.  As in \Cref{lem:tidb-reduce-poly} for \abbrTIDB, any \abbrBIDB-lineage polynomial $\poly(\vct{X})$ based on $\pdb$ and query $\query$ we have:
%  % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
%\begin{equation*}
%\expct_{\vct{W}\sim \pdassign}\pbox{\poly(\vct{W})}  = \rpoly(\probAllTup).
%\end{equation*}
%\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $\abs{\poly}$ be the number of operators in $\poly$.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Corollary}\label{cor:expct-sop}
If $\poly$ is a $1$-\abbrBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$, can be computed in $\bigO{\abs{\poly}}$ time.
\end{Corollary}

\subsubsection{Possible World Semantics}\label{subsub:possible-world-sem}
In this section, we show how the traditional possible worlds semantics corresponds to our setup.  Readers can safely skip this part without missing anything vital to the results of this paper.

Queries over probabilistic databases are traditionally viewed as being evaluated using the so-called possible world semantics.  A general bag-\abbrPDB can be defined as the pair $\pdb = \inparen{\Omega, \bpd}$ where $\Omega$ is the set of possible worlds represented by $\pdb$ and $\bpd$ the probability distribution over $\Omega$.  Under the possible world semantics, the result of a query $\query$ over an incomplete database $\Omega$ is the set of query answers produced by evaluating $\query$ over each possible world $\omega\in\Omega$: $\inset{\query\inparen{\omega}: \omega\in\Omega}$.
The result of a query is the pair $\inparen{\query\inparen{\Omega}, \bpd'}$ where $\bpd'$ is a probability distribution that assigns to each possible query result the sum of the probabilities of the worlds that produce this answer: $\probOf\pbox{\omega\in\Omega} = \sum_{\substack{\omega'\in\Omega,\\ \query\inparen{\omega'}=\query\inparen{\omega}}}\probOf\pbox{\omega'}$.


Suppose that $\pdb''$ is a reduced \abbrOneBIDB from \abbrCTIDB $\pdb'$ as defined by~\Cref{def:ctidb-reduct}.  Instead of looking only at the possible worlds of $\pdb''$, one can consider the set of all worlds, including those that cannot exist due to, e.g., disjointness.  Since $\abs{\tupset'} = \numvar$, the all worlds set can be modeled by $\worldvec\in \{0, 1\}^{\numvar\bound}$, such that $\worldvec_{\tup, j} \in \worldvec$ represents whether or not the multiplicity of $\tup$ is $j$ (\emph{here and later, especially in \Cref{sec:algo}, we will rename the variables as $X_1,\dots,X_{\numvar'}$, where $\numvar'=\sum_{\tup\in\tupset}\abs{\block_\tup}$}).%
\footnote{
In this example, $\abs{\block_\tup} = \bound$ for all $\tup$.
} %(where $k = \sum_{\ell = 1}^{i - 1} \abs{b_\ell} + j$).
  We can denote a probability distribution over all $\worldvec \in \{0, 1\}^{\numvar\bound}$ as $\bpd''$.  When $\bpd''$ is the one induced from each $\prob_{\tup, j}$ while assigning $\probOf\pbox{\worldvec} = 0$ for any $\worldvec$ with $\worldvec_{\tup, j}, \worldvec_{\tup, j'} \neq 0$ for $j\neq j'$, we end up with a bijective mapping from $\bpd$ to $\bpd''$ under which each world is mapped to an equivalent one, implying that the distributions are equivalent, and thus so are the query results.
\Cref{subsec:supp-mat-ti-bi-def} has more details. \medskip


We now make a meaningful connection between possible world semantics and world assignments on the lineage polynomial.

\begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}
Given a \abbrBPDB $\pdb = (\Omega,\bpd)$, $\raPlus$ query $\query$, and lineage polynomial $\apolyqdt$ for arbitrary result tuple $\tup$, %$\semNX$-\abbrPDB $\pxdb = (\idb_{\semNX}',\pd')$ where $\rmod(\pxdb) = \pdb$,
we have (denoting $\randDB$ as the random variable over $\Omega$):
  $ \expct_{\randDB \sim \bpd}\pbox{\query(\randDB)(\tup)} = \expct_{\vct{\randWorld}\sim \pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}. $
\end{Proposition}
\noindent A formal proof of \Cref{prop:expection-of-polynom} is given in \Cref{subsec:expectation-of-polynom-proof}.\footnote{Although \Cref{prop:expection-of-polynom} follows, e.g., as an obvious consequence of Theorem 7.1 in~\cite{IL84a}, we are unaware of any formal proof for bag-probabilistic databases.}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: