%root: main.tex
%!TEX root=./main.tex
\section{Background and Notation}\label{sec:background}
\subsection{Polynomial Definition and Terminology}
Given an index set $S$ over variables $X_\tup$ for $\tup\in S$, a (general) polynomial $\genpoly$ over $\inparen{X_\tup}_{\tup \in S}$ with individual degree $\hideg <\infty$ is formally defined as:
\begin{align}
\label{eq:sop-form}
\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN.
\end{align}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
The term $\prod_{\tup\in S} X_\tup^{d_\tup}$ in \Cref{eq:sop-form} is a \emph{monomial}. A polynomial $\genpoly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation.
When it is unclear, we use $\smbOf{\genpoly}~\inparen{\smbOf{\poly}}$ to denote the \abbrSMB form of a polynomial (lineage polynomial) $\genpoly~\inparen{\poly}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Degree]\label{def:degree-of-poly}
The degree of polynomial $\genpoly(\vct{X})$ is the largest $\sum_{\tup\in S}d_\tup$ over all $\vct{d}\in\inset{0,\ldots,\hideg}^S$ such that $c_{\vct{d}}\ne 0$.
We denote the degree of $\genpoly$ as $\deg\inparen{\genpoly}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As an example, the degree of the polynomial $X^2+2XY^2+Y^2$ is $3$.
Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins needed to produce a result tuple.
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynomial} (or simply lineage polynomial) if it is clear from context that there exists an $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}$.
\subsection{\abbrOneBIDB}\label{subsec:one-bidb}
\label{subsec:tidbs-and-bidbs}
\noindent A block-independent database (\abbrBIDB) $\pdb'$ models a set of worlds, each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and all $\block_i$ are independent random events.
$\pdb'$ further requires that, for every $i\in\pbox{\numblock}$, the tuples $\tup\in\block_i$ of $\tupset'$ be disjoint events.
We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}.
We define next a specific construction of \abbrBIDB that is useful for our work.
\begin{Definition}[\abbrOneBIDB]\label{def:one-bidb}
Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'}$, where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$.
$\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples.
$\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$.
Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let
$\prob_i(W) = \begin{cases}
1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\
0 & \text{if there exists } \tup \neq \tup'\in\block_i \text{ with } W_\tup, W_{\tup'}\neq 0\\
\prob_\tup & \text{if } W_\tup \ne 0 \text{ for the unique } \tup\in\block_i.\\
\end{cases}$
\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}(W)$.\footnote{We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance. When there is no ambiguity, we will denote a world vector as $\worldvec$.}
\end{Definition}
\Cref{fig:lin-poly-bidb} shows the lineage construction of $\poly'\inparen{\vct{X}}$ given $\raPlus$ query $\query$ for arbitrary deterministic $\gentupset'$.
Note that the semantics differ from~\Cref{fig:nxDBSemantics} only in the base case.
\begin{Proposition}[\abbrCTIDB reduction]\label{prop:ctidb-reduct}
Given \abbrCTIDB $\pdb =$\newline $\inparen{\worlds, \bpd}$, let $\pdb' = \inparen{\onebidbworlds{\tupset'}, \bpd'}$ be the \emph{\abbrOneBIDB} obtained in the following manner: for each $\tup\in\tupset$, create block $\block_\tup = \inset{\intuple{\tup, j}}_{j\in\pbox{\bound}}$ of disjoint tuples, for all $j\in\pbox{\bound}$ where $\bound_{\tup_j} = j$ for each $\tup_j$ in $\tupset'$.
The probability distribution $\bpd'$ is characterized by the vector $\vct{p} = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}$.
Then, $\mathcal{P}$ and $\mathcal{P}'$ are equivalent.
\end{Proposition}
We now define the reduced polynomial $\rpoly'$ of a \abbrOneBIDB.
\begin{figure}[t!]
%\centering
%\resizebox{0.5\textwidth}{!}{
%\begin{minipage}{0.5\textwidth}
\begin{align*}
&\begin{aligned}[t]
&\poly'\pbox{\project_A\inparen{\query}, \gentupset', \tup_j} =\\
&~\sum_{\substack{\tup_{j'},\\\project_{A}\inparen{\tup_{j'}} = \tup_j}}\poly'\pbox{\query, \gentupset', \tup_{j'}}
\end{aligned}
&
&\begin{aligned}[t]
&\poly'\pbox{\query_1\union\query_2, \gentupset', \tup_j} = \\
&\qquad\poly'\pbox{\query_1, \gentupset', \tup_j}+\poly'\pbox{\query_2, \gentupset', \tup_j}
\end{aligned}\\
&\begin{aligned}
&\poly'\pbox{\select_\theta\inparen{\query}, \gentupset', \tup_j} =\\
&~\begin{cases}\theta = 1 &\poly'\pbox{\query, \gentupset', \tup_j}\\\theta = 0& 0\\\end{cases}
\end{aligned}
&
&\begin{aligned}
&\poly'\pbox{\query_1\join\query_2, \gentupset', \tup_j} = \\
&\qquad \poly'\pbox{\query_1, \gentupset', \project_{\mathit{attr}\inparen{\query_1}}\inparen{\tup_j}}\\
&\qquad\cdot\poly'\pbox{\query_2, \gentupset', \project_{\mathit{attr}\inparen{\query_2}} \inparen{\tup_j}}
\end{aligned}\\
&&&\poly'\pbox{\rel,\gentupset', \tup_j} = j\cdot X_{\tup, j}.
\end{align*}\\%[-10mm]
%\end{minipage}}
\setlength{\abovecaptionskip}{-0.2cm}
\caption{Construction of the lineage (polynomial) for an $\raPlus$ query $\query$ over $\gentupset'$.}
\label{fig:lin-poly-bidb}
\vspace{-0.53cm}
\end{figure}
\begin{Definition}[$\rpoly'$]\label{def:reduced-poly-one-bidb}
Given a polynomial $\poly'\inparen{\vct{X}}$ generated from a \abbrOneBIDB, let $\rpoly'\inparen{\vct{X}}$ denote the reduced $\poly'\inparen{\vct{X}}$ derived as follows: i) compute $\smbOf{\poly'\inparen{\vct{X}}}$, eliminating monomials with cross terms $X_{\tup}X_{\tup'}$ for $\tup\neq \tup' \in \block_i$, and ii) reduce all \emph{variable} exponents $e > 1$ to $1$.
\end{Definition}
Then, given $\worldvec\in\inset{0,1}^{\tupset'}$ over the reduced \abbrOneBIDB of~\Cref{prop:ctidb-reduct}, the disjointness requirement, and the semantics for constructing the lineage polynomial over a \abbrOneBIDB, $\poly'\inparen{\worldvec}$ has the same structure as the reformulated polynomial $\refpoly{}\inparen{\worldvec}$ of step i) from~\Cref{def:reduced-poly}.
This in turn implies that $\rpoly'$ is the reduced polynomial that results from step ii) of both~\Cref{def:reduced-poly} and~\Cref{def:reduced-poly-one-bidb}, and further that~\Cref{lem:tidb-reduce-poly} immediately follows for \abbrOneBIDB polynomials.
\begin{Lemma}\label{lem:bin-bidb-phi-eq-redphi}
Given any \emph{\abbrOneBIDB} $\pdb'$, $\raPlus$ query $\query$, and lineage polynomial $\poly'\inparen{\vct{X}}=\poly'\pbox{\query,\tupset',\tup}\inparen{\vct{X}}$, it holds that \newline$
\expct_{\vct{W} \sim \pdassign'}\pbox{\poly'\inparen{\vct{W}}} = \rpoly'\inparen{\probAllTup}.
$
\end{Lemma}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: