%root: main.tex %!TEX root=./main.tex %\onecolumn \section{Background and Notation}\label{sec:background} \subsection{Polynomial Definition and Terminology} %We now introduce some terminology %and develop a reduced form of lineage polynomials for a \abbrBIDB or \abbrTIDB. %Note that Given an index set $S$ and variables $X_\tup$ for $\tup\in S$, a (general) polynomial $\genpoly$ over $\inparen{X_\tup}_{\tup \in S}$ with individual degree $\hideg <\infty$ is formally defined as: \begin{align} \label{eq:sop-form} \genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN. \end{align} %where $c_{\vct{d}}\in \semN$. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Standard Monomial Basis]\label{def:smb} The term $\prod_{\tup\in S} X_\tup^{d_\tup}$ in \Cref{eq:sop-form} is a \emph{monomial}. A polynomial $\genpoly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}. \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation. When it is unclear, we use $\smbOf{\genpoly}~\inparen{\smbOf{\poly}}$ to denote the \abbrSMB form of a polynomial (lineage polynomial) $\genpoly~\inparen{\poly}$. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Degree]\label{def:degree-of-poly} The degree of polynomial $\genpoly(\vct{X})$ is the largest $\sum_{\tup\in S}d_\tup %= \norm{\vct{d}}_1 $% = \sum_{\tup\in\tupset} d_\tup$ such that $c_{\vct{d}}\ne 0$. We denote the degree of $\genpoly$ as $\deg\inparen{\genpoly}$. % maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$. \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% As an example, the degree of the polynomial $X^2+2XY^2+Y^2$ is $3$. 
Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins needed to produce a result tuple. %in any clause of the $\raPlus$ query that created it. We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynomial} (%resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if it is clear from context that there exists an $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}$. %Following the typical representation of bags in production databases, for query inputs, we will use \abbrBPDB\xplural with multiplicities $\{0, 1\}$ (see \Cref{sec:gener-results-beyond} for more on this choice). \subsection{\abbrOneBIDB}\label{subsec:one-bidb} \label{subsec:tidbs-and-bidbs} %\noindent\secrev{ %A block independent database \abbrBIDB $\pdb'$ is the union of $\numblock$ sets of tuples, where each set of tuples consists of elements all of which are disjoint to one another. Each set of tuples is called a block, denoted $\block_i$ for $i\in\pbox{\numblock}$, where all $\block_i$ are independent events. We define next a specific construction of \abbrBIDB that is useful for our work.} \noindent A block independent database (\abbrBIDB) $\pdb'$ models a set of worlds, each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and all $\block_i$ are independent random events. Furthermore, for each $i\in\pbox{\numblock}$, the tuples $\tup\in\block_i$ are required to be pairwise disjoint events. We define next a specific construction of \abbrBIDB that is useful for our work. 
%\secrev{ %A block independent database \abbrBIDB $\pdb'$ can viewed as a $1$-\abbrTIDB $\pdb$ with the added flexibility that each $\tup\in\tupset$ has multiple disjoint alternatives, i.e., all $\tup \in \tupset'$ are partitioned into $m$ independent blocks with the condition that tuples $\tup \in \block_i$ for $i \in \pbox{m}$ are disjoint events. We define next a specific construction of \abbrBIDB that is useful for our work. %} \begin{Definition}[\abbrOneBIDB]\label{def:one-bidb} Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'}$, where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. The set $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i$, for $i\in\pbox{\numblock}$, of disjoint tuples. The distribution $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases} 1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\ 0 & \text{if there exist } \tup\neq\tup'\in\block_i \text{ such that } W_\tup, W_{\tup'}\neq 0\\ \prob_\tup & \text{if } W_\tup \ne 0 \text{ for a unique } \tup\in\block_i. \end{cases}$ \noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}(W)$. % if for any $i \in\pbox{\numblock}$ there does \emph{not} exist a $\tup\neq\tup' \in \block_i$ such that $W_{\tup}, W_{\tup'} \geq 1$, where $\prob_{\tup}$ is the marginal probability $\tup$. 
In particular, $\probOf\pbox{\worldvec=W} = 0$ whenever some block contains two distinct tuples with non-zero multiplicity in $W$.\footnote{We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance. When there is no ambiguity, we will denote a world vector as $\worldvec$.}% $\worldvec\in\prod_{\tup\in\tupset'}\inset{0,\bound_\tup},\tup,~\tup'\in\block_i~:~\probOf\pbox{\worldvec_\tup, \worldvec_\tup'>0} = 0$. \end{Definition} We now present a reduction that is useful in deriving our results: \begin{Proposition}[\abbrCTIDB reduction]\label{def:ctidb-reduct} Given a \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, let $\pdb' = \inparen{\onebidbworlds{\tupset'}, \bpd'}$ be the \emph{\abbrOneBIDB} obtained in the following manner: for each $\tup\in\tupset$, create a block $\block_\tup = \inset{\intup{\tup, j}_{j\in\pbox{\bound}}}$ of disjoint tuples, for all $j\in\pbox{\bound}$.% such that $X_{\tup, j}\in\inset{0,1}$. The probability distribution $\bpd'$ is characterized by the vector $\vct{p} = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}$. % for $\tup\in\tupset$ with multiplicity $j$. Then, the distributions $\bpd$ and $\bpd'$ are equivalent. %and the \abbrBIDB disjoint requirement, where given any $\worldvec\in\onebidbworlds{\tupset'}$, $\probOf\pbox{\worldvec_{\tup, j}, \worldvec_{\tup, j'} > 0} = 0$ for any $j \neq j' \in \pbox{\bound}$.%, such that for any $W\in\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}^{\tupset'}$, $\probOf\pbox{\worldvec = W} = \prod_{\tup\in\tupset', j\in\pbox{\bound}}W_{\tup, j}\cdot j\cdot\prob_\tup$ if $\forall \tup \in \tupset'\not\exists j\neq j'\in\pbox{\bound}, W_{\tup, j}, W_{\tup, j'} \geq 1$; otherwise $\probOf\pbox{\worldvec = W} = 0$.% that for any $X_{\tup, j} = 1, j'\in\pbox{\bound} - \inset{j}, X_{\tup, j'} = 0$. % $\block_\tup,~j\in\pbox{\bound}~|~X_{\tup, j} = 1,\not\exists j'\neq j~|~X_{\tup, j'} = 1$. 
%$\tup_j\geq1\implies \tup_{j'} = 0$.$\forall j, j' \in \pbox{\bound},\forall \tup\in\tupset, \tup_j\geq 1\implies \tup_{j'} = 0$ for any block $\block_\tup$. \end{Proposition} For a polynomial $\poly\inparen{\vct{X}}$ generated from a \abbrCTIDB $\pdb$, each $X_\tup\in\inset{0,\ldots,\bound}$, whereas for a polynomial $\poly'\inparen{\vct{X}}$ produced from the reduced \abbrOneBIDB $\pdb'$, each $X_{\tup, j}\in\inset{0, 1}$. %As previously noted, unlike $X_{\tup}\in\inset{0,\ldots,\bound}$ for $X_{\tup}\in\vars{\pdb}$, $X_{\tup, j}\in\inset{0,1}$ for $X_{\tup, j}\in\vars{\pdb'}$. Hence, in the setting of \abbrOneBIDB, the semantics for generating lineage polynomials of $\raPlus$ queries are as shown in~\Cref{fig:lin-poly-bidb-redux}. Note that the semantics for lineage polynomial construction changes only in the base case. We now define the reduced polynomial $\rpoly'$ of a \abbrOneBIDB. \begin{figure}[t!] \centering \resizebox{\textwidth}{!}{ \begin{minipage}{\textwidth} \begin{align*} \poly'\pbox{\project_A\inparen{\query}, \tupset', \tup_j} =& \sum_{\substack{\tup_{j'},\\\project_{A}\inparen{\tup_{j'}} = \tup_j}}\poly'\pbox{\query, \tupset', \tup_{j'}} & \poly'\pbox{\query_1\union\query_2, \tupset', \tup_j} =& \poly'\pbox{\query_1, \tupset', \tup_j}+\poly'\pbox{\query_2, \tupset', \tup_j}\\ \poly'\pbox{\select_\theta\inparen{\query}, \tupset', \tup_j} =& \begin{cases}\theta = 1&\poly'\pbox{\query, \tupset', \tup_j}\\\theta = 0& 0\\\end{cases} & \begin{aligned} \poly'\pbox{\query_1\join\query_2, \tupset', \tup_j} = \\~ \end{aligned} & \begin{aligned} &\poly'\pbox{\query_1, \tupset', \project_{attr\inparen{\query_1}}\inparen{\tup_j}}\\ &~~~\cdot\poly'\pbox{\query_2, \tupset', \project_{attr\inparen{\query_2}}\inparen{\tup_j}} \end{aligned}\\ &&&\poly'\pbox{\rel,\tupset', \tup_j} = j\cdot X_{\tup, j}. 
\end{align*}\\[-10mm] \end{minipage}} \caption{Construction of the lineage (polynomial) for an $\raPlus$ query $\query$ over $\tupset'$.} \label{fig:lin-poly-bidb-redux} \end{figure} \begin{Definition}[$\rpoly'$]\label{def:reduced-poly-redux} Given a polynomial $\poly'\inparen{\vct{X}}$ generated from a \abbrOneBIDB produced from the reduction of~\Cref{def:ctidb-reduct}, let $\rpoly'\inparen{\vct{X}}$ denote the reduced form of $\poly'\inparen{\vct{X}}$ computed as follows: i) compute $\smbOf{\poly'\inparen{\vct{X}}}$, ii) reduce all \emph{variable} exponents $e > 1$ to $1$. \end{Definition} Then, given the disjointness requirement and the semantics for constructing the lineage polynomial over a \abbrOneBIDB, $\poly'\pbox{\rel,\tupset',\tup}$ is of the same structure as the reformulated polynomial $\refpoly{}$ of step i) from~\Cref{def:reduced-poly}, which then implies that $\rpoly'$ is the reduced polynomial that results from step ii) of~\Cref{def:reduced-poly}, and further that~\Cref{lem:tidb-reduce-poly} immediately follows for \abbrOneBIDB polynomials. \begin{Lemma} Given any %\abbrCTIDB $\pdb$, its reduced counterpart \emph{\abbrOneBIDB} $\pdb'$, $\raPlus$ query $\query$, and lineage polynomial $\poly'\inparen{\vct{X}}=\poly'\pbox{\query,\tupset',\tup}\inparen{\vct{X}}$, it holds that $ \expct_{\vct{W} \sim \pdassign'}\pbox{\poly'\inparen{\vct{W}}} = \rpoly'\inparen{\probAllTup}. $%, where $\probAllTup = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{c}}}.$%,\ldots,\prob_{\abs{\tupset}, \bound}}$ is defined by $\bpd$. %$\expct_{\rvworld\sim\bpd'}\pbox{\poly'\inparen{\rvworld}} = \rpoly'\inparen{\vct{\prob}}$. \end{Lemma} %In this paper, we focus on two popular forms of \abbrPDB\xplural: Block-Independent (\bi) and Tuple-Independent (\ti) \abbrPDB\xplural. 
%% %A \bi $\pdb$ is a \abbrPDB with the constraint that %%(i) every tuple $\tup_i$ is annotated with a unique random variable $\randWorld_i \in \{0, 1\}$ and (ii) that %the tuples in $\dbbase$ can be partitioned into a set of $\ell$ blocks such that tuples $\tup_{i, j}, \tup_{k, j'}$ from separate blocks $(i\neq k)$ are independent of each other while tuples $\tup_{i, j}, \tup_{i, k}$ from the same block are disjoint events.\footnote{ % Although only a single independent, $[\abs{\block_i}+1]$-valued variable is customarily used per block~\cite{DBLP:series/synthesis/2011Suciu}, we decompose it into $\abs{\block_i}$ correlated $\{0,1\}$-valued variables per block that can be used directly in polynomials (without an indicator function). For $t_{i, j} \in b_i$, the event $(\randWorld_{i,j} = 1)$ corresponds to the event $(\randWorld_i = j)$ in the customary annotation scheme. %} %Each tuple $\tup_{i, j}$ is annotated with a random variable $\randWorld_{i, j} \in \{0, 1\}$ denoting its presence in a possible world $\db$. The probability distribution $\pd$ over $\dbbase$ is the one induced from individual tuple probabilities $\prob_{i, j}\in \vct{\prob}=\inparen{\prob_{1, 1},\ldots,\prob_{\abs{\block},\ldots,\abs{\block_{\abs{\block}}}}}$ (where $\forall i$, $\sum_j p_{i,j}\le 1$) and the conditions on the blocks. A \abbrTIDB is a \abbrBIDB where each block has size exactly $1$. %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: