Changes to S2 @atri 021822 comments.

master
Aaron Huber 2022-02-21 17:13:01 -05:00
parent 5275d3815b
commit 867a297fc5
4 changed files with 54 additions and 53 deletions

View File

@ -143,18 +143,18 @@ Finally, note that there are exactly three cases where the expectation of a mono
\subsection{Proof for Lemma~\ref{lem:tidb-reduce-poly}}\label{subsec:proof-exp-poly-rpoly}
\begin{proof}
Let $\poly$ be a polynomial of $\numvar$ variables with highest degree $= B$, defined as follows: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Let $\poly$ be a polynomial of $\numvar$ variables with highest degree $= \hideg$, defined as follows: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, \hideg\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
%Note that replacing the variables $X_1,\ldots, X_{\abs{\tupset}}$ with $\inset{j\cdot X_{\tup, j}~|~ \tup\in \tupset, j\in[\bound]}$ (i.e. replacing a variable with a polynomial) and converting to \abbrSMB produces a polynomial that satisfies the above definition (with $\numvar = j\cdot\abs{\tupset}$).
Let the boolean function $\isInd{\cdot}$ take $\vct{d}$ as input and return true if there do not exist any dependent variables in $\vct{d}$, i.e., $\not\exists ~\block, i\neq j\suchthat d_{\block, i}, d_{\block, j} \geq 1$.\footnote{This \abbrBIDB notation is used and discussed in \cref{subsec:tidbs-and-bidbs}.}
Then in expectation we have
\begin{align}
\expct_{\vct{\randWorld}}\pbox{\poly(\vct{\randWorld})} &= \expct_{\vct{\randWorld}}\pbox{\sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i} + \sum_{\substack{\vct{d} \in \{0,\ldots, B\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1a}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}} + \sum_{\substack{\vct{d} \in \{0,\ldots, B\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1b}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\~\wedge\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}}\label{p1-s1c}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i^{d_i}}\label{p1-s2}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i}\label{p1-s3}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,B\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
\expct_{\vct{\randWorld}}\pbox{\poly(\vct{\randWorld})} &= \expct_{\vct{\randWorld}}\pbox{\sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i} + \sum_{\substack{\vct{d} \in \{0,\ldots, \hideg\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1a}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}} + \sum_{\substack{\vct{d} \in \{0,\ldots, \hideg\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1b}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\~\wedge\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}}\label{p1-s1c}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i^{d_i}}\label{p1-s2}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i}\label{p1-s3}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar).\label{p1-s5}
\end{align}
\Cref{p1-s1a} is the result of substituting in the definition of $\poly$ given above. Then we arrive at \cref{p1-s1b} by linearity of expectation. Next, \cref{p1-s1c} is the result of the independence constraint of \abbrBIDB\xplural, specifically that any monomial composed of dependent variables, i.e., variables from the same block $\block$, has a probability of $0$. \Cref{p1-s2} is obtained by the fact that all variables in each monomial are independent, which allows for the expectation to be pushed through the product. In \cref{p1-s3}, since $\randWorld_i \in \{0, 1\}$ it is the case that for any exponent $e \geq 1$, $\randWorld_i^e = \randWorld_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.

View File

@ -7,10 +7,7 @@ This work explores the problem of computing the expectation of a tuple's multipl
$\pdb = \inparen{\worlds, \bpd}$ encodes a bag of uncertain tuples such that each possible tuple encoded in $\pdb$ has a multiplicity of at most $\bound$. $\tupset$ is the set of tuples appearing across all possible worlds, and the set of all worlds is encoded in $\worlds$, which is the set of all vectors of length $\numvar=\abs{\tupset}$ such that each index corresponds to a distinct $\tup \in \tupset$ storing its multiplicity and $\bpd$ is the probability distribution over $\worlds$. A given world $\worldvec \in\worlds$ can be interpreted such that, for each $\tup \in \tupset$, $\worldvec_{\tup}$ is the multiplicity of $\tup$ in $\worldvec$. The probability distribution $\bpd$ for any tuple $\tup$ can then be encoded as $\prob_{\tup, j} = \probOf\pbox{\worldvec_{\tup} = j}$ (for $j \in\pbox{\bound}$), where each tuple multiplicity combination $\inparen{\inparen{\tup, \bound} \in \tupset\times\pbox{\bound}}$ %distribution
is an independent random event. %for $\tup \in \tupset$.
}
%\mypar{For a later section}
%\sout{
%Since each tuple in $\pdb$ has a mutually exclusive probability distribution over its possible multiplicities, it is natural to reduce a \abbrCTIDB to traditional (set) block independent database (\abbrBIDB). We refer to the reduced \abbrBIDB as a $1$-\abbrBIDB, as it is the case that each tuple can appear in a possible world at most $c = 1$ time. \Cref{fig:ctidb-red} shows an example of this reduction.
%}
\secrev{
Allowing for $\leq \bound$ multiplicities across all tuples gives rise to having $\leq \inparen{\bound+1}^\numvar$ possible worlds instead of the usual $2^\numvar$ possible worlds of a $1$-\abbrTIDB, which (assuming set query semantics), is the same as the traditional set \abbrTIDB.
In this work, since we are generally considering bag query input, we will only be considering bag query semantics. We denote by $\query\inparen{\worldvec}\inparen{\tup}$ the multiplicity of $\tup$ in query $\query$ over possible world $\worldvec\in\worlds$.
@ -105,11 +102,10 @@ A query $\query$ is an $\raPlus$ query if it is composed entirely of one or more
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It is natural to explore computing the expected multiplicity of a result tuple as this is the analog for computing the marginal probability of a tuple in a set \abbrPDB.
In this work we will assume that $c =\bigO{1}$ since this is what is typically seen in practice.
%because of the cancellation effect of queries over a $1$-\abbrBIDB (introduced later), where, for the worst case, a self join query, we would have a factor of $\frac{1}{c^{n-1}}$ cancellations.
Allowing for unbounded $c$ is an interesting open problem.
\mypar{Hardness of Set Query Semantics and Bag Query Semantics}
Set query evaluation semantics over $1$-\abbrTIDB\xplural have been studied extensively, and the data complexity of the problem in general has been shown by Dalvi and Suciu to be \sharpphard~\cite{10.1145/1265530.1265571}. For our setting, there exists a trivial polytime algorithm to compute~\Cref{prob:expect-mult} for any $\raPlus$ query over a \abbrCTIDB due to linearity of expectation by simply computing the expectation over a `sum-of-products' representation of the query operations of $\query\inparen{\pdb}\inparen{\tup}$. %We discuss polynomial representation and equivalence in the following subsection.
Set query evaluation semantics over $1$-\abbrTIDB\xplural have been studied extensively, and the data complexity of the problem in general has been shown by Dalvi and Suciu to be \sharpphard~\cite{10.1145/1265530.1265571}. For our setting, there exists a trivial polytime algorithm to compute~\Cref{prob:expect-mult} for any $\raPlus$ query over a \abbrCTIDB due to linearity of expectation by simply computing the expectation over a `sum-of-products' representation of the query operations of $\query\inparen{\pdb}\inparen{\tup}$.
Since we can compute~\Cref{prob:expect-mult} in polynomial time, the interesting question that we explore deals with analyzing the hardness of computing expectation using fine-grained analysis and parameterized complexity, where we are interested in the exponent of polynomial runtime.
}
@ -121,21 +117,7 @@ Since we can compute~\Cref{prob:expect-mult} in polynomial time, the interesting
\secrev{
Specifically, in this work we ask if~\Cref{prob:expect-mult} can be solved in time linear in the runtime of an equivalent deterministic query. If this is true, then this would open up the way for deployment of \abbrCTIDB\xplural in practice. To analyze this question we denote by $\timeOf{}^*(Q,\pdb)$ the optimal runtime complexity of computing~\Cref{prob:expect-mult} over \abbrCTIDB $\pdb$.
%Let $\gentupset$ denote the set of tuples in $\pdb$, i.e.,
%\begin{Definition}[$\gentupset$]
%Define $\gentupset$ to be the set of tuples appearing across all the possible worlds of a $\abbrCTIDB$, formally $\gentupset = \inset{\tup_i ~|~ \forall \worldvec \in \worlds,~\forall i \in \abs{\tupset}:~\worldvec\pbox{i} > 0}$. When a specific $\pdb = \inparen{\worlds, \bpd}$ is being referred to, we will use $\tupset$ to denote the set of tuples.
%\end{Definition}
Let $\qruntime{\optquery{\query},\gentupset,\bound}$ (see~\Cref{sec:gen} for further details) denote the runtime for query $\optquery{\query}$, deterministic database $\gentupset$, and multiplicity bound $\bound$. Since we consider $\raPlus$ queries in which order of operators can impact runtime, we denote the optimal query as $\optquery{\query} = \arg\min_{\query'\in\raPlus, \query'\equiv\query}\qruntime{\query', \gentupset, \bound}$.
%let $\qruntim{\optquery{\query}, \gentupset, \bound} = \min_{\query'\in\raPlus,~\query'\equiv\query}T_{det}\inparen{\query, \gentupset, \bound}$ be the runtime for the optimally structured equivalent $\raPlus$ query $\query'$ (with some caveats; discussed in~\Cref{sec:gen}). % of query $\query$ on deterministic database $\tupset$.
%{\newline\noindent\centerline{\Huge \textcolor{black}{Or instead$\ldots$}}}
%\newline\noindent Let $T_{det}\inparen{\query, \gentupset, \bound}$ denote the runtime for $\raPlus$ query $\query$, deterministic database $\gentupset$, and multiplicity bound $\bound$. Since this paper does not consider optimization schemes, we leave optimization to the reader and show that our results hold across all inputs.
%We make this runtime concrete later on.
%We denote by $\dbbase$ the base \abbrCTIDB table containing all possible tuples, formally as,
%\AR{Again if we are defining \abbrCTIDB `from scratch' instead of in terms of general PDBs, then the above might not be needed. Also it should be \abbrCTIDB instead of \abbrPDB in the sentence below.}
Let $\qruntime{\query,\gentupset,\bound}$ (see~\Cref{sec:gen} for further details) denote the runtime for query $\query$, deterministic database $\gentupset$, and multiplicity bound $\bound$. Since we consider $\raPlus$ queries in which order of operators can impact runtime, we denote the optimized $\raPlus$ query as $\optquery{\query} = \arg\min_{\query'\in\raPlus, \query'\equiv\query}\qruntime{\query', \gentupset, \bound}$. Then $\qruntime{\optquery{\query}, \gentupset,\bound}$ is the runtime for the optimized query.
\begin{table}[h!]
\begin{tabular}{|p{0.43\textwidth}|p{0.12\textwidth}|p{0.35\textwidth}|}

View File

@ -145,15 +145,20 @@
\newcommand{\worlds}{\world^\tupset}
\newcommand{\bpd}{\mathcal{P}}%bpd for bag probability distribution
%BIDB
\newcommand{\block}{b}
\newcommand{\block}{B}
\newcommand{\bivar}{x_{\block, i}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Binary-BIDB Notation %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\onebidbworlds}[1]{\bigtimes_{\tup\in[#1]}\inset{0,\bound_\tup}}
%PDB Abbreviations
\newcommand{\abbrOneBIDB}{\text{Binary-BIDB}\xspace}
\newcommand{\abbrPDB}{\textnormal{PDB}\xspace}
\newcommand{\abbrBPDB}{\textnormal{bag-PDB}\xspace}
\newcommand{\abbrTIDB}{\textnormal{TIDB}\xspace}%replace \ti with this
\newcommand{\abbrCTIDB}{\textnormal{$\bound$-TIDB}\xspace}
\newcommand{\abbrOneBIDB}{\textnormal{$1$-}\abbrBIDB}
\newcommand{\abbrTIDBs}{\textnormal{TIDBs}\xspace}%replace \ti with this
\newcommand{\abbrBIDB}{\textnormal{BIDB}\xspace}
\newcommand{\ti}{TIDB\xspace}
@ -227,13 +232,17 @@
\newcommand{\polyinput}[2]{\left(#1,\ldots, #2\right)}%do we still use this?
%Number of Variables--this could easily be number of tups--maybe move to Rel Model?
\newcommand{\numvar}{n}
%Number of blocks (BIDB)
\newcommand{\numblock}{m}
%Vector
\newcommand{\vct}[1]{{\bf #1}}
%norm
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
%using \wVec for world bit vector notation<-----Is this still the case?
%Polynomial
\newcommand{\hideg}{K}
\newcommand{\poly}{\Phi}
\newcommand{\genpoly}{\phi}
\newcommand{\vars}[1]{\func{Vars}\inparen{#1}}
\newcommand{\polyOf}[1]{\poly[#1]}
\newcommand{\polyqdt}[3]{\polyOf{#1,#2,#3}}

View File

@ -8,29 +8,29 @@
%and develop a reduced form of lineage polynomials for a \abbrBIDB or \abbrTIDB.
%Note that
\secrev{
A polynomial over a set of variables $\vct{S}$ with $\abs{S}=\numedge$ and individual degree $B <\infty$
Given an index set $S$ over variables $X_\tup$ for $\tup\in S$, a (general) polynomial $\genpoly$ over $\inparen{X_\tup}_{\tup \in S}$ with individual degree $\hideg <\infty$
is formally defined as (where $c_{\vct{d}}\in \semN$):
\begin{equation}
\label{eq:sop-form}
\poly\inparen{S_1,\dots,S_\numedge}=\sum_{\vct{d}\in\{0,\ldots,B\}^\tupset} c_{\vct{d}}\cdot \prod_{i\in\pbox{\numedge}}S_i^{d_i}.
\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}.
\end{equation}
}
%where $c_{\vct{d}}\in \semN$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
\secrev{The term $\prod_{i\in\pbox{\numedge}} S_i^{d_i}$ }in \Cref{eq:sop-form} is a {\em monomial}. A polynomial $\poly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\secrev{The term $\prod_{\tup\in S} X_\tup^{d_\tup}$ }in \Cref{eq:sop-form} is a {\em monomial}. A polynomial $\genpoly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation.
When it is unclear, we use $\smbOf{\poly}$ to denote the \abbrSMB form of a polynomial $\poly$.
When it is unclear, we use $\smbOf{\genpoly}~\inparen{\smbOf{\poly}}$ to denote the \abbrSMB form of a polynomial (lineage polynomial) $\genpoly~\inparen{\poly}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Degree]\label{def:degree-of-poly}
The degree of polynomial $\poly(\vct{X})$ is the largest \secrev{$\vct{d} = \sum_{i\in\pbox{\numedge}}d_i %= \norm{\vct{d}}_1
The degree of polynomial $\genpoly(\vct{X})$ is the largest \secrev{$\sum_{\tup\in S}d_\tup %= \norm{\vct{d}}_1
$}% = \sum_{\tup\in\tupset} d_\tup$
such that $c_{\vct{d}}\ne 0$. \secrev{
We denote the degree of $\poly$ as $\deg\inparen{\poly}$.
We denote the degree of $\genpoly$ as $\deg\inparen{\genpoly}$.
}% maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -39,34 +39,44 @@ Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantic
%in any clause of the $\raPlus$ query that created it.
\secrev{
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynomial} (%resp., \emph{\ti-lineage polynomial},
or simply lineage polynomial), if there exists a $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
or simply lineage polynomial), if it is clear from context that there exists an $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
}
%Following the typical representation of bags in production databases, for query inputs, we will use \abbrBPDB\xplural with multiplicities $\{0, 1\}$ (see \Cref{sec:gener-results-beyond} for more on this choice).
\subsection{$\mathbf{1}$-BIDB}\label{subsec:one-bidb}
\subsection{\abbrOneBIDB}\label{subsec:one-bidb}
\label{subsec:tidbs-and-bidbs}
\noindent\secrev{
A block independent database \abbrBIDB $\pdb'$ can be viewed as a $1$-\abbrTIDB $\pdb$ with the added flexibility that each $\tup\in\tupset$ has multiple disjoint alternatives, i.e., all $\tup \in \tupset'$ are partitioned into $m$ independent blocks with the condition that tuples $\tup \in \block_i$ for $i \in \pbox{m}$ are disjoint events. We define next a specific construction of \abbrBIDB that is useful for our work.
\begin{Definition}[$1$-\abbrBIDB]\label{def:one-bidb}
Define a $1$-\abbrBIDB to be the pair $\pdb' = \inparen{\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. The operation $\prod_{\tup\in\tupset'}$ is the direct product of all such multiplicity domain pairs. The tuples $\tup\in\tupset'$ are partitioned into $m$ independent blocks $\block_i,~i\in\pbox{m}$, of disjoint tuples. $\bpd'$ is the probability distribution across all worlds such that, given $\worldvec\in\prod_{\tup\in\tupset'}\inset{0,\bound_\tup},\tup,~\tup'\in\block_i~:~\probOf\pbox{\worldvec_\tup, \worldvec_\tup'>0} = 0$.
%\noindent\secrev{
%A block independent database \abbrBIDB $\pdb'$ is the union of $\numblock$ sets of tuples, where each set of tuples consists of elements all of which are disjoint to one another. Each set of tuples is called a block, denoted $\block_i$ for $i\in\pbox{\numblock}$, where all $\block_i$ are independent events. We define next a specific construction of \abbrBIDB that is useful for our work.}
\noindent \secrev{A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and all $\block_i$ are independent random events. $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events. We define next a specific construction of \abbrBIDB that is useful for our work.
%\secrev{
%A block independent database \abbrBIDB $\pdb'$ can viewed as a $1$-\abbrTIDB $\pdb$ with the added flexibility that each $\tup\in\tupset$ has multiple disjoint alternatives, i.e., all $\tup \in \tupset'$ are partitioned into $m$ independent blocks with the condition that tuples $\tup \in \block_i$ for $i \in \pbox{m}$ are disjoint events. We define next a specific construction of \abbrBIDB that is useful for our work.
%}
\begin{Definition}[\abbrOneBIDB]\label{def:one-bidb}
Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_i = \begin{cases}
1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\
0 & \text{if there exist } \tup\neq\tup'\in\block_i \text{ with } W_\tup, W_{\tup'}\geq 1\\
\prob_\tup & \text{if } \tup \text{ is the unique tuple in } \block_i \text{ with } W_\tup\geq 1.
\end{cases}$
\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}$.
% if for any $i \in\pbox{\numblock}$ there does \emph{not} exist a $\tup\neq\tup' \in \block_i$ such that $W_{\tup}, W_{\tup'} \geq 1$, where $\prob_{\tup}$ is the marginal probability $\tup$. Otherwise, $\probOf\pbox{\worldvec=W} = 0$.\
\footnote{
We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance. When there is no ambiguity, we will denote a world vector as $\worldvec$.}% $\worldvec\in\prod_{\tup\in\tupset'}\inset{0,\bound_\tup},\tup,~\tup'\in\block_i~:~\probOf\pbox{\worldvec_\tup, \worldvec_\tup'>0} = 0$.
\end{Definition}
%A \abbrCTIDB $\pdb$ is a pair $\inparen{\worlds, \bpd}$ such that $\worlds$ is an incomplete database whose set of possible worlds is the $c+1^\numvar$ tuple/multiplicity combinations across all $\tup\in\tupset$, where $\abs{\tupset} = \numvar$, $\tupset = \bigcup_{\worldvec\in\worlds,~\worldvec_{\tup}\geq 1}\tup$ is the set of possible tuples across possible worlds, and $\bpd$ is a probability distribution over $\worlds$.
%\begin{Definition}[$\bound$-Block Independent Disjoint Database ($\bound$-\abbrBIDB)]\label{def:bidb}
%A $\bound$-block independent database ($\bound$-\abbrBIDB) $\pdb' = \inparen{\inset{0,\ldots,\bound}^{\tupset'}, \bpd'}$ is a probabilistic database such that the all worlds set is encoded as the set of vectors $\worldvec\in\inset{0,\ldots,\bound}^{\abs{\tupset'}}$ where $\worldvec_\tup\leq\bound$ is the multiplicity for tuple $\tup$. $\pdb'$ requires the set of all possible tuples $\tupset = \bigcup_{\worldvec\in\inset{0,\ldots, \bound}^{\tupset'},~\worldvec_\tup \geq 1}\tup$ to be partitioned into $m$ independent blocks $\block_i$ ($i\in\pbox{m}$) where all tuples $\tup_{i, j}\in \block_i$ are disjoint. $\bpd'$ is the probability distribution where, for all $\worldvec\in\inset{0,\ldots,\bound}^{\tupset'}$ such that $\worldvec_{\tup_{i, j}},\worldvec_{\tup_{i, j'}}\neq 0, j\neq j'$ for any $\block_i$, $\probOf\pbox{\worldvec} = 0$, where all other $\worldvec$ has $0<\probOf\pbox{\worldvec}\leq 1$.%bpd'$ set with the all worlds set $\worlds$ and probability distribution $\bpd'$ such that $\tupset' = \bigcup_{\worldvec\in\worlds, \worldvec_\tup \geq 1}\tup$ is the set of all possible tuples for which all $\tup\in\tupset'$ can be partitioned into $\numedge$ blocks $\block_i$ where the set of tuples $\tup_j \in \block_i$ are all disjoint, while blocks $\block_i$ are independent of one another. Each $\tup\in\tupset'$ has a multiplicity of at most $\bound$. $\bpd'$ is the distribution such that for any $\worldvec\in\worlds$ with $\worldvec_{\tup_{i, j}}\geq 1$ and $\worldvec_{\tup_{i, j'}}\geq 1$, $j\neq j'$ in any $\block_i$ more than one tuple present from the same block $\block_i$ has probability $\probOf\pbox{\worldvec} = 0$.
%\end{Definition}
%A block independent database (\abbrBIDB) is a related probabilistic data model $\pdb=\inparen{\Omega, \bpd}$ such that the base set of tuples $\tupset = \bigcup_{\omega\in\Omega,~\tup\in\omega}\tup$ is partitioned into a set of $\numvar$ independent blocks $\inset{\inparen{\block_\tup}_{\tup\in\pbox{\numvar}}}$ such that the set of tuples $\inset{\inparen{\tup_j}_{j\in\pbox{\abs{\block}}}}$ in block $\block_\tup$ are disjoint from one another. This construction produces the set of possible worlds $\Omega$ that consists of all unique combinations of tuples in $\tupset$ with the constraint that for any $\omega\in\Omega$, no two tuples $\tup_j, \tup_{j'}, j\neq j'$ from the same block $\block_\tup$ exist together. A $\bound$-\abbrBIDB has the further requirement that each block has a multiplicity of at most $c$.
We now present a reduction that is useful in deriving our results:
\begin{Definition}[\abbrCTIDB reduction]\label{def:ctidb-reduct}
Given \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, let $\pdb' = \inparen{\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}^{\tupset'}, \bpd'}$ be the \abbrOneBIDB obtained in the following manner: for each $\tup\in\tupset$, create block $\block_\tup = \inset{\intup{\tup, j}_{j\in\pbox{\bound}}}$ of disjoint tuples, for all $j\in\pbox{\bound}$.% such that $X_{\tup, j}\in\inset{0,1}$.
The probability distribution $\bpd'$ is the one induced by $\vct{p} = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}$ and the \abbrBIDB disjoint requirement, where given any $\worldvec\in\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}^{\tupset'}$, $\probOf\pbox{\worldvec_{\tup, j}, \worldvec_{\tup, j'} > 0} = 0$ for any $j \neq j' \in \pbox{\bound}$, such that for any $W\in\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}^{\tupset'}$, $\probOf\pbox{\worldvec = W} = \prod_{\tup\in\tupset', j\in\pbox{\bound}}W_{\tup, j}\cdot j\cdot\prob_\tup$ if $\forall \tup \in \tupset'\not\exists j\neq j'\in\pbox{\bound}, W_{\tup, j}, W_{\tup, j'} \geq 1$; otherwise $\probOf\pbox{\worldvec = W} = 0$.\footnote{
We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance. When there is no ambiguity, we will denote a world vector as $\worldvec$.}% that for any $X_{\tup, j} = 1, j'\in\pbox{\bound} - \inset{j}, X_{\tup, j'} = 0$.
\begin{Proposition}[\abbrCTIDB reduction]\label{def:ctidb-reduct}
Given \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, let $\pdb' = \inparen{\onebidbworlds{\tupset'}, \bpd'}$ be the \emph{\abbrOneBIDB} obtained in the following manner: for each $\tup\in\tupset$, create block $\block_\tup = \inset{\intup{\tup, j}_{j\in\pbox{\bound}}}$ of disjoint tuples, for all $j\in\pbox{\bound}$.% such that $X_{\tup, j}\in\inset{0,1}$.
The probability distribution $\bpd'$ is characterized by the vector $\vct{p} = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}$ for $\tup\in\tupset$ with multiplicity $j$.%and the \abbrBIDB disjoint requirement, where given any $\worldvec\in\onebidbworlds{\tupset'}$, $\probOf\pbox{\worldvec_{\tup, j}, \worldvec_{\tup, j'} > 0} = 0$ for any $j \neq j' \in \pbox{\bound}$.%, such that for any $W\in\prod_{\tup\in\tupset'}\inset{0, \bound_\tup}^{\tupset'}$, $\probOf\pbox{\worldvec = W} = \prod_{\tup\in\tupset', j\in\pbox{\bound}}W_{\tup, j}\cdot j\cdot\prob_\tup$ if $\forall \tup \in \tupset'\not\exists j\neq j'\in\pbox{\bound}, W_{\tup, j}, W_{\tup, j'} \geq 1$; otherwise $\probOf\pbox{\worldvec = W} = 0$.% that for any $X_{\tup, j} = 1, j'\in\pbox{\bound} - \inset{j}, X_{\tup, j'} = 0$.
% $\block_\tup,~j\in\pbox{\bound}~|~X_{\tup, j} = 1,\not\exists j'\neq j~|~X_{\tup, j'} = 1$.
%$\tup_j\geq1\implies \tup_{j'} = 0$.$\forall j, j' \in \pbox{\bound},\forall \tup\in\tupset, \tup_j\geq 1\implies \tup_{j'} = 0$ for any block $\block_\tup$.
\end{Definition}
\end{Proposition}
For the \abbrCTIDB $\pdb$, each $X_\tup\in\pbox{\bound}$, while in the reduced \abbrOneBIDB $\pdb'$, each $X_{\tup, j}\in\inset{0, 1}$. %As previously noted, unlike $X_{\tup}\in\inset{0,\ldots,\bound}$ for $X_{\tup}\in\vars{\pdb}$, $X_{\tup, j}\in\inset{0,1}$ for $X_{\tup, j}\in\vars{\pdb'}$.
Hence, in the setting of \abbrOneBIDB, we have the following semantics for generating lineage polynomials in $\raPlus$ queries: $\poly'\pbox{\project_A\inparen{\query}, \tupset', \tup_j} = \sum_{\tup_{j'} \in \project_{A}\inparen{\query\inparen{\tupset'}}: \tup_{j'} = \tup_j}\poly'\pbox{\query, \tupset', \tup_{j'}}$,