Moved commented out material into the appendix.

master
Aaron Huber 2021-09-17 18:10:41 -04:00
parent e3faa018bc
commit ea8cb76bcd
7 changed files with 40 additions and 289 deletions

View File

@ -33,14 +33,17 @@ For the sake of contradiction, assume we can solve our problem in $\littleo{\kma
%= &\bigO{k}\cdot \littleo{\kmatchtime} + O(\kElem^3)\label{eq:proof-omega-kmatch3}\\
&\le \littleo{\kmatchtime}\label{eq:proof-omega-kmatch4}.
\end{align}
%Atri: The details below are fine to make sure our proofs are correct but the arguments below are bit too "basic" to include in an ICDT paper, I _think_
%We obtain \Cref{eq:proof-omega-kmatch2} by the assumption that $\kmatchtime \in \littleomega{\numedge}$, an assumption which is upheld by the assumption in \Cref{thm:k-match-hard} that $\#W[0]\neq\sharpwone$ and the further assumption that the best runtime to compute $k$-matchings is $\bigOmega{\numedge^{k/2}}$ in \cite{k-match}. \Cref{eq:proof-omega-kmatch3} then follows by the fact that $\bigO{\numedge}\in \littleo{\numedge}\in \littleo{\kmatchtime}$.
We obtain \Cref{eq:proof-omega-kmatch4} from the fact that $k$ is fixed (relative to $m$) and the assumption that $\kmatchtime\ge\omega(m)$.
%by the observation that $\bigO{k}\in\bigO{k^3}$ and $\bigO{k^3} \in \littleo{\numedge}$ since by definition of parameterized complexity we have $\frac{\numedge}{k}\rightarrow \infty$.
Thus we obtain the contradiction that we can achieve a runtime $\littleo{\kmatchtime}$ that is better than the optimal time $\kmatchtime$ required to compute $k$-matchings.
\qed
\end{proof}
\subsection{\Cref{lem:qEk-multi-p}}
\noindent The following lemma reduces the problem of counting $\kElem$-matchings in a graph to our problem (and proves \Cref{thm:mult-p-hard-result}):
\begin{Lemma}\label{lem:qEk-multi-p}
Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$. Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $\bigO{\kElem^3}$ time.
\end{Lemma}
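\noindent To give some intuition for why $2\kElem+1$ evaluations suffice (a sketch only, under our reading of the construction; the full argument is the proof below): restricted to the diagonal $X_1=\cdots=X_\numvar=\prob$, the reduced polynomial is univariate of degree at most $2\kElem$,
\[
\rpoly_{G}^\kElem(\prob,\ldots,\prob) = \sum_{i=0}^{2\kElem} c_i\cdot \prob^i, \qquad\text{with } c_{2\kElem} = \kElem!\cdot\numocc{G}{\kmatch},
\]
since a monomial of $\poly_{G}^\kElem$ retains $2\kElem$ distinct variables exactly when its $\kElem$ edges are pairwise disjoint. The evaluations at the $2\kElem+1$ distinct points $\prob_0,\ldots,\prob_{2\kElem}$ thus form a $(2\kElem+1)\times(2\kElem+1)$ Vandermonde system in the unknowns $c_0,\ldots,c_{2\kElem}$, which Gaussian elimination solves in $\bigO{\kElem^3}$ time.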
\subsection{Proof of Lemma~\ref{lem:qEk-multi-p}}
\input{lem_mult-p}

View File

@ -100,6 +100,22 @@ Denote the vector $\vct{p}$ to be a vector whose elements are the individual pro
\end{align}
%
Recall that tuple blocks in a TIDB always have size 1, so the outer summation of \cref{eq:tidb-expectation} is over the full set of vectors.
\AH{Have cut and pasted the subsequent text. Need to verify this is the appropriate place for it.}
Let $\semNX$ denote the set of polynomials over variables $\vct{X}=(X_1,\dots,X_\numvar)$ with natural number coefficients and exponents.
We model incomplete relations using Green et al.'s $\semNX$-databases~\cite{DBLP:conf/pods/GreenKT07}, discussed in detail in \Cref{subsec:supp-mat-krelations}.
$\semNX$-databases are functions from tuples to elements of $\semNX$, typically called annotations.
Given an $\semNX$-database $\db$, it is common to use $\db(\tup)$ to denote the polynomial annotating tuple $\tup$ in $\db$.
%Note that based on this definition of $\rel$, $\rel(\tup)$ is the lineage polynomial for $\tup$.
Let $\numvar$ be the number of tuples in $\pdb$. Then, each possible world is defined by an assignment of $\numvar$ binary values $\vct{\wElem} \in \{0, 1\}^{\numvar}$ to $\vct{X}$.
The multiplicity of $\tup \in \db$, denoted $\db(\tup)(\vct{\wElem})$, is obtained by evaluating the polynomial annotating $\tup$ on $\vct{\wElem}$.
$\semNX$-relations are closed under $\raPlus$ (\Cref{fig:nxDBSemantics}).
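\noindent As a quick (hypothetical) illustration of this evaluation: if $\db(\tup) = X_1X_2 + 2X_3$, then for $\vct{\wElem} = (1, 1, 0)$ we get $\db(\tup)(\vct{\wElem}) = 1\cdot 1 + 2\cdot 0 = 1$, i.e., $\tup$ appears with multiplicity $1$ in that world, while for $\vct{\wElem} = (1, 1, 1)$ its multiplicity is $3$.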
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We will use $\semNX$-\abbrPDB $\pxdb$, defined as the tuple $(\idb_{\semNX}, \pd)$, where $\semNX$-database $\idb_{\semNX}$ is paired with probability distribution $\pd$ over the assignments to $\vct{X}$.
We denote by $\polyForTuple$ the annotation of tuple $t$ in the result of $\query$ on an implicit $\semNX$-\abbrPDB (i.e., $\polyForTuple = \query(\pxdb)(t)$ for some $\pxdb$) and as before, interpret it as a function $\polyForTuple: \{0,1\}^{\numvar} \rightarrow \semN$ from vectors of variable assignments to the corresponding value of the annotating polynomial.
$\semNX$-\abbrPDB\xplural and a function $\rmod$ (which transforms an $\semNX$-\abbrPDB to a classical bag-\abbrPDB, or $\semN$-\abbrPDB~\cite{DBLP:conf/pods/GreenKT07,feng:2019:sigmod:uncertainty}) are both formalized in \Cref{subsec:supp-mat-background}.
\BG{Oliver's conjecture: Bag-\tis + Q can express any finite bag-PDB:
A well-known result for set-semantics PDBs is that while not all finite PDBs can be encoded as \tis, any finite PDB can be encoded using a \ti and a query. An analogous result holds in our case: any finite $\semN$-PDB can be encoded as a bag \ti and a query (WHAT CLASS? ADD PROOF)
}
@ -139,6 +155,14 @@ Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Valid Worlds]
For probability distribution $\pd$, % and its corresponding probability mass function $\probOf$,
the set of valid worlds $\valworlds$ consists of all the worlds with probability value greater than $0$; i.e., for random world variable vector $\vct{W}$
\[
\valworlds = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}
\]
\end{Definition}
\subsection{Proposition~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
\noindent Note the following fact:

View File

@ -12,15 +12,6 @@ The following approximation algorithm applies to \abbrBIDB lineage polynomials (o
We now introduce useful definitions and notation related to circuits and polynomials.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{Definition}[Variables in a monomial]\label{def:vars}
% Given a monomial $v$, we use $\var(v)$ to denote the set of variables in $v$.
%\end{Definition}
%\noindent For example the monomial $XY$ has $\var(XY)=\inset{X,Y}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
For a circuit $\circuit$, we define $\expansion{\circuit}$ as a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef \in \domN$.
$\expansion{\circuit}$ has the following recursive definition ($\circ$ is list concatenation).
@ -46,15 +37,6 @@ Conveniently, $\abs{\circuit}\inparen{1,\ldots,1}$ gives us $\sum\limits_{\inpar
The functions \size and \depth output the number of gates and levels respectively for input \circuit.
\end{Definition}
%\begin{Definition}[\depth($\cdot$)]
%The function \depth has circuit $\circuit$ as input and outputs the number of levels in \circuit.
%\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%
%NEEDS to be moved to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[$\degree(\cdot)$]\label{def:degree}\footnote{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\degree(\circuit)$, and the latter can be strictly larger (e.g., consider the case when $\circuit$ multiplies two copies of the constant $1$; here we have $\degree(\circuit)=1$ but the degree of $\polyf(\abs{\circuit})$ is $0$).}
$\degree(\circuit)$ is defined recursively as follows:
\[\degree(\circuit)=
@ -66,9 +48,6 @@ $\degree(\circuit)$ is defined recursively as follows:
\end{cases}
\]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%
%END move to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%
Finally, we use the following notation for the complexity of multiplying integers:
\begin{Definition}[$\multc{\cdot}{\cdot}$]\footnote{We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.}
@ -145,64 +124,9 @@ Given a lineage polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circui
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%NEED to move to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%
%\input{app_approx-alg-pseudo-code}
%%%%%%%%%%%%%%%%%%%%%%%%%
%END move to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%
Given the above, the algorithm is a sampling based algorithm for the above sum: we sample (via \sampmon) $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional %\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.}
to $\abs{\coef}$ and compute $\vari{Y}=\indicator{\isInd{\encMon}}%\monom\mod{\mathcal{B}}\not\equiv 0}
Given the above, the algorithm is a sampling-based estimator of the above sum: we sample (via \sampmon) $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional
to $\abs{\coef}$ and compute $\vari{Y}=\indicator{\isInd{\encMon}}
\cdot \prod_{X_i\in \monom} p_i$. Taking $\ceil{\frac{2 \log{\frac{2}{\conf}}}{\error^2}}$ samples and computing the average of $\vari{Y}$ gives us our final estimate. \onepass is used to compute the sampling probabilities needed in \sampmon (details are in \Cref{sec:proofs-approx-alg}).
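The sample count above can be justified by a standard Hoeffding argument (a sketch, assuming each sample $\vari{Y}$ lies in $[-1,1]$, i.e., that the sign of $\coef$ is folded into $\vari{Y}$): for $N$ independent samples $\vari{Y}_1,\ldots,\vari{Y}_N$ with empirical mean $\overline{\vari{Y}} = \frac{1}{N}\sum_{j=1}^{N}\vari{Y}_j$,
\[
\probOf\inparen{\abs{\overline{\vari{Y}} - \expct\pbox{\vari{Y}}} \ge \error} \le 2\exp\inparen{-\frac{N\error^2}{2}},
\]
which is at most $\conf$ whenever $N \ge \frac{2 \log{\frac{2}{\conf}}}{\error^2}$, matching the number of samples taken above.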
%\approxq (\Cref{alg:mon-sam}) modifies \circuit with a call to \onepass. It then samples from $\circuit_{\vari{mod}}\numsamp$ times and uses that information to approximate $\rpoly$.
%\subsubsection{Correctness}
%In order to prove \Cref{lem:approx-alg}, we will need to argue the correctness of \approxq, which relies on the correctness of auxiliary algorithms \onepass and \sampmon.
%\begin{Lemma}\label{lem:one-pass}
%The $\onepass$ function completes in time:
%$$O\left(\size(\circuit) \cdot \multc{\log\left(\abs{\circuit(1\ldots, 1)}\right)}{\log{\size(\circuit}}\right)$$
% $\onepass$ guarantees two post-conditions: First, for each subcircuit $\vari{S}$ of $\circuit$, we have that $\vari{S}.\vari{partial}$ is set to $\abs{\vari{S}}(1,\ldots, 1)$. Second, when $\vari{S}.\type = \circplus$, \subcircuit.\lwght $= \frac{\abs{\subcircuit_\linput}(1,\ldots, 1)}{\abs{\subcircuit}(1,\ldots, 1)}$ and likewise for \subcircuit.\rwght.
%\end{Lemma}
%To prove correctness of \Cref{alg:mon-sam}, we only use the following fact that follows from the above lemma: for the modified circuit ($\circuit_{\vari{mod}}$), $\circuit_{\vari{mod}}.\vari{partial}=\abs{\circuit}(1,\dots,1)$.
%\begin{Lemma}\label{lem:sample}
%The function $\sampmon$ completes in time
%$$O(\log{k} \cdot k \cdot \depth(\circuit)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}})$$
% where $k = \degree(\circuit)$. The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
%\end{Lemma}
%With the above two lemmas, we are ready to argue the following result (proof in \Cref{sec:proofs-approx-alg}):
%\begin{Theorem}\label{lem:mon-samp}
%For any $\circuit$ with $\degree(poly(|\circuit|)) = k$, algorithm \ref{alg:mon-sam} outputs an estimate $\vari{acc}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that
%\[\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf,\]
% in $O\left(\left(\size(\circuit)+\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot \depth(\circuit)\right)\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$ time.
%\end{Theorem}
%\subsection{\onepass\ Algorithm}
%\label{sec:onepass}
%\noindent \onepass\ (Algorithm ~\ref{alg:one-pass-iter} in \Cref{sec:proofs-approx-alg}) iteratively visits each gate one time according to the topological ordering of \circuit annotating the \lwght, \rwght, and \prt variables of each node according to the definitions above. Lemma~\ref{lem:one-pass} is proved in \Cref{sec:proofs-approx-alg}.
%\subsection{\sampmon\ Algorithm}
%\label{sec:samplemonomial}
%A naive (slow) implementation of \sampmon\ would first compute $\expansion{\circuit}$ and then sample from it.
%Instead, \Cref{alg:sample} selects a monomial from $\expansion{\circuit}$ by top-down traversal of the input \circuit. More details on the traversal can be found in \Cref{subsec:sampmon-remarks}.
%
%$\sampmon$ is given in \Cref{alg:sample}, and a proof of its correctness (via \Cref{lem:sample}) is provided in \Cref{sec:proofs-approx-alg}.
%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:

View File

@ -4,22 +4,18 @@
\label{sec:hard}
In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific (family of) hard instances $(\query,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a \abbrTIDB.
% that computing $\expct\pbox{\poly(\vct{W})}$ exactly for a \ti-lineage polynomial $\poly(\vct{X})$ generated from a project-join query (even an expression tree representation) is \sharpwonehard.
Note that this implies hardness for \bis and general \abbrBPDB, answering \Cref{prob:bag-pdb-poly-expected} (and hence the equivalent \Cref{prob:bag-pdb-query-eval}) in the negative.
%Furthermore, we demonstrate in \Cref{sec:single-p} that the problem remains hard, even if $\probOf[X_i=1] = \prob$ for all $X_i$ and any fixed valued $\prob \in (0, 1)$ as long as certain popular hardness conjectures in fine-grained complexity hold.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Preliminaries}\label{sec:hard:sub:pre}
Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We can think of $H$ as being of constant size and $G$ as growing. %In query processing, $H$ can be viewed as the query while $G$ as the database instance.
Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We can think of $H$ as being of constant size and $G$ as growing.
In particular, we will consider the problems of computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). We use $\kmatchtime$ to denote the optimal runtime of computing $\numocc{G}{\kmatch}$. Our hardness results in \Cref{sec:multiple-p} are based on the following hardness results/conjectures:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}[\cite{k-match}]
\label{thm:k-match-hard}
Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$ with no self-loops or parallel edges, the time $\kmatchtime$ to compute $\numocc{G}{\kmatch}$ exactly is $\littleomega{f(k)\cdot |\edgeSet|^c}$ for any function $f$ and fixed constant $c$ independent of $\numedge$ and $k$ (assuming $\sharpwzero\ne\sharpwone$). %counting the number of $k$-matchings in $G$ is\sharpwonehard (parameterization is in $k$).
Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$ with no self-loops or parallel edges, the time $\kmatchtime$ to compute $\numocc{G}{\kmatch}$ exactly is $\littleomega{f(k)\cdot |\edgeSet|^c}$ for any function $f$ and fixed constant $c$ independent of $\numedge$ and $k$ (assuming $\sharpwzero\ne\sharpwone$).
\end{Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%The above result means that we cannot hope to count the number of $k$-matchings in $G=(\vset,\edgeSet)$ in time $f(k)\cdot |\vset|^{c}$ for any function $f$ and constant $c$ independent of $k$.
\begin{hypo}\label{conj:known-algo-kmatch}
There exists an absolute constant $c_0>0$ such that for every $G=(\vset,\edgeSet)$, we have $\kmatchtime \ge \Omega\inparen{|E|^{c_0\cdot k}}$.
\end{hypo}
@ -35,8 +31,6 @@ There exists a constant $\eps_0>0$ such that given an undirected graph $G=(\vset
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
The so-called {\em Triangle detection hypothesis} (cf.~\cite{triang-hard}), which states that detecting whether $G$ has a triangle takes time $\Omega\inparen{|\edgeSet|^{4/3}}$, implies that in Conjecture~\ref{conj:graph} we can take $\eps_0\ge \frac{1}{3}$.
%The current best known algorithm to count the number of $3$-matchings, to
%\AR{Need to add something about 3-paths and 3-matchings as well.}
All of our hardness results rely on a simple lineage polynomial encoding of the edges of a graph.
To prove our hardness result, consider a graph $G=(\vset, \edgeSet)$, where $|\edgeSet| = m$, $\vset = [\numvar]$. Our lineage polynomial has a variable $X_i$ for every $i$ in $[\numvar]$.
@ -49,8 +43,6 @@ For any graph $G=(V,\edgeSet)$ and $\kElem\ge 1$, define
\[\poly_{G}^\kElem(X_1,\dots,X_n) = \left(\sum\limits_{(i, j) \in \edgeSet} X_i \cdot X_j\right)^\kElem.\]
\end{Definition}
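\noindent As a small worked example (our own illustration): let $G$ be the path $1-2-3-4$, i.e., $\edgeSet = \{(1,2), (2,3), (3,4)\}$, and let $\kElem = 2$. Then
\[
\poly_{G}^2(X_1,\dots,X_4) = \inparen{X_1X_2 + X_2X_3 + X_3X_4}^2,
\]
and the only monomial in its expansion with $2\kElem = 4$ distinct variables is $2\cdot X_1X_2X_3X_4$, arising (with coefficient $2!=2$) from the unique $2$-matching $\{(1,2), (3,4)\}$; every other monomial repeats a vertex variable.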
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Our hardness results only need a \ti instance; We also consider the special case when all the tuple probabilities (probabilities assigned to $X_i$ by $\probAllTup$) are the same value. Note that our hardness results % do not require the general circuit representation and
%even hold for the expression trees. %this polynomial can be encoded in an expression tree of size $\Theta(km)$.
\noindent Returning to \Cref{fig:two-step}, it is easy to see that $\poly_{G}^\kElem(\vct{X})$ is the lineage polynomial corresponding to the query that generalizes our example query from \Cref{sec:intro}. Let us alias
\begin{lstlisting}
@ -61,22 +53,9 @@ as $R_i$ for each $i \in [k]$. The query $\query^k$ then becomes
\begin{lstlisting}
SELECT COUNT(*) FROM $R_1$ JOIN $R_2$ JOIN$\cdots$JOIN $R_k$
\end{lstlisting}
%RA format for the same query
%\begin{align*}
%\query^k_G \coloneqq &\inparen{\project_\emptyset\inparen{OnTime \join_{City = City_1} Route \join_{{City}_2 = City'}\rename_{City' \leftarrow City}(OnTime)}}\times_2\cdots\\
%&\cdots \times_k \inparen{\project_\emptyset\inparen{OnTime \join_{City = City_1} Route \join_{{City}_2 = City'}\rename_{City' \leftarrow City}(OnTime)}}
%\end{align*}
%\resizebox{1\linewidth}{!}{
%\begin{minipage}{1.05\linewidth}
%\[\poly^k_G\dlImp OnTime(C_1),Route(C_1, C_1'),OnTime(C_1'),\dots,OnTime(C_\kElem),Route(C_\kElem,C_\kElem'),OnTime(C_\kElem')\]
%\end{minipage}
%}
\noindent Further, the PDB instance generalizes the one in \Cref{fig:two-step} as follows. Relation $OnTime$ has $n$ tuples, one corresponding to each vertex $i \in [n]$, each with probability $\prob_i$, and $Route$ has tuples corresponding to the edges in $\edgeSet$ (each with probability $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $Route$ as well, but since they are always present with probability $1$, we drop those. Our argument also works when all the tuples in $Route$ are present with probability $\prob$, but to simplify notation we assign probability $1$ to edges.}
In other words, for this instance $\dbbase$ contains the set of $n$ unary tuples in $OnTime$ (which corresponds to $\vset$) and $m$ binary tuples in $Route$ (which corresponds to $\edgeSet$).
Note that this implies that $\poly_{G}^\kElem$
%our hard lineage polynomial can be represented as an expression tree produced by a project-join query with same probability value for each input tuple $\prob_i$, and hence
is indeed a lineage polynomial for a \abbrTIDB \abbrPDB.
Note that this implies that $\poly_{G}^\kElem$ is indeed a lineage polynomial for a \abbrTIDB \abbrPDB.
Next, we note that the runtime for \abbrStepOne with $\query^k$ and $\dbbase$ as defined above is $O(\kElem\numedge)$ (i.e., \abbrStepOne is `easy' for this query):
\begin{Lemma}\label{lem:tdet-om}
@ -85,16 +64,6 @@ Let $\query^k$ and $\dbbase$ be as defined above. Then
$\qruntime{\query^k, \dbbase}$ is $O(\kElem\numedge)$.
\end{Lemma}
%\begin{Corollary}\label{cor:at-least-kmatch}
%\end{Corollary}
%\begin{proof}[Proof of \Cref{cor:at-least-kmatch}
%\end{proof}
%
%\begin{Corollary}\label{cor:best-curr-algo}
%\end{Corollary}
%\begin{proof}[Proof of \Cref{cor:best-curr-algo}
%\end{proof}
\subsection{Multiple Distinct $\prob$ Values}
\label{sec:multiple-p}
%Unless otherwise noted, all proofs for this section are in \Cref{app:single-mult-p}.
@ -109,16 +78,7 @@ needs time $\bigOmega{\kmatchtime}$, assuming $\kmatchtime\ge \omega\inparen{\ab
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
Note that the second row of \Cref{tab:lbs} follows from \Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{thm:k-match-hard}, while the third row is proved by \Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{conj:known-algo-kmatch}. Since \Cref{conj:known-algo-kmatch} is non-standard, the latter hardness result should be interpreted as follows. Any substantial polynomial improvement for \Cref{prob:bag-pdb-poly-expected} (over the trivial algorithm that converts $\poly$ into SMB and then runs the obvious algorithm for \abbrStepTwo) would lead to an improvement over the state-of-the-art {\em upper} bounds on $\kmatchtime$. Finally, note that \Cref{thm:mult-p-hard-result} requires one to be able to compute the expected multiplicities over $(2k+1)$ distinct values of $p_i$, each of which corresponds to a distinct $\pd$ (for the same $\dbbase$), which explains the `Multiple' entry in the second column of the second and third rows of \Cref{tab:lbs}. Next, we argue how to get rid of this latter requirement.
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%NEEDS to be moved to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\noindent The following lemma reduces the problem of counting $\kElem$-matchings in a graph to our problem (and proves \Cref{thm:mult-p-hard-result}):
%\begin{Lemma}\label{lem:qEk-multi-p}
%Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$. Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $\bigO{\kElem^3}$ time.
%\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%END move to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex

View File

@ -3,12 +3,10 @@
%\onecolumn
\subsection{Reduced Polynomials and Equivalences}
We now introduce some terminology % for polynomials
We now introduce some terminology
and develop a reduced form of lineage polynomials for a \abbrBIDB or \abbrTIDB.
%We will use $(X + Y)^2$ as a running example.
Note that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ with individual degree $B <\infty$
%\footnote{The standard definition of polynomials requires a finite number of terms.} and $c_\vct{i} \in \domN$
is formally defined as: %(with $c_\vct{i} \in \domN$):
is formally defined as:
\begin{equation}
\label{eq:sop-form}
\poly\inparen{X_1,\dots,X_n}=\sum_{\vct{d}\in\{0,\ldots,B\}^n} c_{\vct{d}}\cdot \prod_{i=1}^n X_i^{d_i},
@ -33,62 +31,15 @@ Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantic
%in any clause of the $\raPlus$ query that created it.
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if there exists a $\raPlus$ query $\query$, \bi (\ti) $\pdb$, and tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{Definition}[Modding with a set]\label{def:mod-set}
%Let $S$ be a {\em set} of polynomials over $\vct{X}$. Then $\poly(\vct{X})\mod{S}$ is the polynomial obtained by taking the mod of $\poly(\vct{X})$ over {\em all} polynomials in $S$ (order does not matter).
%\end{Definition}
%For example for a set of polynomials $S=\inset{X^2-X, Y^2-Y}$, taking the polynomial $2X^2 + 3XY - 2Y^2\mod S$ yields $2X+3XY-2Y$.
%%
%\begin{Definition}[$\mathcal B$, $\mathcal T$]\label{def:mod-set-polys}
%Given the set of BIDB variables $\inset{X_{i,j}}$, define
%
%\setlength\parindent{0pt}
%\vspace*{-3mm}
%{\small
%\begin{tabular}{@{}l l}
% \begin{minipage}[b]{0.45\linewidth}
% \centering
% \begin{equation*}
% \mathcal{B}=\comprehension{X_{i,j}\cdot X_{i,j'}}{i \in [\ell], j\neq j' \in [~\abs{\block_i}~]}
% \end{equation*}
% \end{minipage}%
% \hspace{13mm}
% &
% \begin{minipage}[b]{0.45\linewidth}
% \centering
% \begin{equation*}
% \mathcal{T}=\comprehension{X_{i,j}^2-X_{i,j}}{i \in [\ell], j \in [~\abs{\block_i}~]}
% \end{equation*}
% \end{minipage}
% \\
%\end{tabular}
%}
%\end{Definition}
%%
\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is the same as \Cref{def:reduced-poly} with the added constraint that all monomials with variables $X_{\block, i}, X_{\block, j}, i\neq j$ from the same block $\block$ are omitted.
%: $\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}$
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
Consider a $\abbrBIDB$ polynomial $\poly\inparen{\vct{X}} = X_{1, 1}X_{1, 2} + X_{1, 2}X_{2, 1}^2$. Then by \Cref{def:reduced-bi-poly}, we have that $\rpoly\inparen{\vct{X}} = X_{1, 2}X_{2, 1}$. Next, we show why the reduced form is useful for our purposes.
%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
%
%For the special case of \tis, the second step is not necessary since every block contains a single tuple.
%Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
% Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
% $\rpoly(X_1,\ldots, X_\numvar) = $
% \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
% \end{Definition}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%Removing this example to save space
\iffalse
\begin{Example}\label{example:qtilde}
@ -101,43 +52,8 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
\end{Example}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Intuitively, $\rpoly(\textbf{X})$ is the \abbrSMB form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
%
%When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
%
%\noindent The usefulness of this will reduction become clear in \Cref{lem:exp-poly-rpoly}.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%NEEDS to be moved to the appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{Definition}[Valid Worlds]
%For probability distribution $\pd$, % and its corresponding probability mass function $\probOf$,
%the set of valid worlds $\valworlds$ consists of all the worlds with probability value greater than $0$; i.e., for random world variable vector $\vct{W}$
%\[
%\valworlds = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}
%\]
%\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%END move to appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in \Cref{app:subsec-pre-poly-rpoly} and \Cref{app:subsec-prop-q-qtilde}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Define all variables $X_i$ in $\poly$ to be independent.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:exp-poly-rpoly}
Let $\pdb$ be a \abbrBIDB over $\numvar$ input tuples such that the probability distribution $\pdassign$ over $\{0, 1\}^\numvar$ (the all-worlds set) is induced by the probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$. As in \Cref{lem:tidb-reduce-poly} for \abbrTIDB, for any \abbrBIDB-lineage polynomial $\poly(\vct{X})$ based on $\pdb$ and query $\query$ we have:
@ -148,17 +64,6 @@ Let $\pdb$ be a \abbrBIDB over $\numvar$ input tuples such that the probability
\end{Lemma}
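\noindent As a sanity check on the example polynomial above (our own calculation): for the \abbrBIDB polynomial $\poly\inparen{\vct{X}} = X_{1, 1}X_{1, 2} + X_{1, 2}X_{2, 1}^2$,
\[
\expct\pbox{\poly\inparen{\vct{\randWorld}}} = \expct\pbox{\randWorld_{1,1}\randWorld_{1,2}} + \expct\pbox{\randWorld_{1,2}}\cdot\expct\pbox{\randWorld_{2,1}^2} = 0 + \prob_{1,2}\cdot\prob_{2,1} = \rpoly\inparen{\vct{\prob}},
\]
where the first term vanishes because $\tup_{1,1}$ and $\tup_{1,2}$ are disjoint, the factorization uses independence across blocks, and $\randWorld_{2,1}^2 = \randWorld_{2,1}$ since $\randWorld_{2,1}\in\{0,1\}$.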
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
By \Cref{lem:exp-poly-rpoly} and linearity of expectation, the following corollary results.
%Note that in the preceding lemma, we have assigned $\vct{p}$
%%(introduced in \Cref{subsec:def-data})
%to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Corollary}\label{cor:expct-sop}

View File

@ -19,41 +19,10 @@ When the underlying DAG is a tree (with edges pointing towards the root), the st
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%As stated in \Cref{def:circuit}, every internal node has at most two incoming edges, is labeled as an addition or a multiplication node, and has no limit on its outdegree.
%Note that if we limit the outdegree to one, then we get expression trees.
The circuits in \Cref{fig:two-step} encode their respective polynomials in column $\poly$.
%\circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $XY + WZ$.
Note that each circuit \circuit encodes a tree, with edges pointing towards the root.
%\begin{figure}[t]
% \begin{subfigure}[b]{0.45\linewidth}
% \centering
% \begin{tikzpicture}[thick]
% \node[tree_node] (a1) at (0, 0){$\boldsymbol{X}$};
% \node[tree_node] (b1) at (1, 0){$\boldsymbol{Y}$};
% \node[tree_node] (c1) at (2, 0){$\boldsymbol{W}$};
% \node[tree_node] (d1) at (3, 0){$\boldsymbol{Z}$};
%
% \node[tree_node] (a2) at (0.5, 1){$\boldsymbol{\circmult}$};
% \node[tree_node] (b2) at (2.5, 1){$\boldsymbol{\circmult}$};
%
% \node[tree_node] (a3) at (1.5, 2){$\boldsymbol{\circplus}$};
%
% \draw[->] (a1) -- (a2);
% \draw[->] (b1) -- (a2);
% \draw[->] (c1) -- (b2);
% \draw[->] (d1) -- (b2);
% \draw[->] (a2) -- (a3);
% \draw[->] (b2) -- (a3);
% \draw[->] (a3) -- (1.5, 2.5);
% \end{tikzpicture}
% \caption{Circuit encoding $XY + WZ$, a special case of an expression tree}
% \label{fig:circuit-express-tree}
% \end{subfigure}
% \hspace{5mm}
\begin{wrapfigure}{l}{0.45\linewidth}
\centering
\begin{tikzpicture}[thick]

View File

@ -11,39 +11,15 @@ An \textit{incomplete database} $\idb$ is a set of deterministic databases $\db$
A \textit{probabilistic database} $\pdb$ is a pair $(\idb, \pd)$ where $\idb$ is an incomplete database and $\pd$ is a probability distribution over $\idb$. Queries over probabilistic databases are evaluated using the so-called possible world semantics. Under the possible world semantics, the result of a query $\query$ over an incomplete database $\idb$ is the set of query answers produced by evaluating $\query$ over each possible world: $\query(\idb) = \comprehension{\query(\db)}{\db \in \idb}$.
For a probabilistic database $\pdb = (\idb, \pd)$, the result of a query is the pair $(\query(\idb), \pd')$ where $\pd'$ is a probability distribution over $\query(\idb)$ that assigns to each possible query result the sum of the probabilities of the worlds that produce this answer: $\forall \db' \in \query(\idb): \pd'(\db') = \sum_{\db \in \idb: \query(\db) = \db'} \pd(\db)$.
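As a minimal (hypothetical) illustration: if $\idb = \{\db_1, \db_2\}$ with $\pd(\db_1) = 0.7$ and $\pd(\db_2) = 0.3$, and $\query(\db_1) = \query(\db_2) = \db'$, then $\query(\idb) = \{\db'\}$ and $\pd'(\db') = 0.7 + 0.3 = 1$.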
%
%\[\forall \db' \in \query(\idb): \pd'(\db') = \sum_{\db \in \idb: \query(\db) = \db'} \pd(\db). \]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%NEEDS to be moved to the appendix.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Let $\semNX$ denote the set of polynomials over variables $\vct{X}=(X_1,\dots,X_\numvar)$ with natural number coefficients and exponents.
%We model incomplete relations using Green et. al.'s $\semNX$-databases~\cite{DBLP:conf/pods/GreenKT07}, discussed in detail in \Cref{subsec:supp-mat-krelations}.
% $\semNX$-databases are functions from tuples to elements of $\semNX$, typically called annotations.
%Given an $\semNX$-database $\db$, it is common to use $\db(\tup)$ to denote the polynomial annotating tuple $\tup$ in $\db$.
%%Note that based on this definition of $\rel$, $\rel(\tup)$ is the lineage polynomial for $\tup$.
%Let $\numvar$ be the number of tuples in $\pdb$. Then, each possible world is defined by an assignment of $\numvar$ binary values $\vct{\wElem} \in \{0, 1\}^{\numvar}$ to $\vct{X}$.
%The multiplicity of $\tup \in \db$, denoted $\db(\tup)(\vct{\wElem})$, is obtained by evaluating the polynomial annotating $\tup$ on $\vct{\wElem}$.
%$\semNX$-relations are closed under $\raPlus$ (\Cref{fig:nxDBSemantics}).
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%We will use $\semNX$-\abbrPDB $\pxdb$, defined as the tuple $(\idb_{\semNX}, \pd)$, where $\semNX$-database $\idb_{\semNX}$ is paired with probability distribution $\pd$ over the assignments to $\vct{X}$.
%We denote by $\polyForTuple$ the annotation of tuple $t$ in the result of $\query$ on an implicit $\semNX$-\abbrPDB (i.e., $\polyForTuple = \query(\pxdb)(t)$ for some $\pxdb$) and as before, interpret it as a function $\polyForTuple: \{0,1\}^{\numvar} \rightarrow \semN$ from vectors of variable assignments to the corresponding value of the annotating polynomial.
%$\semNX$-\abbrPDB\xplural and a function $\rmod$ (which transforms an $\semNX$-\abbrPDB to a classical bag-\abbrPDB, or $\semN$-\abbrPDB~\cite{DBLP:conf/pods/GreenKT07,feng:2019:sigmod:uncertainty}) are both formalized in \Cref{subsec:supp-mat-background}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%END: move to appendix.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Recall \Cref{fig:nxDBSemantics} which depicts the semantics for constructing a lineage polynomial $\apolyqdt$ for any $\raPlus$ query. We now make a meaningful connection between possible world semantics and world assignments on the lineage polynomial.
\begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}
Given a \abbrBPDB $\pdb = (\idb,\pd)$ and lineage polynomial $\apolyqdt$ for arbitrary output tuple $\tup$, %$\semNX$-\abbrPDB $\pxdb = (\idb_{\semNX}',\pd')$ where $\rmod(\pxdb) = \pdb$,
Given a \abbrBPDB $\pdb = (\idb,\pd)$ and lineage polynomial $\apolyqdt$ for arbitrary output tuple $\tup$,
we have (denoting $\randDB$ as the random variable over $\idb$):
$ \expct_{\randDB \sim \pd}[\query(\randDB)(t)] = \expct_{\vct{\randWorld}\sim \pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}. $
\end{Proposition}
\noindent A formal proof of \Cref{prop:expection-of-polynom} is given in \Cref{subsec:expectation-of-polynom-proof}.\footnote{Although \Cref{prop:expection-of-polynom} follows, e.g., as an obvious consequence of~\cite{IL84a}'s Theorem 7.1, we are unaware of any formal proof for bag-probabilistic databases.}
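\noindent As a minimal (hypothetical) instance of \Cref{prop:expection-of-polynom}: for a \abbrTIDB with a single tuple $\tup$ present with probability $\prob$ and a query returning $\tup$ unchanged, the left-hand side is $\prob\cdot 1 + (1-\prob)\cdot 0 = \prob$, while the right-hand side is $\expct\pbox{\randWorld_1} = \prob$.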
%This proposition shows that computing expected tuple multiplicities is equivalent to computing the expectation of a polynomial (for that tuple) from a probability distribution over all possible assignments of variables in the polynomial to $\{0,1\}$.
We focus on the problem of computing $\expct_\pdassign\pbox{\apolyqdt\inparen{\vct{\randWorld}}}$ from now on; we treat $\query, \dbbase, \tup$ as implicit and so drop the subscript from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial).
\subsubsection{\tis and \bis}
@ -51,23 +27,13 @@ We focus on the problem of computing $\expct_\pdassign\pbox{\apolyqdt\inparen{\v
In this paper, we focus on two popular forms of \abbrPDB\xplural: Block-Independent (\bi) and Tuple-Independent (\ti) \abbrPDB\xplural.
%
A \bi $\pdb$ is a \abbrPDB with the constraint that
%(i) every tuple $\tup_i$ is annotated with a unique random variable $\randWorld_i \in \{0, 1\}$ and (ii) that
the tuples in $\dbbase$ can be partitioned into a set of $\ell$ blocks such that tuples $\tup_{i, j}, \tup_{k, j'}$ from separate blocks $(i\neq k, j \in [\abs{\block_i}], j' \in [\abs{\block_k}])$ are independent of each other while tuples $\tup_{i, j}, \tup_{i, k}$ from the same block are disjoint events.\footnote{
Although only a single independent, $[\abs{\block_i}+1]$-valued variable is customarily used per block~\cite{DBLP:series/synthesis/2011Suciu}, we decompose it into $\abs{\block_i}$ correlated $\{0,1\}$-valued variables per block that can be used directly in polynomials (without an indicator function). For $t_{i, j} \in b_i$, the event $(\randWorld_{i,j} = 1)$ corresponds to the event $(\randWorld_i = j)$ in the customary annotation scheme.
}
Each tuple $\tup_{i, j}$ is annotated with a random variable $\randWorld_{i, j} \in \{0, 1\}$ denoting its presence in a possible world $\db$. The probability distribution $\pd$ over $\dbbase$ is the one induced from the individual tuple probabilities $\prob_{i, j}\in \vct{\prob}=\inparen{\prob_{1, 1},\ldots,\prob_{\abs{\block}, \abs{\block_{\abs{\block}}}}}$ and the conditions on the blocks. A \abbrTIDB is a \abbrBIDB where each block has size exactly $1$.
Instead of looking only at the possible worlds of $\pdb$, one can consider all worlds, including those that cannot exist due to disjointness. The all-worlds set can be modeled by $\vct{\randWorld}\in \{0, 1\}^\numvar$,\footnote{Here and later on in the paper, especially in \Cref{sec:algo}, we will overload notation and rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{b_i}$.} such that $\randWorld_k \in \vct{\randWorld}$ represents the presence of $\tup_{i, j}$ (where $k = \sum_{i' = 1}^{i - 1} \abs{b_{i'}} + j$). We denote a probability distribution over all $\vct{\randWorld} \in \{0, 1\}^\numvar$ as $\pdassign$. When $\pdassign$ is induced from each $\prob_{i, j}$ while assigning $\probOf\pbox{\vct{\randWorld}} = 0$ to any $\vct{\randWorld}$ with $\randWorld_{i, j} = \randWorld_{i, k} = 1$ for some block $i$ and $j\neq k$, we obtain a probability-preserving bijection between the possible worlds of $\pd$ and the vectors in the support of $\pdassign$, i.e., the two distributions are equivalent.
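For instance (our own illustration): a single block $\block_1 = \{\tup_{1,1}, \tup_{1,2}\}$ with $\prob_{1,1} = 0.3$ and $\prob_{1,2} = 0.5$ has three possible worlds, $\emptyset$, $\{\tup_{1,1}\}$, and $\{\tup_{1,2}\}$; the induced $\pdassign$ assigns $\probOf\pbox{(0,0)} = 0.2$, $\probOf\pbox{(1,0)} = 0.3$, $\probOf\pbox{(0,1)} = 0.5$, and $\probOf\pbox{(1,1)} = 0$, so the vectors with nonzero probability are in probability-preserving correspondence with the possible worlds.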
%that $\forall i \in \abs{\block}, \forall j\neq k \in [\block_i] \suchthat \db\inparen{\tup_{i, j}} = 0 \vee \db\inparen{\tup_{i, k} = 0}$.In other words, each random variable corresponds to the event of a single tuple's presence.
%A \emph{\ti} is a \bi where each block contains exactly one tuple.
\Cref{subsec:supp-mat-ti-bi-def} explains \abbrTIDB\xplural and \abbrBIDB\xplural in greater detail.
%%
%In a \bi (and by extension a \ti), tuples are partitioned into $\ell$ blocks $\block_1, \ldots, \block_\ell$ where tuple $t_{i,j} \in \block_i$ is associated with a probability $\prob_{\tup_{i,j}} = \probOf[X_{i,j} = 1]$, and is annotated with a unique variable $X_{i,j}$.
%Because blocks are independent and tuples from the same block are disjoint, the probabilities $\prob_{\tup_{i,j}}$ and the blocks induce the probability distribution $\pd$ of $\pdb$.
%We will write a \bi-lineage polynomial $\poly(\vct{X})$ for a \bi with $\ell$ blocks as
%$\poly(\vct{X})$ = $\poly(X_{1, 1},\ldots, X_{1, \abs{\block_1}},$ $\ldots, X_{\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables: