Finished porting all @atri asked in 121820 meeting

2020-12-18 18:23:24 -05:00 · 2020-12-18 18:23:24 -05:00 · 899edc4248
parent 1673b0393f
commit 899edc4248
7 changed files with 275 additions and 276 deletions
--- a/approx_alg.tex
+++ b/approx_alg.tex
@ -387,133 +387,12 @@ Algorithm ~\ref{alg:one-pass} essentially implements the above definitions.

 %\subsubsection{Psuedo Code}
 %See algorithm ~\ref{alg:one-pass} for details.
-\begin{algorithm}[h!]
-	\caption{\onepass$(\etree)$}
-	\label{alg:one-pass}
-\begin{algorithmic}[1]
-	\Require \etree: Binary Expression Tree
-	\Ensure \etree: Binary Expression Tree
-	\Ensure \vari{sum} $\in \mathbb{R}$
-	\If{$\etree.\type = +$}\label{alg:one-pass-equality1}
-		\State $\accum \gets 0$\label{alg:one-pass-plus-assign1}
-		\For{$child$ in $\etree.\vari{children}$}\Comment{Sum up all children coefficients}
-			\State $(child, \vari{s}) \gets \onepass(child)$
-			\State $\accum \gets \accum + \vari{s}$\label{alg:one-pass-plus-add}
-		\EndFor
-		\State $\etree.\vari{partial} \gets \accum$\label{alg:one-pass-plus-assign2}
-		\For{$child$ in $\etree.\vari{children}$}\Comment{Record distributions for each child}
-			\State $child.\vari{weight} \gets \frac{child.\vari{partial}}{\etree.\vari{partial}}$\label{alg:one-pass-plus-prob}
-		\EndFor
-		%\State $\vari{sum} \gets \etree.\vari{partial}$\label{alg:one-pass-plus-assign3}
-		\State \Return (\etree, \etree.\vari{partial})
-	\ElsIf{$\etree.\type = \times$}\label{alg:one-pass-equality2}
-		\State $\accum \gets 1$\label{alg:one-pass-times-assign1}
-		\For{$child \text{ in } \etree.\vari{children}$}\Comment{Compute the product of all children coefficients}
-			\State $(child, \vari{s}) \gets \onepass(child)$
-			\State $\accum \gets \accum \times \vari{s}$\label{alg:one-pass-times-product}
-		\EndFor
-		\State $\etree.\vari{partial}\gets \accum$\label{alg:one-pass-times-assign2}
-		%\State $\vari{sum} \gets \etree.\vari{partial}$\label{alg:one-pass-times-assign3}
-		\State \Return (\etree, \etree.\vari{partial})
-	\ElsIf{$\etree.\type = numeric$}\Comment{Base case}\label{alg:one-pass-equality3}
-		\State $\vari{sum} \gets |\etree.\val|$\label{alg:one-pass-leaf-assign1}\Comment{This step effectively converts $\etree$ into $\abs{\etree}$}
-		\State \Return (\etree, \vari{sum})
-	\Else\Comment{$\etree.\type = \var$}\label{alg:one-pass-equality4}
-		%\State $\vari{sum} \gets 1$\label{alg:one-pass-global-assign}
-		\State \Return (\etree,$1$) % \vari{sum})
-	\EndIf
-\end{algorithmic}
-\end{algorithm}
-
-\begin{Example}\label{example:one-pass}
- Let $\etree$ encode the expression $(x_1 + x_2)(x_1 - x_2) + x_2^2$.  After one pass, \cref{alg:one-pass} would have computed the following weight distribution.  For the two children of the root $+$ node $\etree$, $\etree_\lchild.\wght = \frac{4}{5}$ and $\etree_\rchild.\wght = \frac{1}{5}$.  Similarly, let $\stree$ denote the left-subtree of $\etree_{\lchild}$, $\stree_\lchild.\wght = \stree_\rchild.\wght = \frac{1}{2}$.  This is depicted in~\Cref{fig:expr-tree-T-wght}. %Note that in this example, the sampling probabilities for the children of each inner $+$ node of $\stree$ are equal to one another because both parents have the same number of children, and, in each case, the children of each parent $+$ node share the same $|\coef_i|$.
-\end{Example}
-
-\begin{figure}[h!]
-	\begin{tikzpicture}[thick, every tree node/.style={default_node, thick, draw=black, black, circle, text width=0.3cm, font=\bfseries, minimum size=0.65cm}, every child/.style={black}, edge from parent/.style={draw, thick},
-level 1/.style={sibling distance=0.95cm},
-level 2/.style={sibling distance=0.7cm},
-%level 2+/.style={sibling distance=0.625cm}
-%level distance = 1.25cm,
-%sibling distance = 1cm,
-%every node/.append style = {anchor=center}
-]
-
-	\Tree [.\node(root){$\boldsymbol{+}$};
-			\edge [wght_color] node[midway, auto= right, font=\bfseries, gray] {$\bsym{\frac{4}{5}}$}; [.\node[highlight_color](tl){$\boldsymbol{\times}$};
-				[.\node(s){$\bsym{+}$};
-					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sl){$\bsym{x_1}$}; ]
-					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sr){$\bsym{x_2}$}; ]
-					]
-				[.\node(sp){$\bsym{+}$};
-					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spl){$\bsym{x_1}$}; ]
-					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spr){$\bsym{\times}$};
-						[.$\bsym{-1}$ ] [.$\bsym{x_2}$ ]
-						]
-					]
-				]
-			\edge [wght_color] node[midway, auto=left, font=\bfseries, gray] {$\bsym{\frac{1}{5}}$}; [.\node[highlight_color](tr){$\boldsymbol{\times}$};
-				[.$\bsym{x_2}$
-					\edge [draw=none]; [.\node[draw=none]{}; ]
-					\edge [draw=none]; [.\node[draw=none]{}; ]
-				]
-				[.$\bsym{x_2}$ ] ]
-	]
-%	labels for plus node children, with arrows
-	\node[left=2pt of sl, highlight_color, inner sep=0pt] (sl-label) {$\stree_\lchild$};
-	\draw[highlight_color] (sl) -- (sl-label);
-	\node[right=2pt of sr, highlight_color, inner sep=0pt] (sr-label) {$\stree_\rchild$};
-	\draw[highlight_color] (sr) -- (sr-label);
-	\node[below left=2pt of spl, inner sep=0pt, highlight_color](spl-label) {$\stree_\lchild'$};
-	\draw[highlight_color] (spl) -- (spl-label);
-	\node[right=2pt of spr, highlight_color, inner sep=0] (spr-label) {$\stree_\rchild'$};
-	\draw[highlight_color] (spr) -- (spr-label);
-	\node[above left=2pt of tl, inner sep=0pt, highlight_color] (tl-label) {$\etree_\lchild$};
-	\draw[highlight_color] (tl) -- (tl-label);
-	\node[above right=2pt of tr, highlight_color, inner sep=0pt] (tr-label) {$\etree_\rchild$};
-	\node[above = 2pt of root, highlight_color, inner sep=0pt, font=\bfseries] (root-label) {$\etree$};
-	\node[above = 2pt of s, highlight_color, inner sep=0pt, font=\bfseries] (s-label) {$\stree$};
-	\node[above = 2pt of sp, highlight_color, inner sep=0pt, font=\bfseries] (sp-label) {$\stree'$};
-	\draw[highlight_color] (tr) -- (tr-label);
-%	\draw[<-|, highlight_color] (s) -- (s-label);
-%	\draw[<-|, highlight_color] (sp) -- (sp-label);
-%	\draw[<-|, highlight_color]  (root) -- (root-label);
-%\node[above right=0.7cm of TR, highlight_color, inner sep=0pt, font=\bfseries] (tr-comment) {$\etree_\rchild$};
-%		\draw[<-|, highlight_color] (TR) -- (tr-comment);
-	\end{tikzpicture}


-%	\begin{tikzpicture}[thick, level distance=1.2cm, level 1/.style={sibling distance= 5cm}, level 2/.style={sibling distance=3cm}, level 3/.style={sibling distance=1.5cm}, level 4/.style={sibling distance= 1cm}, every child/.style={black}]
-%		\node[tree_node](root) {$\boldsymbol{+}$}
-%			child[red]{node[tree_node](tl) {$\boldsymbol{\times}$}
-%				child{node[tree_node] {$\boldsymbol{+}$}
-%					child{node[tree_node]{$\boldsymbol{x_1}$}	}
-%					child{node[tree_node] {$\boldsymbol{x_2}$}}
-%					}
-%				child{node[tree_node] {$\boldsymbol{+}$}
-%					child{node[tree_node] {$\boldsymbol{x_1}$}}
-%						%child[missing]{node[tree_node] {$\boldsymbol{1}$}}
-%					child[red]{node[tree_node] {$\boldsymbol{\times}$}
-%						child{node[tree_node] {$\boldsymbol{-1}$}}
-%						child{node[tree_node] {$\boldsymbol{x_2}$}}
-%						}
-%					}
-%			}
-%			child{node[tree_node] {$\boldsymbol{\times}$} edge from parent [red]
-%				child{node[tree_node] {$\boldsymbol{x_2}$}}
-%				child{node[tree_node] {$\boldsymbol{x_2}$}}
-%				};
-%		\node[font=\bfseries, red] at (-2.8, -0.2) {$\etree_\lchild.\wght \boldsymbol{= \frac{4}{5} } $};
-%	\end{tikzpicture}
-	\caption{Weights computed by $\onepass$ in ~\cref{example:one-pass}. 
-%\AH{I fixed the labels; @atri, let me know if you would rather have the labels positioned in alternative locations.}
-%\AR{Looks good-- thanks!}
-}
-	\label{fig:expr-tree-T-wght}
-\end{figure}
+	


-We prove the correctness of Algorithm ~\ref{alg:one-pass} by proving~\Cref{lem:one-pass} in~\Cref{sec:proofs-approx-alg}.
+For an example of how $\onepass$ works, its pseudocode, and the proof of correctness (\Cref{lem:one-pass}) of Algorithm~\ref{alg:one-pass}, see~\Cref{sec:proofs-approx-alg}.

 \subsection{\sampmon\ Algorithm}
 \label{sec:samplemonomial}
@ -571,8 +450,7 @@ We argue the correctness of Algorithm ~\ref{alg:sample} by proving~\Cref{lem:sam

 \subsection{Experimental results}
 \label{sec:experiments}
-
-\input{experiments}
+We conducted an experiment running modified TPCH queries over uncertain data generated by pdbench~\cite{pdbench}, both of which (data and queries) represent what is typically encountered in practice.  Each query was run twice: once filtering $\bi$ cancellations, and once without filtering them.  The purpose of this was to obtain an indication of how many $\bi$ cancellations occur in practice.  Details and results can be found in~\Cref{app:subsec:experiment}.

 %\AR{Experimental stuff about BIDB should go in here}
 %%%%%%%%%%%%%%%%%%%%%%%
--- a/circuits-model-runtime.tex
+++ b/circuits-model-runtime.tex
@ -20,13 +20,7 @@ We first note that since expression trees are a special case of them, all of our

 For the approximation algorithm in~\Cref{sec:algo} we note that \textsc{Approx}\textsc{imate}$\rpoly$ (\Cref{alg:mon-sam}) works for lineage circuits as long as the same guarantees on $\onepass$ and $\sampmon$ (\Cref{lem:one-pass} and \Cref{lem:sample} respectively) hold for lineage circuits as well. It turns out that both $\onepass$ and $\sampmon$ work for lineage circuits as well, simply because the only property these use for expression trees is that each node has two children. This is still valid for lineage circuits, where for each non-source node the children correspond to the two nodes that have incoming edges to the given node. Put another way, our argument never used the fact that in an expression tree, each node has at most one parent.

-More specifically consider $\onepass$. The algorithm (as well as its analysis) basically uses the fact that one can compute the corresponding polynomial at all $1$s input with a simple recursive formula (\cref{eq:T-all-ones}), and that we can compute a probability distribution based on these weights (as in~\cref{eq:T-weights}). It can be verified that all the arguments go through if we replace $\etree_\lchild$ and $\etree_\rchild$ for expression tree $\etree$ with the two incoming nodes of the sink for the given lineage circuit. Another way to look at this is we could `unroll' the recursion in $\onepass$ and think of the algorithm as doing the evaluation at each node bottom up from leaves to the root in the expression tree. For lineage circuits, we start from the source nodes and do the computation in the topological order till we reach the sink(s).
-
-The argument for $\sampmon$ is similar. Since we argued that $\onepass$ works as intended for lineage circuits since~\Cref{alg:one-pass} only recurses on children of the current node in the expression tree and we can generalize it to lineage circuits by recursing to the two children of the current node in the lineage circuit. Alternatively, as we have already used in the proof of~\Cref{lem:sample}, we can think of the sampling algorithm sampling a sub-graph of the expression tree. For lineage circuits, we can think of $\sampmon$ as sampling the same sub-graph. Alternatively, one can implicitly expand the circuit lineage into a (larger but) equivalent expression tree. Since $\sampmon$ only explores one sub-graph during its run we can think of its run on a lineage circuit as being done on the implicit equivalent expression tree\footnote{
-  Recall that $\sampmon$ scales only in the depth of the expression and its polynomial degree ($k$). There exist polynomials that can be encoded in size $\Omega(\log k)$, but we follow convention in assuming that the circuit size is asymptotically larger than $k$ and thus treat the degree (i.e., join width) as a constant.
-}. Hence, all of the results on $\sampmon$  on expression trees carry over to lineage circuits.
-
-Thus, we have argued that~\Cref{lem:approx-alg} also holds if we use a lineage circuit instead of an expression tree as the input to our approximation algorithm.
+For further discussion on why~\Cref{lem:approx-alg} holds for a lineage circuit, see~\Cref{app:lineage-circuit-ext}.

 \subsubsection{The cost model}
 \label{sec:cost-model}
@ -44,9 +38,8 @@ We adopt a minimalistic compute-bound model of query evaluation drawn from worst
 Under this model the query plan $Q(D)$ has runtime $O(\qruntime{Q(D)})$.
 Base relations assume that a full table scan is required; We model index scans by treating an index scan query $\sigma_\theta(R)$ as a single base relation.

-It can be verified that the worst-case join algorithms~\cite{skew,ngo-survey}, as well as query evaluation via factorized databases~\cite{factorized-db} (and work on FAQs~\cite{DBLP:conf/pods/KhamisNR16}) can be modeled as select-union-project-join queries (though these queries can be data dependent).\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} Further, it can be verified that the above cost model on the corresponding SUPJ join queries correctly captures their runtime.
-\AH{I am used to folks using the order SPJU, is this ordering of operations a `standard' that we should follow?}
-\AR{Am not sure if we need to motivate the cost model more.} 
+It can be verified that the worst-case join algorithms~\cite{skew,ngo-survey}, as well as query evaluation via factorized databases~\cite{factorized-db} (and work on FAQs~\cite{DBLP:conf/pods/KhamisNR16}) can be modeled as select-union-project-join queries (though these queries can be data dependent).\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} Further, it can be verified that the above cost model on the corresponding SPJU join queries correctly captures their runtime.
+
 %We now make a simple observation on the above cost model:
 %\begin{proposition}
 %\label{prop:queries-need-to-output-tuples}
@ -56,7 +49,7 @@ It can be verified that the worst-case join algorithms~\cite{skew,ngo-survey}, a

 \subsubsection{Lineage circuit for query plans}
 \label{sec:circuits-formal}
-We now define a lineage circuit more formally and also show how to construct a lineage circuit given a SUPJ query $Q$.
+We now define a lineage circuit more formally and also show how to construct a lineage circuit given an SPJU query $Q$.

 As mentioned earlier, we represent lineage polynomials with arithmetic circuits over $\mathbb N$ with $+$, $\times$.  
 A circuit for query $Q$ is a directed acyclic graph $\tuple{V_Q, E_Q, \phi_Q, \ell_Q}$ with vertices $V_Q$ and directed edges $E_Q \subset V_Q^2$.  
@ -66,140 +59,23 @@ We require that $\phi_Q$'s range be limited to sink vertices (i.e., vertices wit
 A function $\ell_Q : V_Q \rightarrow \{\;+,\times\;\}\cup \mathbb N \cup \vct X$ assigns a label to each node: Source nodes (i.e., vertices with in-degree 0) are labeled with constants or variables (i.e., $\mathbb N \cup \vct X$), while the remaining nodes are labeled with the symbol $+$ or $\times$.
 We require that vertices have an in-degree of at most two.

-\newcommand{\getpoly}[1]{\textbf{poly}\inparen{#1}}
-Each vertex $v \in V_Q$ in the arithmetic circuit for $\tuple{V_Q, E_Q, \phi_Q, \ell_Q}$ encodes a polynomial, realized as
-\AH{We already have a function named poly (not in bold however).  Is \textbf{poly} enough to convey to the reader that this is a \emph{different} function, or is another name a better idea ?} 
-$$\getpoly{v} = \begin{cases}
-\sum_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell(v) = +\\
-\prod_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
-\ell(v) & \textbf{otherwise}
-\end{cases}$$
+For the specifics on how lineage circuits are translated to represent polynomials, see~\Cref{app:subsec-rep-poly-lin-circ}.

-\newcommand{\caseheading}[1]{\smallskip \noindent \textbf{#1}.~}
-We define the circuit for a select-union-project-join $Q$ recursively by cases as follows.  In each case, let $\tuple{V_{Q_i}, E_{Q_i}, \phi_{Q_i}, \ell_{Q_i}}$ denote the circuit for subquery $Q_i$.
-
-\caseheading{Base Relation}
-Let $Q$ be a base relation $R$.  We define one node for each tuple.  Formally, let $V_Q = \comprehension{v_t}{t\in R}$, let $\phi_Q(t) = v_t$, let $\ell_Q(v_t) = R(t)$, and let $E_Q = \emptyset$.
-This circuit has $|R|$ vertices.
-
-\caseheading{Selection}
-Let $Q = \sigma_\theta \inparen{Q_1}$.
-We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.  
-Formally, let $V_Q = V_{Q_1}$, let $\ell_Q(v_0) = 0$, and let $\ell_Q(v) = \ell_{Q_1}(v)$ for any $v \in V_{Q_1}$.  Let $E_Q = E_{Q_1}$, and define
-$$\phi_Q(t) =
-\phi_{Q_1}(t)  \text{ for } t \text{ s.t.}\; \theta(t).$$
-Dead sinks are iteratively removed, and so 
-%\AH{While not explicit, I assume a reviewer would know that the notation above discards tuples/vertices not satisfying the selection predicate.}
-%v_0 & \textbf{otherwise}
-%\end{cases}$$
-this circuit has at most $|V_{Q_1}|$ vertices.
-
-\caseheading{Projection}
-Let $Q = \pi_{\vct A} {Q_1}$.
-We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
-Naively, let $V_Q = V_{Q_1} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_Q(t) = v_t$, and let $\ell_Q(v_t) = +$.  Finally let 
-$$E_Q = E_{Q_1} \cup \comprehension{(\phi_{Q_1}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
-This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree.  The resulting structure has at most $|{Q_1}|-1$ new vertices.
-% \AH{Is the rightmost operator \emph{supposed} to be a $-$?  In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
-The corrected circuit thus has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
-
-\caseheading{Union}
-Let $Q = {Q_1} \cup {Q_2}$.
-We merge graphs and produce a sum vertex for all tuples in both sides of the union.
-Formally, let $V_Q = V_{Q_1} \cup V_{Q_2} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_Q(v_t) = +$, and let 
-$$E_Q = E_{Q_1} \cup E_{Q_2} \cup \comprehension{(\phi_{Q_1}(t), v_t), (\phi_{Q_2}(t), v_t)}{t \in {Q_1} \cap {Q_2}}$$
-$$\phi_Q(t) = \begin{cases}
-v_t & \textbf{if } t \in {Q_1} \cap {Q_1}\\
-\phi_{Q_1}(t) & \textbf{if } t \not \in {Q_2}\\
-\phi_{Q_2}(t) & \textbf{if } t \not \in {Q_1}\\
-\end{cases}$$
-This circuit has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
-
-\caseheading{$k$-ary Join}
-Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
-We merge graphs and produce a multiplication vertex for all tuples resulting from the join
-Naively, let $V_Q = V_{Q_1} \cup \ldots \cup V_{Q_k} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let 
-{\small
-\begin{multline*}
-E_Q = E_{Q_1} \cup \ldots \cup E_{Q_k} \cup 
-\left\{\;
-(\phi_{Q_1}(\pi_{\sch({Q_1})}t), v_t), \right.\\
-\ldots, (\phi_{Q_k}(\pi_{\sch({Q_k})}t), v_t)
-\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
-\end{multline*}
-}
-Let $\ell_Q(v_t) = \times$, and let $\phi_Q(t) = v_t$
-As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.  
-There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.

 \subsubsection{Circuit size vs. runtime}
 \label{sec:circuit-runtime}

-We now connect the size of a lineage circuit (where the size of a lineage circuit is the number of vertices in the corresponding DAG\footnote{since each node has indegree at most two, this also is the same up to constants to counting the number of edges in the DAG.})\AH{Wouldn't it be the same for an arbitrary indegree?  On another note, for a base relation with no edges, is this still considered the same \emph{up to a constant}?  What if the base relation contains $10^{10}$ tuples/vertices?} for a given SUPJ query $Q$ to its $\qruntime{Q}$.  We do this formally by showing that the size of the lineage circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms.
+We now connect the size of a lineage circuit (where the size of a lineage circuit is the number of vertices in the corresponding DAG\footnote{Since each node has in-degree at most two, this is the same, up to constants, as counting the number of edges in the DAG.}) for a given SPJU query $Q$ to its $\qruntime{Q}$.  We do this formally by showing that the size of the lineage circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms.

 \begin{lemma}
 \label{lem:circuits-model-runtime}
 The lineage circuit of the result of any query plan $Q$ has the same or better complexity than the runtime of $Q$, for any specific database instance.  That is, for any query plan $Q$ we have $|V_Q| \leq (k-1)\qruntime{Q}$, where $k$ is the degree of query polynomial corresponding to $Q$. 
 \end{lemma}
-\begin{proof}
-Proof by induction.  The base case is a base relation: $Q = R$ and is trivially true since $|V_R| = |R|$.
-For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i}| \leq (k_i-1)\qruntime{Q_i}$ where $k_i$ is the degree of $Q_i$.
+The proof is in~\Cref{app:subsec-lem-lin-vs-qplan}.

-\caseheading{Selection}
-Assume that $Q = \sigma_\theta(Q_1)$.
-In the circuit for $Q$, $|V_Q| = |V_{Q_1}|$ vertices, so from the inductive assumption and $\qruntime{Q} = \qruntime{Q_1}$ by definition, we have $|V_Q| \leq (k-1) \qruntime{Q} $.
-% \AH{Technically, $\kElem$ is the degree of $\poly_1$, but I guess this is a moot point since one can argue that $\kElem$ is also the degree of $\poly$.}
-% OK: Correct
-\caseheading{Projection}
-Assume that $Q = \pi_{\vct A}(Q_1)$.
-The circuit for $Q$ has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
-% \AH{The combination of terms above doesn't follow the details for projection above.}
-\begin{align*}
-|V_{Q}| & \leq |V_{Q_1}| + |Q_1|\\
-%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
-%& \leq |V_{Q_1}| + 2 \qruntime{Q_1}\\
-\intertext{(From the inductive assumption)}
-& \leq (k-1)\qruntime{Q_1} + \abs{Q_1}\\
-\intertext{(By definition  of $\qruntime{Q}$)}
-& \le (k-1)\qruntime{Q}.
-\end{align*}
-\AH{In the inductive step above, where does $\abs{\poly_1}$ come from?  I understand that $b_i$ is part of the inductive hypothesis, but, is it \emph{legal/justifiable} to just throw in \emph{any} constant we so desire?}
-
-\caseheading{Union}
-Assume that $Q = Q_1 \cup Q_2$.
-The circuit for $Q$ has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
-\begin{align*}
-|V_{Q}| & \leq |V_{Q_1}|+|V_{Q_2}|+|{Q_1}|+|{Q_2}|\\
-%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
-%& \leq |V_{Q_1}|+|V_{Q_2}|+\qruntime{Q_1}+\qruntime{Q_2}|\\
-\intertext{(From the inductive assumption)}
-& \leq (k-1)(\qruntime{Q_1} + \qruntime{Q_2}) + (b_1 + b_2)
-\intertext{(By definition of $\qruntime{Q}$)}
-& \leq (k-1)(\qruntime{Q}).
-\end{align*}
-
-\caseheading{$k$-ary Join}
-Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
-The circuit for $Q$ has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
-\begin{align*}
-|V_{Q}| & = |V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
-\intertext{From the inductive assumption and noting $\forall i: k_i \leq k-1$}
-& \leq (k-1)\qruntime{Q_1}+\ldots+(k-1)\qruntime{Q_k}+\\
-&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
-& \leq (k-1)(\qruntime{Q_1}+\ldots+\qruntime{Q_k}+\\
-&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
-\intertext{(By definition of $\qruntime{Q}$)}
-& = (k-1)\qruntime{Q}.
-\end{align*}
-
-The property holds for all recursive queries, and the proof holds.
-
-\end{proof}
-\qed
-
-We now have all the pieces to argue the following, which formally states that our approximation algorithm implies that approximating the expected multiplicities of  SUPJ query can be done in essentially the same runtime as deterministic query processing of the same query:
+We now have all the pieces to argue the following, which formally states that our approximation algorithm implies that approximating the expected multiplicities of an SPJU query can be done in essentially the same runtime as deterministic query processing of the same query:
 \begin{Corollary}
-Given an SUPJ query $Q$ for a TIDB, we can present $(1\pm\eps)$ approximation to the expectation of each output tuple with probability at least $1-\delta$ in time $O_k\left(\frac 1{\eps^2}\cdot\qruntime{Q}\cdot \log{\frac{1}{\conf}}\cdot \log(n)\right)$.
+Given an SPJU query $Q$ for a TIDB, we can present $(1\pm\eps)$ approximation to the expectation of each output tuple with probability at least $1-\delta$ in time $O_k\left(\frac 1{\eps^2}\cdot\qruntime{Q}\cdot \log{\frac{1}{\conf}}\cdot \log(n)\right)$.
 \end{Corollary}
 \begin{proof}
 This follows from~\Cref{lem:circuits-model-runtime} and (the lineage circuit counterpart-- see~\Cref{sec:results-circuits} of)~\Cref{cor:approx-algo-const-p} (where the latter is used with $\delta$ being substituted\footnote{Recall that~\Cref{cor:approx-algo-const-p} is stated for a single output tuple so to get the required guarantee for all (at most $n^k$) output tuples of $Q$ we get at most $\frac \delta{n^k}$ probability of failure for each output tuple and then just a union bound over all output tuples. } with $\frac \delta{n^k}$).
--- a/hardness-app.tex
+++ b/hardness-app.tex
@ -67,7 +67,7 @@ The closure under $\raPlus$ queries follows from the fact that an assignment $\v
 Now let us consider computing the expected multiplicity of a tuple $\tup$ in the result of a query $\query$ over an $\semN$-PDB $\pdb$ using the annotation of $\tup$ in the result of evaluating $\query$ over an $\semNX$-PDB $\pxdb$ for which $\rmod(\pxdb) = \pdb$. The expectation of the polynomial $\poly = \query(\pxdb)(\tup)$ based on the probability distribution of $\pxdb$ over the variables in $\pxdb$ is:

 \begin{equation}
-  \expct_{\vct{X} \sim \pd}\pbox{\poly(\vct{X})} = \sum_{\vct{w} \in \{0,1\}^n} \query(\assign_{\vct{w}}(\pxdb))(\tup) \cdot \pd(\vct{w})\label{eq:expect-q-nx}
+  \expct_{\vct{W} \sim \pd}\pbox{\poly(\vct{W})} = \sum_{\vct{w} \in \{0,1\}^n} \query(\assign_{\vct{w}}(\pxdb))(\tup) \cdot \pd(\vct{w})\label{eq:expect-q-nx}
 \end{equation}

 Since $\semNX$-PDBs $\pxdb$ are a complete representation system for $\semN$-PDBs which are closed under $\raPlus$, computing the expectation of the  multiplicity of a tuple $t$ in the result of an $\raPlus$ query over the $\semN$-PDB $\rmod(\pxdb)$, is the same as computing the expectation of the polynomial $\query(\pxdb)(t)$.
@ -80,7 +80,7 @@ Since $\semNX$-PDBs $\pxdb$ are a complete representation system for $\semN$-PDB
 Two important subclasses of $\semNX$-PDBs that are of interest to us are the bag versions of tuple-independent databases (\tis) and block-independent databases (\bis). Under set semantics, a \ti is a deterministic database $\db$ where each tuple $\tup$ is assigned a probability $\prob(\tup)$. The set of possible worlds represented by a \ti $\db$ is all subsets of $\db$. The probability of each world is the product of the probabilities of all tuples that exist with one minus the probability of all tuples of $\db$ that are not part of this world, i.e., tuples are treated  as independent  random events. In a \bi, we also  assign each tuple a  probability,  but  additionally partition  $\db$ into blocks. The possible worlds of a \bi $\db$ are all subsets  of $\db$ that contain at most one tuple  from each block.  Note then that the tuples sharing the same block are disjoint, and the sum of the probabilities of all the tuples in the same block $\block$ is $1$.  The probability of such a world is the product of the probabilities of all tuples present in the world.  %and one minus the sum of the probabilities of all tuples from blocks for which no  tuple is present in the world.  
 For bag \tis and \bis, we define the probability of a tuple to  be the probability that the tuple exists with multiplicity at least $1$.

-\AH{This part needs more work if we include it.}
+\AH{This part \emph{below} needs more work if we include it.}
 Note that the main difference to the standard definitions of \tis and \bis is that we define them as subclasses of $\semNX$-PDBs and that we use bag semantics. Even though tuples cannot occur more than once in the input \ti or \bi, they can occur with a multiplicity larger than one in the result of a query. Since in \tis and \bis, there is a one-to-one correspondence between tuples in the database and variables, we can interpret a vector $\vct{w} \in \{0,1\}^n$ as denoting which tuples exist in the possible world $\assign_{\vct{w}}(\pxdb)$ (the ones where $\vct{w}[i] = 1$). Denote the vector $\vct{p}$ to be a vector whose elements are the individual probabilities $\prob_i$ of each tuple $\tup_i$.  Let $\pd^{(\vct{p})}$ denote the distribution induced by $\vct{p}$.

 %
@ -125,7 +125,19 @@ Note that \cref{lem:exp-poly-rpoly} shows that $\expct\pbox{\poly} =$ $\rpoly(\p


 \section{Missing details from Section~\ref{sec:hard}}
-\label{app:hard}
+\label{app:single-mult-p}
+
+We use~\Cref{lem:qEk-multi-p} to prove~\Cref{thm:mult-p-hard-result}:
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Proof of Theorem~\ref{thm:mult-p-hard-result}}
+For the sake of contradiction, let us assume we can solve our problem in $f(\kElem)\cdot m^c$ time for some absolute constant $c$. Then given a graph $G$ we can compute the query polynomial $\rpoly_G^\kElem$ (in the obvious way) in $O(km)$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $f(\kElem)\cdot m^c$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Thus, overall we have an algorithm for computing the number of $k$-matchings in time
+\begin{align*}
+ O(km) + f(\kElem)\cdot m^c + O(\kElem^3)
+&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot m^{c+1} \\
+&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot n^{2c+2},
+\end{align*}
+which contradicts \Cref{thm:k-match-hard}.
+

 \subsection{Proofs of~\cref{eq:1e}-\cref{eq:2pd-3d}}
 \label{app:easy-counts}
@ -389,7 +401,111 @@ Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the firs



+\subsection{$\onepass$ Pseudocode}
+\begin{algorithm}[h!]
+	\caption{\onepass$(\etree)$}
+	\label{alg:one-pass}
+\begin{algorithmic}[1]
+	\Require \etree: Binary Expression Tree
+	\Ensure \etree: Binary Expression Tree
+	\Ensure \vari{sum} $\in \mathbb{R}$
+	\If{$\etree.\type = +$}\label{alg:one-pass-equality1}
+		\State $\accum \gets 0$\label{alg:one-pass-plus-assign1}
+		\For{$child$ in $\etree.\vari{children}$}\Comment{Sum up all children coefficients}
+			\State $(child, \vari{s}) \gets \onepass(child)$
+			\State $\accum \gets \accum + \vari{s}$\label{alg:one-pass-plus-add}
+		\EndFor
+		\State $\etree.\vari{partial} \gets \accum$\label{alg:one-pass-plus-assign2}
+		\For{$child$ in $\etree.\vari{children}$}\Comment{Record distributions for each child}
+			\State $child.\vari{weight} \gets \frac{child.\vari{partial}}{\etree.\vari{partial}}$\label{alg:one-pass-plus-prob}
+		\EndFor
+		%\State $\vari{sum} \gets \etree.\vari{partial}$\label{alg:one-pass-plus-assign3}
+		\State \Return (\etree, \etree.\vari{partial})
+	\ElsIf{$\etree.\type = \times$}\label{alg:one-pass-equality2}
+		\State $\accum \gets 1$\label{alg:one-pass-times-assign1}
+		\For{$child \text{ in } \etree.\vari{children}$}\Comment{Compute the product of all children coefficients}
+			\State $(child, \vari{s}) \gets \onepass(child)$
+			\State $\accum \gets \accum \times \vari{s}$\label{alg:one-pass-times-product}
+		\EndFor
+		\State $\etree.\vari{partial}\gets \accum$\label{alg:one-pass-times-assign2}
+		%\State $\vari{sum} \gets \etree.\vari{partial}$\label{alg:one-pass-times-assign3}
+		\State \Return (\etree, \etree.\vari{partial})
+	\ElsIf{$\etree.\type = numeric$}\Comment{Base case}\label{alg:one-pass-equality3}
+		\State $\vari{sum} \gets |\etree.\val|$\label{alg:one-pass-leaf-assign1}\Comment{This step effectively converts $\etree$ into $\abs{\etree}$}
+		\State \Return (\etree, \vari{sum})
+	\Else\Comment{$\etree.\type = \var$}\label{alg:one-pass-equality4}
+		%\State $\vari{sum} \gets 1$\label{alg:one-pass-global-assign}
+		\State \Return (\etree,$1$) % \vari{sum})
+	\EndIf
+\end{algorithmic}
+\end{algorithm}
+
+\subsection{$\onepass$ Example}
+\begin{Example}\label{example:one-pass}
+ Let $\etree$ encode the expression $(X_1 + X_2)(X_1 - X_2) + X_2^2$.  After one pass, \cref{alg:one-pass} would have computed the following weight distribution.  For the two children of the root $+$ node $\etree$, we have $\etree_\lchild.\wght = \frac{4}{5}$ and $\etree_\rchild.\wght = \frac{1}{5}$.  Similarly, letting $\stree$ denote the left subtree of $\etree_{\lchild}$, we have $\stree_\lchild.\wght = \stree_\rchild.\wght = \frac{1}{2}$.  This is depicted in~\Cref{fig:expr-tree-T-wght}. %Note that in this example, the sampling probabilities for the children of each inner $+$ node of $\stree$ are equal to one another because both parents have the same number of children, and, in each case, the children of each parent $+$ node share the same $|\coef_i|$.
+\end{Example}
+
+\begin{figure}[h!]
+	\begin{tikzpicture}[thick, every tree node/.style={default_node, thick, draw=black, black, circle, text width=0.3cm, font=\bfseries, minimum size=0.65cm}, every child/.style={black}, edge from parent/.style={draw, thick},
+level 1/.style={sibling distance=0.95cm},
+level 2/.style={sibling distance=0.7cm},
+%level 2+/.style={sibling distance=0.625cm}
+%level distance = 1.25cm,
+%sibling distance = 1cm,
+%every node/.append style = {anchor=center}
+]
+
+	\Tree [.\node(root){$\boldsymbol{+}$};
+			\edge [wght_color] node[midway, auto= right, font=\bfseries, gray] {$\bsym{\frac{4}{5}}$}; [.\node[highlight_color](tl){$\boldsymbol{\times}$};
+				[.\node(s){$\bsym{+}$};
+					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sl){$\bsym{x_1}$}; ]
+					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sr){$\bsym{x_2}$}; ]
+					]
+				[.\node(sp){$\bsym{+}$};
+					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spl){$\bsym{x_1}$}; ]
+					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spr){$\bsym{\times}$};
+						[.$\bsym{-1}$ ] [.$\bsym{x_2}$ ]
+						]
+					]
+				]
+			\edge [wght_color] node[midway, auto=left, font=\bfseries, gray] {$\bsym{\frac{1}{5}}$}; [.\node[highlight_color](tr){$\boldsymbol{\times}$};
+				[.$\bsym{x_2}$
+					\edge [draw=none]; [.\node[draw=none]{}; ]
+					\edge [draw=none]; [.\node[draw=none]{}; ]
+				]
+				[.$\bsym{x_2}$ ] ]
+	]
+%	labels for plus node children, with arrows
+	\node[left=2pt of sl, highlight_color, inner sep=0pt] (sl-label) {$\stree_\lchild$};
+	\draw[highlight_color] (sl) -- (sl-label);
+	\node[right=2pt of sr, highlight_color, inner sep=0pt] (sr-label) {$\stree_\rchild$};
+	\draw[highlight_color] (sr) -- (sr-label);
+	\node[below left=2pt of spl, inner sep=0pt, highlight_color](spl-label) {$\stree_\lchild'$};
+	\draw[highlight_color] (spl) -- (spl-label);
+	\node[right=2pt of spr, highlight_color, inner sep=0] (spr-label) {$\stree_\rchild'$};
+	\draw[highlight_color] (spr) -- (spr-label);
+	\node[above left=2pt of tl, inner sep=0pt, highlight_color] (tl-label) {$\etree_\lchild$};
+	\draw[highlight_color] (tl) -- (tl-label);
+	\node[above right=2pt of tr, highlight_color, inner sep=0pt] (tr-label) {$\etree_\rchild$};
+	\node[above = 2pt of root, highlight_color, inner sep=0pt, font=\bfseries] (root-label) {$\etree$};
+	\node[above = 2pt of s, highlight_color, inner sep=0pt, font=\bfseries] (s-label) {$\stree$};
+	\node[above = 2pt of sp, highlight_color, inner sep=0pt, font=\bfseries] (sp-label) {$\stree'$};
+	\draw[highlight_color] (tr) -- (tr-label);
+%	\draw[<-|, highlight_color] (s) -- (s-label);
+%	\draw[<-|, highlight_color] (sp) -- (sp-label);
+%	\draw[<-|, highlight_color]  (root) -- (root-label);
+%\node[above right=0.7cm of TR, highlight_color, inner sep=0pt, font=\bfseries] (tr-comment) {$\etree_\rchild$};
+%		\draw[<-|, highlight_color] (TR) -- (tr-comment);
+	\end{tikzpicture}
+	
+		\caption{Weights computed by $\onepass$ in~\cref{example:one-pass}.}
+		
+		\label{fig:expr-tree-T-wght}
+\end{figure}
+
+
 \subsection{Proof of~\Cref{lem:one-pass}}
+
 We prove the first part of~\Cref{lem:one-pass}, i.e., correctness, by structural induction over the depth $d$ of the binary tree $\etree$.

 For the base case, $d = 0$, it is the case that the node is a leaf and therefore by Definition~\ref{def:express-tree} must be a variable or coefficient.  When it is a variable, \textsc{OnePass} returns $1$, and we have in this case that $\polyf(\etree) = X_i = \polyf(\abs{\etree})$ for some $i$ in $[\numvar]$, and this evaluated at all $1$'s indeed gives $1$, verifying the correctness of the returned value of $\abs{\etree}(1,\ldots, 1) = 1$.  When the root is a coefficient, the absolute value of the coefficient is returned, which is indeed $\abs{\etree}(1,\ldots, 1)$.  This proves the base case.
@ -445,4 +561,140 @@ We now bound the number of recursive calls in $\sampmon$ by $O\left(k\cdot depth
 It is easy to check that except for~\Cref{alg:sample-times-union}, all other lines take $O(1)$ time. Thus, overall all lines except for~\Cref{alg:sample-times-union} take $O(k\cdot depth(\etree))$ time. Now consider all executions of~\Cref{alg:sample-times-union} together. We note that at each level we will be adding a given set of variables to some set at most once: since the sum of the sizes of the sets at a given level is at most $k$, each level involves $O(k\log{k})$ time. Thus, overall all executions of~\Cref{alg:sample-times-union} take $O(k\log{k}\cdot depth(\etree))$ time, as desired.


+\subsection{Experimental Results}\label{app:subsec:experiment}
+\input{experiments}
+
+\section{Circuits}\label{app:sec-cicuits}
+\subsection{Extending to Lineage Circuits}\label{app:lineage-circuit-ext}
+
+More specifically consider $\onepass$. The algorithm (as well as its analysis) basically uses the fact that one can compute the corresponding polynomial at all $1$s input with a simple recursive formula (\cref{eq:T-all-ones}), and that we can compute a probability distribution based on these weights (as in~\cref{eq:T-weights}). It can be verified that all the arguments go through if we replace $\etree_\lchild$ and $\etree_\rchild$ for expression tree $\etree$ with the two incoming nodes of the sink for the given lineage circuit. Another way to look at this is we could `unroll' the recursion in $\onepass$ and think of the algorithm as doing the evaluation at each node bottom up from leaves to the root in the expression tree. For lineage circuits, we start from the source nodes and do the computation in the topological order till we reach the sink(s).
+
+The argument for $\sampmon$ is similar. Recall that we argued that $\onepass$ works as intended for lineage circuits: since~\Cref{alg:one-pass} only recurses on children of the current node in the expression tree, we can generalize it to lineage circuits by recursing to the two children of the current node in the lineage circuit. Alternatively, as we have already used in the proof of~\Cref{lem:sample}, we can think of the sampling algorithm as sampling a sub-graph of the expression tree. For lineage circuits, we can think of $\sampmon$ as sampling the same sub-graph. Alternatively, one can implicitly expand the lineage circuit into a (larger but) equivalent expression tree. Since $\sampmon$ only explores one sub-graph during its run we can think of its run on a lineage circuit as being done on the implicit equivalent expression tree\footnote{
+  Recall that $\sampmon$ scales only in the depth of the expression and its polynomial degree ($k$). There exist polynomials that can be encoded in size $\Omega(\log k)$, but we follow convention in assuming that the circuit size is asymptotically larger than $k$ and thus treat the degree (i.e., join width) as a constant.
+}. Hence, all of the results on $\sampmon$  on expression trees carry over to lineage circuits.
+
+Thus, we have argued that~\Cref{lem:approx-alg} also holds if we use a lineage circuit instead of an expression tree as the input to our approximation algorithm.
+
+\subsection{Representing Polynomials with Lineage Circuits}\label{app:subsec-rep-poly-lin-circ}
+\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
+Each vertex $v \in V_Q$ in the arithmetic circuit $\tuple{V_Q, E_Q, \phi_Q, \ell_Q}$ for a query $Q$ encodes a polynomial, realized as
+
+$$\getpoly{v} = \begin{cases}
+\sum_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell_Q(v) = +\\
+\prod_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell_Q(v) = \times\\
+\ell_Q(v) & \textbf{otherwise}
+\end{cases}$$
+
+
+We define the circuit for a select-union-project-join query $Q$ recursively by cases as follows.  In each case, let $\tuple{V_{Q_i}, E_{Q_i}, \phi_{Q_i}, \ell_{Q_i}}$ denote the circuit for subquery $Q_i$.
+
+\caseheading{Base Relation}
+Let $Q$ be a base relation $R$.  We define one node for each tuple.  Formally, let $V_Q = \comprehension{v_t}{t\in R}$, let $\phi_Q(t) = v_t$, let $\ell_Q(v_t) = R(t)$, and let $E_Q = \emptyset$.
+This circuit has $|R|$ vertices.
+
+\caseheading{Selection}
+Let $Q = \sigma_\theta \inparen{Q_1}$.
+We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.  
+Formally, let $V_Q = V_{Q_1}$, and let $\ell_Q(v) = \ell_{Q_1}(v)$ for any $v \in V_{Q_1}$.  Let $E_Q = E_{Q_1}$, and define
+$$\phi_Q(t) =
+\phi_{Q_1}(t)  \text{ for } t \text{ s.t.}\; \theta(t).$$
+Dead sinks are iteratively removed, and so 
+%\AH{While not explicit, I assume a reviewer would know that the notation above discards tuples/vertices not satisfying the selection predicate.}
+%v_0 & \textbf{otherwise}
+%\end{cases}$$
+this circuit has at most $|V_{Q_1}|$ vertices.
+
+\caseheading{Projection}
+Let $Q = \pi_{\vct A} {Q_1}$.
+We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
+Naively, let $V_Q = V_{Q_1} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_Q(t) = v_t$, and let $\ell_Q(v_t) = +$.  Finally let 
+$$E_Q = E_{Q_1} \cup \comprehension{(\phi_{Q_1}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
+This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree.  The resulting structure has at most $|{Q_1}|-1$ new vertices.
+% \AH{Is the rightmost operator \emph{supposed} to be a $-$?  In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
+The corrected circuit thus has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
+
+\caseheading{Union}
+Let $Q = {Q_1} \cup {Q_2}$.
+We merge graphs and produce a sum vertex for all tuples in both sides of the union.
+Formally, let $V_Q = V_{Q_1} \cup V_{Q_2} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_Q(v_t) = +$, and let 
+$$E_Q = E_{Q_1} \cup E_{Q_2} \cup \comprehension{(\phi_{Q_1}(t), v_t), (\phi_{Q_2}(t), v_t)}{t \in {Q_1} \cap {Q_2}}$$
+$$\phi_Q(t) = \begin{cases}
+v_t & \textbf{if } t \in {Q_1} \cap {Q_2}\\
+\phi_{Q_1}(t) & \textbf{if } t \not \in {Q_2}\\
+\phi_{Q_2}(t) & \textbf{if } t \not \in {Q_1}\\
+\end{cases}$$
+This circuit has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
+
+\caseheading{$k$-ary Join}
+Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
+We merge graphs and produce a multiplication vertex for all tuples resulting from the join.
+Naively, let $V_Q = V_{Q_1} \cup \ldots \cup V_{Q_k} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let 
+{\small
+\begin{multline*}
+E_Q = E_{Q_1} \cup \ldots \cup E_{Q_k} \cup 
+\left\{\;
+(\phi_{Q_1}(\pi_{\sch({Q_1})}t), v_t), \right.\\
+\ldots, (\phi_{Q_k}(\pi_{\sch({Q_k})}t), v_t)
+\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
+\end{multline*}
+}
+Let $\ell_Q(v_t) = \times$, and let $\phi_Q(t) = v_t$.
+As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.  
+There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
+
+\subsection{Proof for~\Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
+
+Proof by induction.  The base case is a base relation: $Q = R$ and is trivially true since $|V_R| = |R|$.
+For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i}| \leq (k_i-1)\qruntime{Q_i}$ where $k_i$ is the degree of $Q_i$.
+
+\caseheading{Selection}
+Assume that $Q = \sigma_\theta(Q_1)$.
+The circuit for $Q$ has $|V_Q| \leq |V_{Q_1}|$ vertices, so from the inductive assumption and the fact that $\qruntime{Q} = \qruntime{Q_1}$ by definition, we have $|V_Q| \leq (k-1) \qruntime{Q}$.
+% \AH{Technically, $\kElem$ is the degree of $\poly_1$, but I guess this is a moot point since one can argue that $\kElem$ is also the degree of $\poly$.}
+% OK: Correct
+\caseheading{Projection}
+Assume that $Q = \pi_{\vct A}(Q_1)$.
+The circuit for $Q$ has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
+% \AH{The combination of terms above doesn't follow the details for projection above.}
+\begin{align*}
+|V_{Q}| & \leq |V_{Q_1}| + |Q_1|\\
+%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
+%& \leq |V_{Q_1}| + 2 \qruntime{Q_1}\\
+\intertext{(From the inductive assumption)}
+& \leq (k-1)\qruntime{Q_1} + \abs{Q_1}\\
+\intertext{(By definition  of $\qruntime{Q}$)}
+& \le (k-1)\qruntime{Q}.
+\end{align*}
+\AH{In the inductive step above, where does $\abs{\poly_1}$ come from?  I understand that $b_i$ is part of the inductive hypothesis, but, is it \emph{legal/justifiable} to just throw in \emph{any} constant we so desire?}
+
+\caseheading{Union}
+Assume that $Q = Q_1 \cup Q_2$.
+The circuit for $Q$ has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
+\begin{align*}
+|V_{Q}| & \leq |V_{Q_1}|+|V_{Q_2}|+|{Q_1}|+|{Q_2}|\\
+%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
+%& \leq |V_{Q_1}|+|V_{Q_2}|+\qruntime{Q_1}+\qruntime{Q_2}|\\
+\intertext{(From the inductive assumption)}
+& \leq (k-1)(\qruntime{Q_1} + \qruntime{Q_2}) + \abs{Q_1} + \abs{Q_2}\\
+\intertext{(By definition of $\qruntime{Q}$)}
+& \leq (k-1)(\qruntime{Q}).
+\end{align*}
+
+\caseheading{$k$-ary Join}
+Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
+The circuit for $Q$ has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
+\begin{align*}
+|V_{Q}| & = |V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
+\intertext{From the inductive assumption and noting $\forall i: k_i \leq k-1$}
+& \leq (k-1)\qruntime{Q_1}+\ldots+(k-1)\qruntime{Q_k}+\\
+&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
+& \leq (k-1)(\qruntime{Q_1}+\ldots+\qruntime{Q_k}+\\
+&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
+\intertext{(By definition of $\qruntime{Q}$)}
+& = (k-1)\qruntime{Q}.
+\end{align*}
+
+The property holds in every case of the inductive step, which completes the proof.
+
+

--- a/macros.tex
+++ b/macros.tex
@ -2,6 +2,9 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % NOTATION
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Circuits
+\newcommand{\caseheading}[1]{\smallskip \noindent \textbf{#1}.~}
+%%%%%
 \newcommand{\wElem}{w} %an element of \vct{w}
 \newcommand{\st}{\;|\;} %such that
 \newcommand{\kElem}{k}%the kth element
--- a/mult_distinct_p.tex
+++ b/mult_distinct_p.tex
@ -62,7 +62,7 @@ Note that this imples that our hard query polynomial can be created from a proje

 \subsection{Multiple Distinct $\prob$ Values}
 \label{sec:multiple-p}
-
+Unless otherwise noted, all proofs for this section are in~\Cref{app:single-mult-p}.
 We are now ready to present our main hardness result.
 %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
@ -78,20 +78,10 @@ As mentioned earlier, we prove our hardness result by presenting a reduction fro
 Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$.  Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $O\inparen{\kElem^3}$ time.
 \end{Lemma}

-Before we prove the above Lemma, let us use it to prove~\Cref{thm:mult-p-hard-result}:
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{proof}[Proof of Theorem~\ref{thm:mult-p-hard-result}]
-For the sake of contradiction, let us assume we can solve our problem in $f(\kElem)\cdot m^c$ time for some absolute constant $c$. Then given a graph $G$ we can compute the query polynomial $\rpoly_G^\kElem$ (in the obvious way) in $O(km)$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $f(\kElem)\cdot m^c$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Thus, overall we have an algorithm for computing the number of $k$-matchings in time
-\begin{align*}
- O(km) + f(\kElem)\cdot m^c + O(\kElem^3)
-&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot m^{c+1} \\
-&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot n^{2c+2},
-\end{align*}
-which contradicts \Cref{thm:k-match-hard}.
-\end{proof}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-Finally, we are ready to prove \Cref{lem:qEk-multi-p}:
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{proof}[Proof of \Cref{lem:qEk-multi-p}]
 %It is trivial to see that one can readily expand the exponential expression by performing the $n^\kElem$ product operations, yielding the polynomial in the sum of products form of the lemma statement.  By definition $\rpoly_{G}^\kElem$ reduces all variable exponents greater than $1$ to $1$.  Thus, a monomial such as $X_i^\kElem X_j^\kElem$ is $X_iX_j$ in $\rpoly_{G}^\kElem$, and the value after substitution is $p_i\cdot p_j = p^2$.  Further, that the number of terms in the sum is no greater than $2\kElem + 1$, can be easily justified by the fact that each edge has two endpoints, and the most endpoints occur when we have $\kElem$ distinct edges (such a subgraph is also known as a $\kElem$-matching), with non-intersecting points, a case equivalent to $p^{2\kElem}$.
--- a/poly-form.tex
+++ b/poly-form.tex
@ -149,7 +149,7 @@ Note the following fact:
 Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and some query $\query$ we have
  % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
 \begin{equation*}
-\expct_{\vct{X}}\pbox{\poly(\vct{X})}  = \rpoly(\vct{p}).
+\expct_{\vct{W}}\pbox{\poly(\vct{W})}  = \rpoly(\vct{p}).
 \end{equation*}
 \end{Lemma}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/ra-to-poly.tex
+++ b/ra-to-poly.tex
@ -58,7 +58,7 @@ Assume a function $\rmod$, which takes an $\semNX$-PDB input and outputs an equi
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}
  Given an $\semN$-PDB $\pdb = (\idb,\pd)$ and $\semNX$-PDB $\pxdb = (\db,\pd')$ such that $\rmod(\pxdb) = \pdb$, we have:
-  \[ \expct_{\idb \sim \pd}[\query(\db)(t)] = \expct_{\vct{X} \sim \pd'}\pbox{\poly(\vct{X})} \]
+  \[ \expct_{\idb \sim \pd}[\query(\db)(t)] = \expct_{\vct{W} \sim \pd'}\pbox{\poly(\vct{W})} \]
 \end{Proposition}