Restructured file system for appendix.

master
Aaron Huber 2021-04-06 11:43:34 -04:00
parent 6b918517ef
commit 75b9e6a020
11 changed files with 883 additions and 1033 deletions


@ -0,0 +1,61 @@
%root: main.tex
Before proving~\Cref{lem:mon-samp}, we use it to argue our main result,~\Cref{lem:approx-alg}:
\subsection{Proof of Theorem \ref{lem:approx-alg}}\label{sec:proof-lem-approx-alg}
Set $\mathcal{E}=\approxq(\revision{\circuit}, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\revision{\circuit}}(1,\ldots, 1)},\]
which by~\Cref{lem:mon-samp} achieves the claimed accuracy bound on $\mathcal{E}$.
The claim on the runtime follows since
\begin{align*}
\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2\cdot(1 - \gamma)^2\cdot \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)}\right)^2}\\
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{\revision{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot (1 - \gamma)^2\cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
\end{align*}
%and the runtime then follows, thus upholding ~\cref{lem:approx-alg}.
which completes the proof.
We now return to the proof of~\Cref{lem:mon-samp}:
\subsection{Proof of Theorem \ref{lem:mon-samp}}\label{app:subsec-th-mon-samp}
Consider now the random variables $\randvar_1,\dots,\randvar_\samplesize$, where each $\randvar_i$ is the value of $\vari{Y}_{\vari{i}}$ after~\Cref{alg:mon-sam-product} is executed. In particular, note that we have
\[Y_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_j\in \var\inparen{\monom}} \prob_j,\]
where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
Then for each random variable $\randvar_i$, it is the case that
\begin{align*}
\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expansion{\revision{\circuit}} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_j\in \var\inparen{\monom}} \prob_j }{\abs{\revision{\circuit}}(1,\dots,1)} \\
&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)},
\end{align*}
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
\[\expct\pbox{\empmean}
= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}
= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)}.\]
Hoeffding's inequality states that if we know that each $\randvar_i$ (which are all independent) always lies in the interval $[a_i, b_i]$, then it is true that
\begin{equation*}
\probOf\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
\end{equation*}
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ factors $\prob_i\in [0, 1]$, which implies that each $\randvar_i$ lies in the range $[-1, 1]$.
Using Hoeffding's inequality, we then get:
\begin{equation*}
\probOf\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
\end{equation*}
where the last inequality follows from our choice of $\samplesize$ in~\Cref{alg:mon-sam-global2}.
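For concreteness, solving this last constraint for $\samplesize$ (a quick derivation; the exact constant chosen in~\Cref{alg:mon-sam-global2} may differ) gives
\[2\exp{\left(-\frac{\samplesize\error^2}{2}\right)} \leq \conf \iff \samplesize \geq \frac{2\ln{\frac{2}{\conf}}}{\error^2},\]
so any $\samplesize \geq \frac{2\ln{\frac{2}{\conf}}}{\error^2}$ suffices for the claimed confidence.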
This concludes the proof of the first claim of~\Cref{lem:mon-samp}.
\paragraph{Run-time Analysis}
The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes time $O(\revision{\size(\circuit)})$) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot \revision{\depth(\circuit)})$ time) and~\Cref{alg:check-duplicate-block}, which takes $O(k\log{k})$ time by the following argument: we sort the $O(k)$ variables by their block IDs in $O(k\log{k})$ time and then check for a duplicate block ID in a single $O(k)$ scan of adjacent entries. Adding up all the times discussed here gives us the desired overall runtime.
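Summing these contributions, the overall runtime adds up as follows (a sketch of the accounting; the formal statement is~\Cref{lem:mon-samp}):
\[O\inparen{\size(\circuit)} + \samplesize\cdot O\inparen{k\log{k} + k\log{k}\cdot \depth(\circuit)} = O\inparen{\size(\circuit) + \samplesize\cdot k\log{k}\cdot \depth(\circuit)}.\]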
\subsection{Proof of~\Cref{cor:approx-algo-const-p}}
The result follows by first noting that by definition of $\gamma$, we have
\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1).\]
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1)$.
Applying this bound in the runtime bound of~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $\depth(\revision{\circuit})\le \size(\revision{\circuit})$ and absorbing all factors that depend only on $k$.
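Concretely, the key ratio in the runtime bound is controlled as follows (a sketch of the substitution):
\[\frac{\abs{\revision{\circuit}}^2(1,\ldots, 1)}{\rpoly^2(\prob_1,\ldots, \prob_\numvar)} \le \frac{\abs{\revision{\circuit}}^2(1,\ldots, 1)}{\prob_0^{2k}\cdot(1-\gamma)^2\cdot\abs{\revision{\circuit}}^2(1,\ldots, 1)} = \frac{1}{\prob_0^{2k}\cdot(1-\gamma)^2},\]
which depends only on $k$ once $\prob_0$ is fixed and $\gamma$ is bounded away from $1$.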

app_hardness-results.tex Normal file

@ -0,0 +1,141 @@
%root: main.tex
We use~\Cref{lem:qEk-multi-p} to prove~\Cref{thm:mult-p-hard-result}:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof of Theorem~\ref{thm:mult-p-hard-result}}
\begin{proof}
For the sake of contradiction, let us assume we can solve our problem in $f(\kElem)\cdot m^c$ time for some absolute constant $c$. Then given a graph $G$ we can compute the query polynomial (or rather, its expression tree representation) $\rpoly_G^\kElem$ in the obvious way in $O(km)$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $f(\kElem)\cdot m^c$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Thus, overall we have an algorithm for computing the number of $k$-matchings in time
\begin{align*}
O(km) + f(\kElem)\cdot m^c + O(\kElem^3)
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot m^{c+1} \\
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot n^{2c+2},
\end{align*}
which contradicts \Cref{thm:k-match-hard}.
\qed
\end{proof}
\subsection{Proof of Lemma~\ref{lem:qEk-multi-p}}
\input{lem_mult-p}
\subsection{Proofs of~\cref{eq:1e}-\cref{eq:3p-3tri}}
\label{app:easy-counts}
The proofs for \cref{eq:1e},~\cref{eq:2p} and~\cref{eq:3s} are immediate.
\begin{proof}[Proof of \cref{eq:2m}]
For an edge $(i, j)$ connecting arbitrary vertices $i$ and $j$, finding all other edges in $G$ disjoint from $(i, j)$ is equivalent to finding all edges that are not incident to either vertex $i$ or $j$. The number of such edges is $m - d_i - d_j + 1$, where we add $1$ since edge $(i, j)$ is subtracted twice when subtracting both $d_i$ and $d_j$. Since the summation iterates over all edges, a pair $\left((i, j), (k, \ell)\right)$ is also counted as $\left((k, \ell), (i, j)\right)$; division by $2$ eliminates this double counting. Note that $m$ and $d_i$ for all $i \in V$ can be computed in one pass over the set of edges by simply maintaining counts for each quantity. Finally, the summation is also one traversal through the set of edges, where each operation is either a lookup or an addition, both $O(1)$ time.
\qed
\end{proof}
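As a quick sanity check (our own example, not part of the proof), consider the $4$-cycle: $m = 4$ and $d_i = 2$ for every vertex, so
\[\frac{1}{2}\sum_{(i, j) \in E}\inparen{m - d_i - d_j + 1} = \frac{1}{2}\cdot 4\cdot(4 - 2 - 2 + 1) = 2,\]
matching the two $2$-matchings (the two perfect matchings) of the $4$-cycle.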
\begin{proof}[Proof of \cref{eq:2pd-3d}]
\Cref{eq:2pd-3d} is true for similar reasons. For edge $(i, j)$, it is necessary to find two additional edges, disjoint or connected. As in our argument for \cref{eq:2m}, once the number of edges disjoint from $(i, j)$ has been computed, we only need to consider all possible combinations of two edges from the set of disjoint edges, since it does not matter whether those two edges are connected or not. Note that the factor $3$ on $\threedis$ is necessary to account for the triple counting of $3$-matchings. Further, since the two path in $\twopathdis$ is connected, there is no double counting: the summation automatically excludes the current edge, so a $2$-matching at the current vertex is not counted. The sum over all such edge combinations is then precisely $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$. Note that all the $d_i$ values can be computed in $O(m)$ time, and each summand (a binomial coefficient) can then be computed with constant time operations, yielding the claimed $O(m)$ run time.
\qed
\end{proof}
\begin{proof}[Proof of \cref{eq:3p-3tri}]
To compute $\numocc{G}{\threepath}$, note that for an arbitrary edge $(i, j)$, a 3-path exists for edge pair $(i, \ell)$ and $(j, k)$ where $i, j, k, \ell$ are distinct. Further, the quantity $(d_i - 1) \cdot (d_j - 1)$ represents the number of 3-edge subgraphs with middle edge $(i, j)$ and outer edges $(i, \ell), (j, k)$ such that $\ell \neq j$ and $k \neq i$. When $k = \ell$, the resulting subgraph is a triangle, and when $k \neq \ell$, the subgraph is a 3-path. Summing over all edges $(i, j)$ gives \cref{eq:3p-3tri} by observing that each triangle is counted thrice, while each 3-path is counted just once. For reasons similar to \cref{eq:2m}, all $d_i$ can be computed in $O(m)$ time and each summand can then be computed in $O(1)$ time, yielding an overall $O(m)$ run time.
\qed
\end{proof}
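Again as a quick sanity check (our own example), for the triangle graph we have $d_i = 2$ for every vertex, so $\sum_{(i, j) \in E}(d_i - 1)(d_j - 1) = 3$, which indeed equals $3\numocc{G}{\tri} + \numocc{G}{\threepath} = 3\cdot 1 + 0$.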
\subsection{Proofs for~\Cref{lem:3m-G2}-\Cref{lem:lin-sys}}\label{subsec:proofs-struc-lemmas}
Before proceeding, let us introduce a few more helpful definitions.
\begin{Definition}\label{def:ed-nota}
For $\ell > 1$, we use $E_\ell$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, we denote its edges by pairs $(e, b)$, where $b \in \{0,\ldots, \ell-1\}$ and $e\in E_1$, such that $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$.
\end{Definition}
\begin{Definition}[$\eset{\ell}$]
Given an arbitrary subgraph $\sg{1}$ of $\graph{1}$, let $\eset{1}$ denote the set of edges in $\sg{1}$. Define then $\eset{\ell}$ for $\ell > 1$ as the set of edges in the generated subgraph $\sg{\ell}$ (i.e., when we apply \Cref{def:Gk} to $\sg{1}$).
\end{Definition}
For example, consider $\sg{1}$ with edges $\eset{1} = \{e_1\}$. Then the edge set of $\sg{2}$ is defined as $\eset{2} = \{(e_1, 0), (e_1, 1)\}$.
\begin{Definition}\label{def:ed-sub}
Let $\binom{E}{t}$ denote the set of subsets in $E$ with exactly $t$ edges. In a similar manner, $\binom{E}{\leq t}$ is used to mean the subsets of $E$ with $t$ or fewer edges.
\end{Definition}
The following function $f_\ell$ is a mapping from every $3$-edge shape in $\graph{\ell}$ to its `projection' in $\graph{1}$.
\begin{Definition}\label{def:fk}
Let $f_\ell: \binom{E_\ell}{3} \mapsto \binom{E_1}{\leq3}$ be defined as follows. For any element $s \in \binom{E_\ell}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
\[ f_\ell\left(\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}\right) = \pbrace{e_1, e_2, e_3}.\]
\end{Definition}
\begin{Definition}[$f_\ell^{-1}$]\label{def:fk-inv}
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with at most three edges, the inverse function $f_\ell^{-1}: \binom{E_1}{\leq 3}\mapsto 2^{\binom{E_\ell}{3}}$ takes $\eset{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that
$f_\ell(s) = \eset{1}$.
\end{Definition}
Note, importantly, that when we discuss $f_\ell^{-1}$, each \textit{edge} present in $\eset{1}$ must have an edge in $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(\eset{1})$ consists of the following set of edges: $\{ (e_i, b), (e_j, b'), (e_m, b'') \}$, where $i,j$ and $m$ are distinct.
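For instance (our own illustration), for $\ell = 2$ and $\eset{1} = \{e_1, e_2, e_3\}$, the set $f_2^{-1}(\eset{1})$ consists of the $2^3 = 8$ sets $\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$ for $b_1, b_2, b_3 \in \{0, 1\}$.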
We first note that $f_\ell$ is well-defined:
\begin{Lemma}\label{lem:fk-func}
$f_\ell$ is a function.
\end{Lemma}
\begin{proof}\label{subsubsec:proof-fk}
Note that $f_\ell$ is properly defined. For any $S \in \binom{E_\ell}{3}$, $|f_\ell(S)| \leq 3$, since any subset of $3$ edges in $E_\ell$ maps to at most three edges in $E_1$, so all images lie in the required range. Finally, since for any $b \in \{0,\ldots, \ell-1\}$ the map $(e, b) \mapsto e$ assigns exactly one image to each element of its domain, it follows that $f_\ell$ is a function.\qed
\end{proof}
We are now ready to prove the structural lemmas. Note that $f_\ell$ maps subsets of three edges in $\graph{\ell}$ to a subset of at most three edges in $E_1$. To prove the structural lemmas, we will use the map $f_\ell^{-1}$. In particular, to count the number of occurrences of $\tri,\threepath,\threedis$ in $\graph{\ell}$ we count for each $S\in\binom{E_1}{\le 3}$, how many of $\tri/\threepath/\threedis$ subgraphs appear in $f_\ell^{-1}(S)$.
\subsubsection{Proof of Lemma \ref{lem:3m-G2}}
\begin{proof}%[Proof of \Cref{lem:3m-G2}]
For each subset $\eset{1}\in \binom{E_1}{\le 3}$, we count the number of $3$-matchings in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. We first consider the case of $\eset{1} \in \binom{E_1}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set of all $3$-edge subsets $s \subseteq \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_2(s) = \{e_1, e_2, e_3\}$.
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$ (denoted $\eset{1} \equiv \sg{1}$):
\begin{itemize}
\item $3$-matching ($\threedis$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threedis$, it is the case that edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. For any choice of $b_1, b_2, b_3 \in \{0, 1\}$, the set $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ forms a 3-matching. Since we have two possible choices of $b_i$ for each edge $e_i$ in $\graph{1}$, this yields $2^3 = 8$ possible 3-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Disjoint Two-Path ($\twopathdis$)
\end{itemize}
For $\sg{1}$ isomorphic to $\twopathdis$, edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can pick either $(e_1, 0)$ or $(e_1, 1)$ for the disjoint edge, and then we need to pick a $2$-matching from the $4$-path formed by $e_2$ and $e_3$. Note that the $4$-path admits exactly three $2$-matchings, specifically,
\begin{equation*}
\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}.
\end{equation*}
Since these two selections can be made independently, there are $2 \cdot 3 = 6$ \emph{distinct} $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-star ($\oneint$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\eset{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid $3$-matching, at most one inner edge can be part of the set of disjoint edges. For the case that exactly one inner edge is chosen, there exist $3$ possibilities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible $3$-matching occurs when all $3$ outer edges are chosen. Thus, there are four $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-path ($\threepath$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threepath$, the edges of $\eset{2}$, beginning with those derived from $e_1$ and ending with those derived from $e_3$, are successively connected, forming a $6$-path. For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$, nor both $(e_i, 1)$ and $(e_j, 0)$ where $j = i + 1$.
This leaves exactly four possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$ and $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$, i.e., four 3-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Triangle ($\tri$)
\end{itemize}
For $\sg{1}$ isomorphic to $\tri$, the edges in $\eset{2}$ are again connected in a successive manner, but this time in a cycle, so that $(e_1, 0)$ and $(e_3, 1)$ are also connected. While this is similar to the $3$-path case above, the first and last edges are no longer disjoint. This rules out the two subsets $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$ and $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}$, yielding two 3-matchings.
Let us now consider when $\eset{1} \in \binom{E_1}{\leq 2}$, i.e. patterns among
\begin{itemize}
\item $2$-matching ($\twodis$), $2$-path ($\twopath$), $1$ edge ($\ed$)
\end{itemize}
When $|\eset{1}| = 2$, we can only pick one from each of two pairs, $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. This implies that a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. The same argument holds for $|\eset{1}| = 1$, where we can only pick one edge from the pair $\pbrace{(e_1, 0), (e_1, 1)}$. Trivially, no $3$-matching exists in $f_2^{-1}(\eset{1})$.
Observe that all of the arguments above depended solely on the shape (i.e., isomorphism class) to which $\sg{1}$ belongs. In other words, all $\eset{1}$ of a given shape yield the same number of $3$-matchings in $f_2^{-1}(\eset{1})$, and this is why we get the required identity using the above case analysis.
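Collecting the counts above (a summary of the case analysis; the precise identity is the statement of~\Cref{lem:3m-G2}): each $\eset{1}$ isomorphic to $\threedis$, $\twopathdis$, $\oneint$, $\threepath$, or $\tri$ contributes $8$, $6$, $4$, $4$, or $2$ distinct $3$-matchings to $f_2^{-1}(\eset{1})$ respectively, while every $\eset{1}$ with $\abs{\eset{1}} \leq 2$ contributes none.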
\qed
\end{proof}
\subsubsection{Proof of~\Cref{lem:tri}}
\begin{proof}%[Proof of \Cref{lem:tri}]
The number of triangles in $\graph{\ell}$ for $\ell \geq 2$ will always be $0$, since every cycle in $\graph{\ell}$ has length $\ell$ times the length of a cycle in $\graph{1}$, and hence at least $3\ell \geq 6$ edges.
\qed
\end{proof}
\subsubsection{Proof of \Cref{lem:lin-sys}}
\input{lin_sys}

app_notation-background.tex Normal file

@ -0,0 +1,165 @@
%root: main.tex
To justify the use of $\semNX$-databases, we need to show that we can encode any $\semN$-PDB in this way and that the query semantics over this representation coincides with query semantics over $\semN$-PDB. For that it will be opportune to define representation systems for $\semN$-PDBs.\BG{cite}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Representation System]\label{def:representation-syste}
A representation system for $\semN$-PDBs is a tuple $(\reprs, \rmod)$ where $\reprs$ is a set of representations and $\rmod$ associates with each $\repr \in \reprs$ an $\semN$-PDB $\pdb$. We say that a representation system is \emph{closed} under a class of queries $\qClass$ if for any query $\query \in \qClass$ we have:
%
\[ \rmod(\query(\repr)) = \query(\rmod(\repr)) \]
A representation system is \emph{complete} if for every $\semN$-PDB $\pdb$ there exists $\repr \in \reprs$ such that:
%
\[ \rmod(\repr) = \pdb \]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As mentioned above we will use $\semNX$-databases paired with a probability distribution as a representation system.
We refer to such databases as $\semNX$-PDBs and use bold symbols to distinguish them from possible worlds (which are $\semN$-databases).
Formally, an $\semNX$-PDB is an $\semNX$-database $\idb_{\semNX}$ and a probability distribution $\pd$ over assignments $\assign$ of the variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ occurring in annotations of $\idb_{\semNX}$ to $\{0,1\}$. Note that an assignment $\assign: \vct{X} \to \{0,1\}^\numvar$ can be represented as a vector $\vct{w} \in \{0,1\}^n$ where $\vct{w}[i]$ records the value assigned to variable $X_i$. Thus, from now on we will solely use such vectors which we refer to as \emph{world vectors} and implicitly understand them to represent assignments. Given an assignment $\assign$ we use $\assign(\pxdb)$ to denote the semiring homomorphism $\semNX \to \semN$ that applies the assignment $\assign$ to all variables of a polynomial and evaluates the resulting expression in $\semN$.\BG{explain connection to homomorphism lifting in K-relations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[$\semNX$-PDBs]\label{def:semnx-pdbs}
An $\semNX$-PDB $\pxdb$ over variables $\vct{X} = \{X_1, \ldots, X_n\}$ is a tuple $(\idb_{\semNX},\pd)$ where $\idb_{\semNX}$ is an $\semNX$-database and $\pd$ is a probability distribution over $\vct{w} \in \{0,1\}^n$. We use $\assign_{\vct{w}}$ to denote the assignment corresponding to $\vct{w} \in \{0,1\}^n$. The $\semN$-PDB $\rmod(\pxdb) = (\idb, \pd')$ encoded by $\pxdb$ is defined as:
\begin{align*}
\idb & = \{ \assign_{\vct{w}}(\pxdb) \mid \vct{w} \in \{0,1\}^n \} \\
\forall \db \in \idb: \probOf'(\db) & = \sum_{\vct{w} \in \{0,1\}^n: \assign_{\vct{w}}(\pxdb) = \db} \probOf(\vct{w})
\end{align*}
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For instance, consider a $\pxdb$ consisting of a single tuple $\tup_1 = (1)$ annotated with $X_1 + X_2$, with probability distribution $\probOf([0,0]) = 0$, $\probOf([0,1]) = 0$, $\probOf([1,0]) = 0.3$ and $\probOf([1,1]) = 0.7$. This $\semNX$-PDB encodes two possible worlds with non-zero probability, which we denote below using their world vectors.
%
\[
D_{[1,0]}(\tup_1) = 1 \hspace{0.3cm} \mathbf{and} \hspace{0.3cm} D_{[1,1]}(\tup_1) = 2
\]
%
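Applying~\Cref{def:semnx-pdbs} to this example (a quick check of the definitions), the encoded $\semN$-PDB assigns
\[
\probOf'(D_{[1,0]}) = 0.3 \hspace{0.3cm} \mathbf{and} \hspace{0.3cm} \probOf'(D_{[1,1]}) = 0.7.
\]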
Importantly, as the following proposition shows, any finite $\semN$-PDB can be encoded as an $\semNX$-PDB, and $\semNX$-PDBs are closed under positive relational algebra queries, the class of queries we focus on in this work.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{prop:semnx-pdbs-are-a-}
$\semNX$-PDBs are a complete representation system for $\semN$-PDBs that is closed under $\raPlus$ queries.
\end{Proposition}
\subsection{Proof of~\Cref{prop:semnx-pdbs-are-a-}}
\begin{proof}
To prove that $\semNX$-PDBs are complete, consider the following construction that for any $\semN$-PDB $\pdb = (\idb, \pd)$ produces an $\semNX$-PDB $\pxdb = (\idb_{\semNX}, \pd')$ such that $\rmod(\pxdb) = \pdb$. Let $\idb = \{D_1, \ldots, D_{\abs{\idb}}\}$. For each world $D_i$ we create a corresponding variable $X_i$.
%variables $X_{i1}$, \ldots, $X_{im}$ where $m = max(D_i)$.
In $\idb_{\semNX}$ we assign each tuple $\tup$ the polynomial:
%
\[
\idb_{\semNX}(\tup) = \sum_{i=1}^{\abs{\idb}} D_i(\tup)\cdot X_{i}
\]
The probability distribution $\pd'$ assigns all world vectors zero probability except for the $\abs{\idb}$ world vectors $\vct{w_i}$ (representing the possible worlds). All elements of $\vct{w_i}$ are zero except for the position corresponding to variable $X_{i}$, which is set to $1$. Unfolding definitions, it is trivial to show that $\rmod(\pxdb) = \pdb$. Thus, $\semNX$-PDBs are a complete representation system.
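As an illustration (our own example), the $\semNX$-PDB from the example above encodes the $\semN$-PDB with worlds $D_1$ (where $\tup_1$ has multiplicity $1$) at probability $0.3$ and $D_2$ (multiplicity $2$) at probability $0.7$; the construction just given would re-encode it by annotating $\tup_1$ with $1\cdot X_1 + 2\cdot X_2$ and setting $\probOf'([1,0]) = 0.3$ and $\probOf'([0,1]) = 0.7$.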
The closure under $\raPlus$ queries follows from the fact that an assignment $\vct{X} \to \{0,1\}$ is a semiring homomorphism and that semiring homomorphisms commute with queries over $\semK$-relations.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Now let us consider computing the expected multiplicity of a tuple $\tup$ in the result of a query $\query$ over an $\semN$-PDB $\pdb$ using the annotation of $\tup$ in the result of evaluating $\query$ over an $\semNX$-PDB $\pxdb$ for which $\rmod(\pxdb) = \pdb$. The expectation of the polynomial $\poly = \query(\pxdb)(\tup)$ based on the probability distribution of $\pxdb$ over the variables in $\pxdb$ is:
\begin{equation}
\expct_{\vct{W} \sim \pd}\pbox{\poly(\vct{W})} = \sum_{\vct{w} \in \{0,1\}^n} \assign_{\vct{w}}(\query(\pxdb)(\tup)) \cdot \probOf(\vct{w})\label{eq:expect-q-nx}
\end{equation}
Since $\semNX$-PDBs are a complete representation system for $\semN$-PDBs that is closed under $\raPlus$, computing the expectation of the multiplicity of a tuple $t$ in the result of an $\raPlus$ query over the $\semN$-PDB $\rmod(\pxdb)$ is the same as computing the expectation of the polynomial $\query(\pxdb)(t)$.
\qed
\end{proof}
\subsection{Proof of~\Cref{prop:expection-of-polynom}}
\label{subsec:expectation-of-polynom-proof}
\begin{proof}
We need to prove for $\semN$-PDB $\pdb = (\idb,\pd)$ and $\semNX$-PDB $\pxdb = (\db',\pd')$ where $\rmod(\pxdb) = \pdb$ that $\expct_{\db \sim \pd}[\query(\db)(t)] = \expct_{\vct{W} \sim \pd'}\pbox{\polyForTuple(\vct{W})}$.
By expanding $\polyForTuple$ and the expectation we have:
\begin{align*}
\expct_{\vct{W} \sim \pd'}\pbox{\polyForTuple(\vct{W})}
& = \sum_{\vct{w} \in \{0,1\}^n}\probOf'(\vct{w}) \cdot Q(\pxdb)(t)(\vct{w})\\
\intertext{From $\rmod(\pxdb) = \pdb$, we have that the range of $\assign_{\vct{w}}(\pxdb)$ is $\idb$, so}
& = \sum_{\db \in \idb}\;\;\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf'(\vct{w}) \cdot Q(\pxdb)(t)(\vct{w})\\
\intertext{In the inner sum, $\assign_{\vct{w}}(\pxdb) = \db$, so $Q(\pxdb)(t)(\vct{w}) = \query(\db)(t)$ is constant over the inner sum and can be factored out:}
& = \sum_{\db \in \idb}\query(\db)(t)\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf'(\vct{w})\\
\intertext{Since $\rmod(\pxdb) = \pdb$, the inner sum is exactly $\probOf(\db)$ by~\Cref{def:semnx-pdbs}, and we get}
& = \sum_{\db \in \idb}\query(\db)(t) \cdot \probOf(\db) \quad = \expct_{\db \sim \pd}[\query(\db)(t)]
\end{align*}
\qed
\end{proof}
\subsection{Supplementary Material for~\Cref{subsec:tidbs-and-bidbs}}\label{subsec:supp-mat-ti-bi-def}
Two important subclasses of $\semNX$-PDBs that are of interest to us are the bag versions of tuple-independent databases (\tis) and block-independent databases (\bis). Under set semantics, a \ti is a deterministic database $\db$ where each tuple $\tup$ is assigned a probability $\prob_\tup$. The set of possible worlds represented by a \ti $\db$ is the set of all subsets of $\db$. The probability of each world is the product of the probabilities of all tuples that exist in it with one minus the probability of each tuple of $\db$ that is not part of this world, i.e., tuples are treated as independent random events. In a \bi, we also assign each tuple a probability, but additionally partition $\db$ into blocks. The possible worlds of a \bi $\db$ are all subsets of $\db$ that contain at most one tuple from each block. Note then that tuples sharing the same block are disjoint events, and the sum of the probabilities of all the tuples in the same block $\block$ is $1$. The probability of such a world is the product of the probabilities of all tuples present in the world. %and one minus the sum of the probabilities of all tuples from blocks for which no tuple is present in the world.
For bag \tis and \bis, we define the probability of a tuple to be the probability that the tuple exists with multiplicity at least $1$.
\AH{This part \emph{below} needs more work if we include it.}
Note that the main difference to the standard definitions of \tis and \bis is that we define them as subclasses of $\semNX$-PDBs and that we use bag semantics. Even though tuples cannot occur more than once in the input \ti or \bi, they can occur with a multiplicity larger than one in the result of a query. Since in \tis and \bis, there is a one-to-one correspondence between tuples in the database and variables, we can interpret a vector $\vct{w} \in \{0,1\}^n$ as denoting which tuples exist in the possible world $\assign_{\vct{w}}(\pxdb)$ (the ones where $\vct{w}[i] = 1$). Denote the vector $\vct{p}$ to be a vector whose elements are the individual probabilities $\prob_i$ of each tuple $\tup_i$. Let $\pd^{(\vct{p})}$ denote the distribution induced by $\vct{p}$.
%
\begin{align}\label{eq:tidb-expectation}
\expct_{\vct{W} \sim \pd^{(\vct{p})}}\pbox{\poly(\vct{W})} = \sum\limits_{\vct{w} \in \{0, 1\}^\numvar} \poly(\vct{w})\prod_{\substack{i \in [\numvar]\\ s.t. w_i = 1}}\prob_i \prod_{\substack{i \in [\numvar]\\s.t. w_i = 0}}\left(1 - \prob_i\right).
\end{align}
%
\BG{Do we need the BIDB formula?}
\BG{Oliver's conjecture: Bag-\tis + Q can express any finite bag-PDB:
A well-known result for set semantics PDBs is that while not all finite PDBs can be encoded as \tis, any finite PDB can be encoded using a \ti and a query. An analogous result holds in our case: any finite $\semN$-PDB can be encoded as a bag \ti and a query (WHAT CLASS? ADD PROOF)
}
\subsection{\Cref{lem:pre-poly-rpoly}}\label{app:subsec-pre-poly-rpoly}
\begin{Lemma}\label{lem:pre-poly-rpoly}
If
$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
then
$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$ for some $\eta \subseteq \{0,\ldots, B\}^\numvar$.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}%[Proof for~\Cref{lem:pre-poly-rpoly}]
Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}.
\qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proposition~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
\noindent Note the following fact:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \{0, 1\}^\numvar$, it holds that
$% \[
\poly(\vct{w}) = \rpoly(\vct{w}).
$% \]
\end{Proposition}
\begin{proof}%[Proof for~\Cref{proposition:q-qtilde}]
Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$; hence dropping exponents, as $\rpoly$ does, cannot change the value on inputs in $\{0, 1\}^\numvar$.
\qed
\end{proof}
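To illustrate (our own example), take $\poly(X_1, X_2) = X_1\cdot(X_1 + X_2)$, whose \abbrSMB expansion is $X_1^2 + X_1X_2$, so that $\rpoly(X_1, X_2) = X_1 + X_1X_2$. On any $\vct{w} \in \{0, 1\}^2$ we have $w_1^2 = w_1$, and hence $\poly(\vct{w}) = \rpoly(\vct{w})$.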
\subsection{Proof of Lemma~\ref{lem:exp-poly-rpoly}}
\begin{proof}
Let $\poly$ be a general polynomial over $\numvar$ variables with highest degree $B$:
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Then, with $\eta$ denoting the set of exponent vectors $\vct{d}$ with $q_{\vct{d}} \neq 0$, in expectation we have
\begin{align}
\expct_{\vct{W}}\pbox{\poly(\vct{W})} &= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \expct_{\vct{W}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar W_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{W_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{W_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5}
\end{align}
Step \cref{p1-s1} follows by linearity of expectation, and \cref{p1-s2} by independence of the variables, which lets the expectation be pushed all the way inside the product. In \cref{p1-s3}, note that $W_i \in \{0, 1\}$, which implies that for any exponent $e \geq 1$, $W_i^e = W_i$. Next, in \cref{p1-s4}, the expectation of a tuple's variable is exactly its probability.
Finally, \cref{p1-s5} holds by the construction in \Cref{lem:pre-poly-rpoly}: $\rpoly(\prob_1,\ldots, \prob_\numvar)$ is exactly the sum, over all monomials, of the monomial's coefficient times the product of the probabilities of its variables.
\qed
\end{proof}
\subsection{Proof of Corollary~\ref{cor:expct-sop}}
\begin{proof}
Note that \cref{lem:exp-poly-rpoly} shows that $\expct\pbox{\poly} =$ $\rpoly(\prob_1,\ldots, \prob_\numvar)$. Therefore, if $\poly$ is already in \abbrSMB form, one only needs to evaluate $\poly(\prob_1,\ldots, \prob_\numvar)$ while ignoring exponents (such an evaluation computes exactly $\rpoly(\prob_1,\ldots, \prob_\numvar)$), which indeed takes $O(\smbOf{|\poly|})$ operations.
\qed
\end{proof}

app_one-pass-analysis.tex Normal file

@ -0,0 +1,203 @@
%root: main.tex
\subsection{$\onepass$ Pseudocode}
Note that we \textit{assume} the original call to \onepass is made on an input circuit \circuit in which the member values \prt, \lwght, and \rwght of every gate have been initialized to Null.
\begin{Definition}[Equivalence ($\equiv$)]
A circuit \circuit is equivalent to a circuit \circuit' if and only if $\polyf(\circuit) = \polyf(\circuit')$.
\end{Definition}
For technical reasons, we require the invariant that every subcircuit \subcircuit corresponding to an internal gate of \circuit has $\degree\left(\subcircuit\right) \geq 1$. To ensure this, the auxiliary algorithm~\ref{alg:reduce} (\reduce) rewrites \circuit into an equivalent circuit \circuit' by iteratively combining non-variable leaf nodes bottom-up, until every remaining internal gate has an input whose subcircuit contains at least one leaf of type \var. It is trivial to see that each such rewrite yields $\subcircuit \equiv \subcircuit'$, and this implies $\circuit \equiv \circuit'$.
\begin{Lemma}\label{lem:reduce}
In $O(\size(\circuit))$, algorithm \reduce inspects input circuit \circuit and outputs an equivalent version \circuit' of \circuit such that all subcircuits \subcircuit of \circuit' have $\degree(\subcircuit) \geq 1$.
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:reduce}]
~\paragraph{\reduce correctness}
Note that for a source gate \gate, $\degree(\gate) = 1$ exactly when $\gate.\type = \var$, and $\degree(\gate) = 0$ otherwise. Lines~\ref{alg:reduce-add-deg} and~\ref{alg:reduce-no-deg} compute \gate.\degval\ accordingly.
We prove that \reduce outputs an equivalent circuit \circuit' by induction over the iterations of \topord. For the base case, we have one node; no rewriting occurs, and \reduce returns \circuit, so trivially $\circuit \equiv \circuit$.
For the inductive hypothesis, we assume that for $k \geq 0$ nodes in \topord, the modified circuit $\circuit_k' \equiv \circuit_k$, where $\circuit_k'$ denotes the circuit at the end of iteration $k$. Similarly, when discussing \Cref{alg:reduce} pseudocode, $\gate_{k}$ denotes the gate in position $k$ of \topord, and $\gate_{k_\linput}$ ($\gate_{k_\rinput}$) denotes the left (right) input of $\gate_{k}$.
We now prove for $k + 1$ gates in \topord that $\circuit_{k + 1}' \equiv \circuit_{k + 1}$. Note that if the gate $\gate_{k + 1}$ is a source node, then this is again the base case and we are finished. If $\gate_{k + 1}$ is an internal node, then $\gate_{k + 1}.\type$ must either be $\circmult$ or $\circplus$.
When $\gate_{k + 1}$ is $\circmult$, then it is the case that either $\degree(\gate_{{k + 1}_\linput}) \geq 1$ or $\gate_{{k + 1}_\linput}.\type$ is $\tnum$ and likewise for $\gate_{{k + 1}_\rinput}$. There are then four possibilities, only one of which will prompt a rewrite, namely when we have that both inputs have $\degree(\gate_{{k + 1}_i}) = 0$. In such a case, $\gate_{k + 1}.\val \gets \gate_{{k + 1}_\linput}.\val \times \gate_{{k + 1}_\rinput}.\val$, and the inputs are deleted. Note that since $\gate_{{k + 1}_\linput}.\type = \gate_{{k + 1}_\rinput}.\type = \tnum$ that we have two constants being multiplied, and that for subcircuit $\subcircuit = (\times, \tnum_1, \tnum_2)$ and $\tnum' = \tnum_1 \times \tnum_2$, $\polyf(\subcircuit) = \polyf(\tnum')$ which implies that for the rewritten \subcircuit', $\subcircuit \equiv \subcircuit'$.
An analogous argument applies when $\gate_{k + 1}.\type$ is $\circplus$.\qed
\paragraph{\reduce Run-time Analysis}
$O(\size(\circuit))$ trivially follows by the single iterative pass over the \topord of \circuit, where, as can be seen in lines~\ref{alg:reduce-var},~\ref{alg:reduce-num},~\ref{alg:reduce-mult}, and~\ref{alg:reduce-plus} a constant number of operations are performed on each node.\qed
\end{proof}
\subsection{$\onepass$ Example}
\begin{Example}\label{example:one-pass}
Let $\etree$ encode the expression $(X_1 + X_2)(X_1 - X_2) + X_2^2$. After one pass, \cref{alg:one-pass-iter} would have computed the following weight distribution. For the two inputs of the root $+$ node $\etree$, $\etree.\lwght = \frac{4}{5}$ and $\etree.\rwght = \frac{1}{5}$. Similarly, let $\stree$ denote the left-subtree of $\etree_{\lchild}$, $\stree.\lwght = \stree.\rwght = \frac{1}{2}$. This is depicted in~\Cref{fig:expr-tree-T-wght}.
\end{Example}
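The weights in~\Cref{example:one-pass} can be verified bottom-up (a quick check against~\Cref{alg:one-pass-iter}): each inner $\circplus$ gate gets \prt\ value $1 + 1 = 2$ (the coefficient $-1$ contributes $\abs{-1} = 1$), so $\etree_\lchild.\prt = 2\cdot 2 = 4$ and $\etree_\rchild.\prt = 1\cdot 1 = 1$, giving $\etree.\prt = 4 + 1 = 5$, $\etree.\lwght = \frac{4}{5}$, and $\etree.\rwght = \frac{1}{5}$.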
\begin{figure}[h!]
\begin{tikzpicture}[thick, every tree node/.style={default_node, thick, draw=black, black, circle, text width=0.3cm, font=\bfseries, minimum size=0.65cm}, every child/.style={black}, edge from parent/.style={draw, thick},
level 1/.style={sibling distance=0.95cm},
level 2/.style={sibling distance=0.7cm},
%level 2+/.style={sibling distance=0.625cm}
%level distance = 1.25cm,
%sibling distance = 1cm,
%every node/.append style = {anchor=center}
]
\Tree [.\node(root){$\boldsymbol{+}$};
\edge [wght_color] node[midway, auto= right, font=\bfseries, gray] {$\bsym{\frac{4}{5}}$}; [.\node[highlight_color](tl){$\boldsymbol{\times}$};
[.\node(s){$\bsym{+}$};
\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sl){$\bsym{x_1}$}; ]
\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sr){$\bsym{x_2}$}; ]
]
[.\node(sp){$\bsym{+}$};
\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spl){$\bsym{x_1}$}; ]
\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spr){$\bsym{\times}$};
[.$\bsym{-1}$ ] [.$\bsym{x_2}$ ]
]
]
]
\edge [wght_color] node[midway, auto=left, font=\bfseries, gray] {$\bsym{\frac{1}{5}}$}; [.\node[highlight_color](tr){$\boldsymbol{\times}$};
[.$\bsym{x_2}$
\edge [draw=none]; [.\node[draw=none]{}; ]
\edge [draw=none]; [.\node[draw=none]{}; ]
]
[.$\bsym{x_2}$ ] ]
]
% labels for plus node children, with arrows
\node[left=2pt of sl, highlight_color, inner sep=0pt] (sl-label) {$\stree_\lchild$};
\draw[highlight_color] (sl) -- (sl-label);
\node[right=2pt of sr, highlight_color, inner sep=0pt] (sr-label) {$\stree_\rchild$};
\draw[highlight_color] (sr) -- (sr-label);
\node[below left=2pt of spl, inner sep=0pt, highlight_color](spl-label) {$\stree_\lchild'$};
\draw[highlight_color] (spl) -- (spl-label);
\node[right=2pt of spr, highlight_color, inner sep=0] (spr-label) {$\stree_\rchild'$};
\draw[highlight_color] (spr) -- (spr-label);
\node[above left=2pt of tl, inner sep=0pt, highlight_color] (tl-label) {$\etree_\lchild$};
\draw[highlight_color] (tl) -- (tl-label);
\node[above right=2pt of tr, highlight_color, inner sep=0pt] (tr-label) {$\etree_\rchild$};
\node[above = 2pt of root, highlight_color, inner sep=0pt, font=\bfseries] (root-label) {$\etree$};
\node[above = 2pt of s, highlight_color, inner sep=0pt, font=\bfseries] (s-label) {$\stree$};
\node[above = 2pt of sp, highlight_color, inner sep=0pt, font=\bfseries] (sp-label) {$\stree'$};
\draw[highlight_color] (tr) -- (tr-label);
% \draw[<-|, highlight_color] (s) -- (s-label);
% \draw[<-|, highlight_color] (sp) -- (sp-label);
% \draw[<-|, highlight_color] (root) -- (root-label);
%\node[above right=0.7cm of TR, highlight_color, inner sep=0pt, font=\bfseries] (tr-comment) {$\etree_\rchild$};
% \draw[<-|, highlight_color] (TR) -- (tr-comment);
\end{tikzpicture}
\caption{Weights computed by $\onepass$ in~\Cref{example:one-pass}.}
\label{fig:expr-tree-T-wght}
\end{figure}
\subsection{\onepass}
\begin{algorithm}[h!]
\caption{\reduce$(\circuit)$}
\label{alg:reduce}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Reduced Circuit
\For{\gate in \topord(\circuit)}\label{alg:reduce-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\If{\gate.\type $=$ \var}\label{alg:reduce-var}
\State \gate.\degval $\gets 1$\label{alg:reduce-add-deg}
\ElsIf{\gate.\type $=$ \tnum}\label{alg:reduce-num}
\State \gate.\degval $\gets 0$\label{alg:reduce-no-deg}
\ElsIf{\gate.\type $= \circmult$}\label{alg:reduce-mult}
\State \gate.\degval $\gets \gate_\linput.\degval + \gate_\rinput.\degval$
\If{\gate.\degval $= 0$}
\State \gate.\type $\gets \tnum$
\State $\gate.\val \gets \gate_\linput.\val \times \gate_\rinput.\val$
\State $\gate_\linput, \gate_\rinput \gets \nullval$
\EndIf
\Else \label{alg:reduce-plus}
\State \gate.\degval $\gets \max(\gate_\linput.\degval, \gate_\rinput.\degval)$
\If{\gate.\degval $= 0$}
\State \gate.\type $\gets \tnum$
\State $\gate.\val \gets \gate_\linput.\val + \gate_\rinput.\val$
\State $\gate_\linput, \gate_\rinput \gets \nullval$
\EndIf
\EndIf
\EndFor
\State \Return $\circuit$
\end{algorithmic}
\end{algorithm}
\begin{algorithm}[h!]
\caption{\onepass$(\circuit)$}
\label{alg:one-pass-iter}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Annotated Circuit
\Ensure \vari{sum} $\in \reals$
\State $\circuit' \gets \reduce(\circuit)$
\For{\gate in \topord(\circuit')}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit'}
\If{\gate.\type $=$ \var}
\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
\ElsIf{\gate.\type $=$ \tnum}
\State \gate.\prt $\gets \abs{\gate.\val}$\label{alg:one-pass-num}
\ElsIf{\gate.\type $= \circmult$}
\State \gate.\prt $\gets \gate_\linput.\prt \times \gate_\rinput.\prt$\label{alg:one-pass-mult}
\Else
\State \gate.\prt $\gets \gate_\linput.\prt + \gate_\rinput.\prt$\label{alg:one-pass-plus}
\State \gate.\lwght $\gets \frac{\gate_\linput.\prt}{\gate.\prt}$\label{alg:one-pass-lwght}
\State \gate.\rwght $\gets \frac{\gate_\rinput.\prt}{\gate.\prt}$\label{alg:one-pass-rwght}
\EndIf
\State \vari{sum} $\gets \gate.\prt$
\EndFor
\State \Return (\vari{sum}, $\circuit'$)
\end{algorithmic}
\end{algorithm}
\subsection{Proof of~\Cref{lem:one-pass}}\label{sec:proof-one-pass}
\paragraph{\onepass Correctness}
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in line ~\ref{alg:one-pass-loop} over the topological order \topord of the input circuit \circuit. Note that \topord is the standard definition of a topological ordering over the DAG structure of \circuit.
For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum. In this case, as per \Cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt\ as $1$ and $\abs{\circuit.\val}$ respectively.
For the inductive hypothesis, assume that \onepass correctly computes \gate.\prt, \gate.\lwght, and \gate.\rwght for all gates \gate visited in the first $k > 0$ iterations over \topord.
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit. %By the hypothesis the first $k$ gates (alternatively \textit{iterations}) have correctly computed values.
Note that $\gate_{k + 1}$ comes last in the topological ordering of the gates $\gate_\vari{i}$ for $i \in [k + 1]$. It is also the case that $\gate_{k+1}$ has two inputs. Finally, note that for $\size(\circuit) > 1$, if $\gate_{k+1}$ is a leaf node, we are back to the base case. Otherwise, $\gate_{k + 1}$ is an internal node with $\gate_{k+1}.\type = \circplus$ or $\gate_{k+1}.\type = \circmult$.
When $\gate_{k+1}.\type = \circplus$, then by line ~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \Cref{eq:T-all-ones}. Further, lines ~\ref{alg:one-pass-lwght} and ~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$. Note that all values needed for each computation have been correctly computed by the I.H.
When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild}.\prt \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed is correct, as per \Cref{eq:T-all-ones}.
\paragraph{\onepass Runtime}
It is known that $\topord(G)$ is computable in linear time. Next, each of the $\size(\circuit)$ iterations of the loop in~\Cref{alg:one-pass-loop} takes $O(1)$ arithmetic operations. In the RAM model with word size $O(\log{N})$ for input size $N$, an arithmetic operation on $M$-bit numbers takes $O(\frac{M}{\log{N}})$ time. Since each of the arithmetic operations at a given gate is over numbers with bit size $O(\log{\abs{\circuit}(1,\ldots, 1)})$, we obtain the overall runtime of $O\left(\size(\circuit)\cdot \frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$.
\paragraph{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to have $O(\log{N})$ bits}
For our runtime results to be relevant, it must be the case that the sum of coefficients computed by \onepass has $O(\log{N})$ bits (for fixed $k$), matching the word size of the RAM model, where $N$ is the size of the input; here the size of the input is $\size(\circuit)$. We show that when $\size(\circuit_\linput) = N_\linput$ and $\size(\circuit_\rinput) = N_\rinput$, where $N_\linput + N_\rinput \leq N$, this is indeed the case.
\begin{proof}%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
To prove this result, we start by proving that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for $\degree(\circuit) = k$ and $\size(\circuit) = N$.
For the base case, we have that $\depth(\circuit) = 0$, and there can only be one node, which must contain a coefficient (or constant) of $1$. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$ and $\size(\circuit) = 1$, so indeed $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{2^k} = 1^{2^0} = 1$.
For the inductive hypothesis, assume that for any circuit \circuit with $\depth(\circuit) \leq \ell$ for some $\ell \geq 0$, it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$.
For the inductive step, we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or a $\circplus$ gate. Consider first the case when the sink node is $\circmult$. Let $k_\linput, k_\rinput$ denote $\degree(\circuit_\linput)$ and $\degree(\circuit_\rinput)$ respectively. Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \leq (N-1)^{2^{k_\linput}} \circmult (N - 1)^{2^{k_\rinput}}\nonumber\\
&= (N-1)^{2^{k_\linput} + 2^{k_\rinput}}\label{eq:sumcoeff-times-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
We derive the final upper bound from \cref{eq:sumcoeff-times-upper} by noting that $2^{k_\linput} + 2^{k_\rinput} \leq 2^{k_\linput + k_\rinput} = 2^k$, which holds when $k_\linput, k_\rinput \geq 1$; if one input has degree $0$, it is a single constant leaf after \reduce, and the claim follows directly from the inductive bound on the other input.
For the case when the sink node is a $\circplus$ node, then we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \leq
N_\linput^{2^{k_\linput}} + N_\rinput^{2^{k_\rinput}}\nonumber\\
&\leq N_\linput^{2^k} + N_\rinput^{2^k}\label{eq:sumcoeff-plus-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
Similar to the $\circmult$ case, \cref{eq:sumcoeff-plus-upper} upper bounds its LHS since $k = \max(k_\linput, k_\rinput)$ for a $\circplus$ gate, so each exponent can only grow. The final inequality holds since $N_\linput^{2^k} + N_\rinput^{2^k} \leq \left(N_\linput + N_\rinput\right)^{2^k} \leq N^{2^k}$, using the constraint $N_\linput + N_\rinput \leq N$.
Since $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for all circuits such that all $\circplus$ gates share at most one gate with their sibling (across their respective subcircuits), we have $\log{N^{2^k}} = 2^k \cdot \log{N}$, which for fixed $k$ yields the desired $O(\log{N})$ bits for $O(1)$ arithmetic operations.\qed
\end{proof}

app_samp-monom-analysis.tex Normal file

@ -0,0 +1,134 @@
%root: main.tex
\subsection{\sampmon Notes}
While we would like to take advantage of the space efficiency gained by using a circuit \circuit instead of an expression tree \etree, we do not know of a method that samples directly from the more compact representation without introducing bias.
The efficiency gains of circuits over trees lie in the ability of circuits to store each \emph{distinct} subexpression only once, no matter how often it is reused; e.g., the subcircuit for $x^2$ can be shared when computing $x^4$. However, to avoid biased sampling, it is imperative to sample from both inputs of a multiplication gate independently, which is indeed the approach of \sampmon.
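To see why both inputs of a $\circmult$ gate must be sampled independently (our own illustration), consider $\circuit = (x + y)\circmult(x + y)$, whose expansion is $x^2 + 2xy + y^2$ with $\abs{\circuit}\polyinput{1}{1} = 4$. Sampling each $\circplus$ input independently returns the monomial $xy$ with probability $\frac{1}{4} + \frac{1}{4} = \frac{2}{4}$, matching its coefficient weight, whereas reusing a single sample for both inputs would never return $xy$.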
\subsection{Proof of~\Cref{lem:sample}}\label{sec:proof-sample-monom}
We first need to show that $\sampmon$ indeed returns a monomial $\monom$,\footnote{Technically it returns $\var(\monom)$ but for less cumbersome notation we will refer to $\var(\monom)$ simply by $\monom$ in this proof.} such that $(\monom, \coef)$ is in $\expansion{\circuit}$, which we do by induction on the depth of $\circuit$.
For the base case, let the depth $d$ of $\circuit$ be $0$. We have that the root node is either a constant $\coef$, for which by line~\ref{alg:sample-num-return} we return $\{~\}$, or a node with $\circuit.\type = \var$ and $\circuit.\val = x$, for which by line~\ref{alg:sample-var-return} we return $\{x\}$. Both cases sample a monomial, and the base case is proven.
For the inductive hypothesis, assume that for $d \leq k$ for some $k \geq 0$, that it is indeed the case that $\sampmon$ returns a monomial.
For the inductive step, let us take a circuit $\circuit$ with $d = k + 1$. Note that each input has depth $d \leq k$, and by the inductive hypothesis both of them return a valid monomial. The root can be either a $\circplus$ or a $\circmult$ node. For the case of a $\circplus$ root node, line~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the inputs of the root. By the inductive hypothesis, a monomial in $\expansion{\circuit}$ is returned from either input, so for a $\circplus$ root node a valid monomial is returned by $\sampmon$. When the root is a $\circmult$ node, line~\ref{alg:sample-times-union} computes the set union of the monomials returned by the two inputs of the root, and it follows from~\Cref{def:expand-circuit} that \monom is a valid monomial in some $(\monom, \coef) \in \expansion{\circuit}$.
We will next prove, also by induction on the depth $d$ of $\circuit$, that for $(\monom,\coef) \in \expansion{\circuit}$, the monomial \monom is returned by $\sampmon$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
For the base case $d = 0$, by definition~\ref{def:express-tree} we know that the root has to be either a coefficient or a variable. In either case, the probability of the value returned is $1$ since there is only one value to sample from. When the root is a variable $x$, the algorithm correctly returns $(\{x\}, 1)$. When the root is a coefficient, \sampmon\ correctly returns $(\{~\}, sign(\coef))$.
For the inductive hypothesis, assume that for $d \leq k$ with $k \geq 0$, $\sampmon$ indeed samples $\monom$ in $(\monom, \coef)$ in $\expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
We prove now for $d = k + 1$ the inductive step holds. It is the case that the root of $\circuit$ has up to two inputs $\circuit_\linput$ and $\circuit_\rinput$. Since $\circuit_\linput$ and $\circuit_\rinput$ are both depth $d \leq k$, by inductive hypothesis, $\sampmon$ will sample both monomials $\monom_\lchild$ in $(\monom_\lchild, \coef_\lchild)$ of $\expansion{\circuit_\linput}$ and $\monom_\rchild$ in $(\monom_\rchild, \coef_\rchild)$ of $\expansion{\circuit_\rinput}$, from $\circuit_\linput$ and $\circuit_\rinput$ with probability $\frac{|\coef_\lchild|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_\rchild|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$.
The root has to be either a $\circplus$ or $\circmult$ node.
Consider the case when the root is $\circmult$. Note that we are sampling a term from $\expansion{\circuit}$. Consider $(\monom, \coef)$ in $\expansion{\circuit}$, where $\monom$ is the sampled monomial. Notice also that it is the case that $\monom = \monom_\lchild \circmult \monom_\rchild$, where $\monom_\lchild$ is coming from $\circuit_\linput$ and $\monom_\rchild$ from $\circuit_\rinput$. The probability that \sampmon$(\circuit_{\lchild})$ returns $\monom_\lchild$ is $\frac{|\coef_{\monom_\lchild}|}{|\circuit_\linput|(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ for $\monom_\rchild$. Since both $\monom_\lchild$ and $\monom_\rchild$ are sampled with independent randomness, the final probability for sample $\monom$ is then $\frac{|\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|}{|\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)}$. For $(\monom, \coef)$ in \expansion{\circuit}, it is indeed the case that $|\coef| = |\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|$ and that $\abs{\circuit}(1,\ldots, 1) = |\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)$, and therefore $\monom$ is sampled with correct probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
For the case when $\circuit.\val = \circplus$, \sampmon\ will sample monomial $\monom$ from one of its inputs. By the inductive hypothesis we know that any $\monom_\lchild$ in $\expansion{\circuit_\linput}$ and any $\monom_\rchild$ in $\expansion{\circuit_\rinput}$ will be sampled with probability $\frac{|\coef_{\monom_\lchild}|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ respectively, where either $\monom_\lchild$ or $\monom_\rchild$ will equal $\monom$, depending on whether $\circuit_\linput$ or $\circuit_\rinput$ is sampled. Assume that $\monom$ is sampled from $\circuit_\linput$; a symmetric argument holds for the case when $\monom$ is sampled from $\circuit_\rinput$. Notice that the probability of choosing $\circuit_\linput$ from $\circuit$ is $\frac{\abs{\circuit_\linput}\polyinput{1}{1}}{\abs{\circuit_\linput}\polyinput{1}{1} + \abs{\circuit_\rinput}\polyinput{1}{1}}$ as computed by $\onepass$. Then, since $\sampmon$ goes top-down and each sampling choice is independent (which follows from the randomness at the root of $\circuit$ being independent from the randomness used in its subtrees), the probability for $\monom$ to be sampled from $\circuit$ equals the product of the probability that $\circuit_\linput$ is sampled from $\circuit$ and that $\monom$ is sampled in $\circuit_\linput$:
\begin{align*}
&\probOf(\sampmon(\circuit) = \monom) = \\
&\probOf(\sampmon(\circuit_\linput) = \monom) \cdot \probOf(SampledChild(\circuit) = \circuit_\linput)\\
&= \frac{|\coef_\monom|}{|\circuit_\linput|(1,\ldots, 1)} \cdot \frac{\abs{\circuit_\linput}(1,\ldots, 1)}{|\circuit_\linput|(1,\ldots, 1) + |\circuit_\rinput|(1,\ldots, 1)}\\
&= \frac{|\coef_\monom|}{\abs{\circuit}(1,\ldots, 1)},
\end{align*}
and we obtain the desired result.
\paragraph{Run-time Analysis}
It is easy to check that except for lines~\ref{alg:sample-times-union} and~\ref{alg:sample-plus-bsamp}, all lines take $O(1)$ time. Consider an execution of line~\ref{alg:sample-times-union}. We note that a given set of variables is added to some set at most once, and since the sum of the sizes of the sets at a given level is at most $\degree(\circuit)$, each gate visited takes $O(\log{\degree(\circuit)})$ time. For line~\ref{alg:sample-plus-bsamp}, we need more than $O(1)$ time when $\abs{\circuit}(1,\ldots, 1) > \size(\circuit)$: in this case, each sample requires $O\left(\frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$ operations, since we need to read in and then compare numbers of $\log{\abs{\circuit}(1,\ldots, 1)}$ bits. Let \cost(\circuit) (\Cref{eq:cost-sampmon}) be an upper bound on the number of gates visited by \sampmon. Then the runtime is $O\left(\cost(\circuit)\cdot \log{\degree(\circuit)}\cdot\frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$.
We now bound the number of recursive calls in $\sampmon$ by $O\left(\left(\degree(\circuit) + 1\right)\cdot\depth(\circuit)\right)$.
Let \cost$(\cdot)$ be a function that models an upper bound on the number of gates that can be visited in the run of \sampmon. We define \cost$(\cdot)$ recursively as follows.
\begin{equation}
\cost(\circuit) =
\begin{cases}
1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) & \textbf{if } \text{\circuit.\type = }\circmult\\
1 + \max\left(\cost(\circuit_\linput), \cost(\circuit_\rinput)\right) & \textbf{if } \text{\circuit.\type = \circplus}\\
1 & \textbf{otherwise}
\end{cases}\label{eq:cost-sampmon}
\end{equation}
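As a quick sanity check of this recurrence, consider a circuit \circuit whose sink is a $\circmult$ gate over two $\circplus$ gates, each of which takes two variable leaves as input. Then $\degree(\circuit) = 2$, $\depth(\circuit) = 2$, and
\[\cost(\circuit) = 1 + \inparen{1 + \max(1, 1)} + \inparen{1 + \max(1, 1)} = 5,\]
consistent with the bound $2\degree(\circuit)\cdot\depth(\circuit) + 1 = 9$ established below.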
To show that \Cref{eq:cost-sampmon} upper bounds the number of gates visited by $\sampmon$, note that when $\sampmon$ visits a gate with $\circuit.\type = \circmult$, line~\ref{alg:sample-times-for-loop} visits each input of \circuit, matching the first case of (\ref{eq:cost-sampmon}). When $\circuit.\type = \circplus$, line~\ref{alg:sample-plus-bsamp} visits exactly one of the input gates, which may or may not be the subcircuit with the maximum number of gates traversed; hence $\cost(\cdot)$ is an upper bound. Finally, when $\circuit.\type \in \{\var, \tnum\}$, i.e., a source gate, only one gate is visited.
We prove the following inequality holds.
\begin{equation}
2\degree(\circuit) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)\label{eq:strict-upper-bound}
\end{equation}
Note that \Cref{eq:strict-upper-bound} implies the claimed runtime. We prove \Cref{eq:strict-upper-bound} for the number of gates traversed in \sampmon using induction over $\depth(\circuit)$. Recall that \reduce has imposed the invariant that all subcircuits \subcircuit in \circuit must have $\subcircuit.\degval \geq 1$.
For the base case $\depth(\circuit) = 0$, the circuit is a single gate, so $\cost(\circuit) = 1$ and the inequality $2\degree(\circuit) \cdot \depth(\circuit) + 1 \geq 1 = \cost(\circuit)$ holds.
For the inductive hypothesis, we assume the bound holds for every circuit with $\depth(\circuit) \leq \ell$, for some $\ell \geq 0$.
Now consider an arbitrary circuit \circuit with $\depth(\circuit) = \ell + 1$. By definition $\circuit.\type \in \{\circplus, \circmult\}$, and since $\depth(\circuit) \geq 1$, the sink of \circuit must have inputs. Further, by the inductive hypothesis, the inputs $\circuit_i$ for $i \in \{\linput, \rinput\}$ of the sink gate \circuit uphold the bound
\begin{equation}
2\degree(\circuit_i)\cdot \depth(\circuit_i) + 1 \geq \cost(\circuit_i).\label{eq:ih-bound-cost}
\end{equation}
It is also true that $\depth(\circuit_\linput) \leq \depth(\circuit) - 1$ and $\depth(\circuit_\rinput) \leq \depth(\circuit) - 1$.
If \circuit.\type $= \circplus$, then $\degree(\circuit) = \max\left(\degree(\circuit_\linput), \degree(\circuit_\rinput)\right)$. Otherwise \circuit.\type = $\circmult$ and $\degree(\circuit) = \degree(\circuit_\linput) + \degree(\circuit_\rinput)$. In either case it is true that $\depth(\circuit) = \max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1$.
If \circuit.\type $= \circmult$, then, substituting values, it suffices to show that
\begin{align}
&2\left(\degree(\circuit_\linput) + \degree(\circuit_\rinput)\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) + 1 \nonumber\\%\label{eq:times-lhs}\\
&\qquad\geq 2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 2 \degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) + 3\label{eq:times-middle} \\
&\qquad\geq 1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) = \cost(\circuit) \text{\quad (by \Cref{eq:cost-sampmon})}\label{eq:times-rhs}.
\end{align}
To prove (\ref{eq:times-middle}), first expand the LHS to obtain
\begin{equation}
%(\ref{eq:times-lhs})
2\degree(\circuit_\linput)\depth_{\max} + 2\degree(\circuit_\rinput)\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1\label{eq:times-lhs-expanded}
\end{equation}
where $\depth_{\max}$ is used to denote the maximum depth of the two input subcircuits.
Let us now simplify inequality (\ref{eq:times-middle}). Since $\depth_{\max} \geq \depth(\circuit_i)$ for each input $i \in \{\linput, \rinput\}$, inequality (\ref{eq:times-middle}) follows from
\begin{align}
&2\degree(\circuit_\linput)\depth_{\max} + 2\degree(\circuit_\rinput)\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1 \nonumber\\
&\qquad \geq 2\degree(\circuit_\linput)\depth_{\max} + 2 \degree(\circuit_\rinput)\depth_{\max} + 3\nonumber\\
&\iff 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1 \geq 3.\label{eq:times-lhs-middle-step1}
\end{align}
Note that by the \emph{reduced} invariant of \reduce, a circuit \circuit with $\depth(\circuit) \geq 1$ will always have at least one input with $\degree(\circuit_i) \geq 1$. Thus, \Cref{eq:times-lhs-middle-step1} follows, and the inequality is upheld.
Finally, (\ref{eq:times-rhs}) holds because its RHS is exactly \Cref{eq:cost-sampmon} for the case $\circuit.\type = \circmult$, and (\ref{eq:times-middle}) follows from it by substituting the upper bound of (\ref{eq:ih-bound-cost}) for each $\cost(\circuit_i)$. This proves \Cref{eq:strict-upper-bound} for the $\circmult$ case.
For the case when \circuit.\type $= \circplus$, substituting values yields
\begin{align}
&2\max(\degree(\circuit_\linput), \degree(\circuit_\rinput)) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) +1\nonumber\\%\label{eq:plus-lhs-inequality}\\
&\qquad \geq \max\left(2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 1, 2\degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) +1\right) + 1\label{eq:plus-middle}\\
&\qquad \geq 1 + \max(\cost(\circuit_\linput), \cost(\circuit_\rinput)) = \cost(\circuit)\label{eq:plus-rhs}
\end{align}
To prove (\ref{eq:plus-middle}), we can rewrite the LHS as
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 1,\label{eq:plus-lhs-expanded}
\end{equation}
where $\degree_{\max} = \max\inparen{\degree(\circuit_\linput), \degree(\circuit_\rinput)}$.
Since $\degree_{\max} \cdot \depth_{\max} \geq \degree(\circuit_i)\cdot \depth(\circuit_i),$ the following upper bound holds for the RHS of (\ref{eq:plus-middle}):
\begin{equation}
2\degree_{\max}\depth_{\max} + 2 \geq \max\left(2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 1, 2\degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) +1\right) + 1.\label{eq:plus-middle-expanded}
\end{equation}
Substituting the LHS of (\ref{eq:plus-middle-expanded}) for the RHS of (\ref{eq:plus-middle}), it remains to verify that
\begin{align}
&2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 1 \geq 2\degree_{\max}\depth_{\max} + 2\nonumber\\
&\iff 2\degree_{\max} + 1 \geq 2\label{eq:plus-upper-bound-final}.
\end{align}
As in the $\circmult$ case the \emph{reduced} invariant of \reduce implies that $\degree_{\max} \geq 1$, and (\ref{eq:plus-upper-bound-final}) follows. This proves (\ref{eq:plus-middle}).
Similar to the case of $\circuit.\type = \circmult$, (\ref{eq:plus-rhs}) follows by equations $(\ref{eq:cost-sampmon})$ and $(\ref{eq:ih-bound-cost})$.
This proves (\ref{eq:strict-upper-bound}) for the $\circplus$ case, and thus the claimed $O\left(k\log{k}\cdot \frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\cdot\depth(\circuit)\right)$ runtime for $k = \degree(\circuit)$ follows.

%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Supplementary Material for~\Cref{prop:expection-of-polynom}}\label{subsec:supp-mat-background}
\input{app_notation-background}
\section{Missing details from Section~\ref{sec:hard}}
\label{app:single-mult-p}
\input{app_hardness-results}
\section{Missing Details from Section~\ref{sec:algo}}\label{sec:proofs-approx-alg}
\input{app_approx-alg-analysis}
\input{app_one-pass-analysis}
\input{app_samp-monom-analysis}
\subsection{Experimental Results}\label{app:subsec:experiment}
\input{experiments}
\section{Circuits}\label{app:sec-cicuits}
\subsection{Representing Polynomials with Circuits}\label{app:subsec-rep-poly-lin-circ}
\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
Each vertex $v \in V_{Q,\pxdb}$ in the arithmetic circuit for
\[\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}\]
encodes a polynomial, realized as
\[\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = +\\
\prod_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
\ell(v) & \textbf{otherwise}
\end{cases}\]
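As an illustration of this recurrence, here is a hypothetical Python sketch that evaluates $\getpoly{v}$ over such a labeled DAG; the \texttt{label} and \texttt{inputs} accessors are assumptions for illustration (labels are either an operator symbol or a leaf value, e.g., a sympy symbol or a constant).
\begin{verbatim}
from functools import reduce
from operator import add, mul

def lin(v, label, inputs):
    # Sum the inputs at a '+' vertex, multiply them at a '*' vertex,
    # and return the label itself at a source vertex. (Memoization
    # over shared vertices is omitted for brevity.)
    l = label(v)
    if l in ('+', '*'):
        op = add if l == '+' else mul
        return reduce(op, (lin(u, label, inputs) for u in inputs(v)))
    return l
\end{verbatim}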
We define the circuit for a select-union-project-join query $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$.
\caseheading{Base Relation}
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
This circuit has $|R|$ vertices.
\caseheading{Selection}
Let $Q = \sigma_\theta \inparen{Q_1}$.
We re-use the circuit for $Q_1$.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb}$, let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$, and let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$. Define
$$\phi_{Q,\pxdb}(t) =
\phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t).$$
Tuples that fail the selection predicate are thus no longer mapped to by $\phi_{Q,\pxdb}$; dead sinks are iteratively removed, and so
this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
\caseheading{Projection}
Let $Q = \pi_{\vct A} {Q_1}$.
We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
$$E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
% \AH{Is the rightmost operator \emph{supposed} to be a $-$? In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
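The fan-in correction itself is mechanical; the following hypothetical sketch (gate-allocation details assumed) pairs pending inputs until a single binary root remains.
\begin{verbatim}
def fanin_tree(inputs, make_gate):
    # Replace a vertex of in-degree d > 2 by a balanced binary tree.
    # make_gate(l, r) allocates a fresh binary '+' (or 'x') gate; each
    # call retires one pending input, so d inputs use d - 1 new gates,
    # which is the accounting used in the bound above.
    layer = list(inputs)
    while len(layer) > 1:
        nxt = [make_gate(layer[i], layer[i + 1])
               for i in range(0, len(layer) - 1, 2)]
        if len(layer) % 2:          # odd input carries to next layer
            nxt.append(layer[-1])
        layer = nxt
    return layer[0]
\end{verbatim}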
\caseheading{Union}
Let $Q = {Q_1} \cup {Q_2}$.
We merge the circuits for ${Q_1}$ and ${Q_2}$ and produce a sum vertex for each tuple that appears in both sides of the union.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
\[
\phi_{Q,\pxdb}(t) = \begin{cases}
v_t & \textbf{if } t \in {Q_1} \cap {Q_2}\\
\phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
\phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
\end{cases}\]
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\caseheading{$k$-ary Join}
Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
We merge the circuits for ${Q_1}, \ldots, {Q_k}$ and produce a multiplication vertex for each tuple resulting from the join.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
{\small
\begin{multline*}
E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
\left\{\;
(\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
\ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
\end{multline*}
}
Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$.
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof for~\Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
We prove the claim by induction over the structure of the query plan. The base case is a base relation $Q = R$, where the claim holds trivially since $|V_{R,\pxdb}| = |R|$.
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i,\pxdb}| \leq (k_i-1)\qruntime{Q_i,\pxdb}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and since $\qruntime{Q,\pxdb} = \qruntime{Q_1,\pxdb}$ by definition, we have $|V_{Q,\pxdb}| \leq (k-1) \qruntime{Q,\pxdb}$.
% \AH{Technically, $\kElem$ is the degree of $\poly_1$, but I guess this is a moot point since one can argue that $\kElem$ is also the degree of $\poly$.}
% OK: Correct
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
% \AH{The combination of terms above doesn't follow the details for projection above.}
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}| + 2 \qruntime{Q_1,\pxdb}\\
\intertext{(From the inductive assumption)}
& \leq (k-1)\qruntime{Q_1,\pxdb} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \le (k-1)\qruntime{Q,\pxdb}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+\qruntime{Q_1,\pxdb}+\qruntime{Q_2,\pxdb}|\\
\intertext{(From the inductive assumption)}
& \leq (k-1)(\qruntime{Q_1,\pxdb} + \qruntime{Q_2,\pxdb}) + (|Q_1| + |Q_2|)\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \leq (k-1)(\qruntime{Q,\pxdb}).
\end{align*}
\caseheading{$k$-ary Join}
Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k-1$}
& \leq (k-1)\qruntime{Q_1,\pxdb}+\ldots+(k-1)\qruntime{Q_k,\pxdb}+\\
&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
& \leq (k-1)(\qruntime{Q_1,\pxdb}+\ldots+\qruntime{Q_k,\pxdb}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& = (k-1)\qruntime{Q,\pxdb}.
\end{align*}
The above cases cover all operators of a select-union-project-join query, so the property follows for all such queries by induction, completing the proof.

%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Supplementary Material for~\Cref{prop:expection-of-polynom}}\label{subsec:supp-mat-background}
To justify the use of $\semNX$-databases, we need to show that we can encode any $\semN$-PDB in this way and that the query semantics over this representation coincides with query semantics over $\semN$-PDB. For that it will be opportune to define representation systems for $\semN$-PDBs.\BG{cite}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Representation System]\label{def:representation-syste}
A representation system for $\semN$-PDBs is a tuple $(\reprs, \rmod)$ where $\reprs$ is a set of representations and $\rmod$ associates with each $\repr \in \reprs$ an $\semN$-PDB $\pdb$. We say that a representation system is \emph{closed} under a class of queries $\qClass$ if for any query $\query \in \qClass$ we have:
%
\[ \rmod(\query(\repr)) = \query(\rmod(\repr)) \]
A representation system is \emph{complete} if for every $\semN$-PDB $\pdb$ there exists $\repr \in \reprs$ such that:
%
\[ \rmod(\repr) = \pdb \]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As mentioned above we will use $\semNX$-databases paired with a probability distribution as a representation system.
We refer to such databases as $\semNX$-PDBs and use bold symbols to distinguish them from possible worlds (which are $\semN$-databases).
Formally, an $\semNX$-PDB is an $\semNX$-database $\idb_{\semNX}$ and a probability distribution $\pd$ over assignments $\assign$ of the variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ occurring in annotations of $\idb_{\semNX}$ to $\{0,1\}$. Note that an assignment $\assign: \vct{X} \to \{0,1\}$ can be represented as a vector $\vct{w} \in \{0,1\}^n$ where $\vct{w}[i]$ records the value assigned to variable $X_i$. Thus, from now on we will solely use such vectors, which we refer to as \emph{world vectors} and implicitly understand to represent assignments. Given an assignment $\assign$ we use $\assign(\pxdb)$ to denote the semiring homomorphism $\semNX \to \semN$ that applies the assignment $\assign$ to all variables of a polynomial and evaluates the resulting expression in $\semN$.\BG{explain connection to homomorphism lifting in K-relations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[$\semNX$-PDBs]\label{def:semnx-pdbs}
An $\semNX$-PDB $\pxdb$ over variables $\vct{X} = \{X_1, \ldots, X_n\}$ is a tuple $(\idb_{\semNX},\pd)$ where $\idb_{\semNX}$ is an $\semNX$-database and $\pd$ is a probability distribution over $\vct{w} \in \{0,1\}^n$. We use $\assign_{\vct{w}}$ to denote the assignment corresponding to $\vct{w} \in \{0,1\}^n$. The $\semN$-PDB $\rmod(\pxdb) = (\idb, \pd')$ encoded by $\pxdb$ is defined as:
\begin{align*}
\idb & = \{ \assign_{\vct{w}}(\pxdb) \mid \vct{w} \in \{0,1\}^n \} \\
\forall \db \in \idb: \probOf'(\db) & = \sum_{\vct{w} \in \{0,1\}^n: \assign_{\vct{w}}(\pxdb) = \db} \probOf(\vct{w})
\end{align*}
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For instance, consider an $\semNX$-PDB $\pxdb$ consisting of a single tuple $\tup_1 = (1)$ annotated with $X_1 + X_2$, with probability distribution $\probOf([0,0]) = 0$, $\probOf([0,1]) = 0$, $\probOf([1,0]) = 0.3$ and $\probOf([1,1]) = 0.7$. This $\semNX$-PDB encodes two possible worlds (with non-zero probability), which we denote using their world vectors.
%
\[
D_{[1,0]}(\tup_1) = 1 \hspace{0.3cm} \mathbf{and} \hspace{0.3cm} D_{[1,1]}(\tup_1) = 2
\]
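For this example, the expected multiplicity of $\tup_1$ is
\[1 \cdot \probOf([1,0]) + 2 \cdot \probOf([1,1]) = 0.3 + 1.4 = 1.7,\]
which equals the annotation $X_1 + X_2$ evaluated at the marginal probabilities $\probOf(X_1 = 1) = 1$ and $\probOf(X_2 = 1) = 0.7$.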
%
Importantly, as the following proposition shows, any finite $\semN$-PDB can be encoded as an $\semNX$-PDB and $\semNX$-PDBs are closed under positive relational algebra queries, the class of queries we are interested in in this work.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{prop:semnx-pdbs-are-a-}
$\semNX$-PDBs are a complete representation system for $\semN$-PDBs that is closed under $\raPlus$ queries.
\end{Proposition}
\subsection{Proof of~\Cref{prop:semnx-pdbs-are-a-}}
To prove that $\semNX$-PDBs are complete consider the following construction that for any $\semN$-PDB $\pdb = (\idb, \pd)$ produces an $\semNX$-PDB $\pxdb = (\idb_{\semNX}, \pd')$ such that $\rmod(\pxdb) = \pdb$. Let $\idb = \{D_1, \ldots, D_{\abs{\idb}}\}$ and let $max(D_i)$ denote $max_{\tup} D_i(\tup)$. For each world $D_i$ we create a corresponding variable $X_i$.
%variables $X_{i1}$, \ldots, $X_{im}$ where $m = max(D_i)$.
In $\idb_{\semNX}$ we assign each tuple $\tup$ the polynomial:
%
\[
\idb_{\semNX}(\tup) = \sum_{i=1}^{\abs{\idb}} D_i(\tup)\cdot X_{i}
\]
The probability distribution $\pd'$ assigns all world vectors zero probability except for $\abs{\idb}$ world vectors (representing the possible worlds) $\vct{w_i}$. All elements of $\vct{w_i}$ are zero except for the position corresponding to variable $X_{i}$, which is set to $1$. Unfolding definitions, it is trivial to show that $\rmod(\pxdb) = \pdb$. Thus, $\semNX$-PDBs are a complete representation system.
The closure under $\raPlus$ queries follows from the fact that an assignment $\vct{X} \to \{0,1\}$ is a semiring homomorphism and that semiring homomorphisms commute with queries over $\semK$-relations.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Now let us consider computing the expected multiplicity of a tuple $\tup$ in the result of a query $\query$ over an $\semN$-PDB $\pdb$ using the annotation of $\tup$ in the result of evaluating $\query$ over an $\semNX$-PDB $\pxdb$ for which $\rmod(\pxdb) = \pdb$. The expectation of the polynomial $\poly = \query(\pxdb)(\tup)$ based on the probability distribution of $\pxdb$ over the variables in $\pxdb$ is:
\begin{equation}
\expct_{\vct{W} \sim \pd}\pbox{\poly(\vct{W})} = \sum_{\vct{w} \in \{0,1\}^n} \assign_{\vct{w}}(\query(\pxdb)(\tup)) \cdot \probOf(\vct{w})\label{eq:expect-q-nx}
\end{equation}
Since $\semNX$-PDBs $\pxdb$ are a complete representation system for $\semN$-PDBs which are closed under $\raPlus$, computing the expectation of the multiplicity of a tuple $t$ in the result of an $\raPlus$ query over the $\semN$-PDB $\rmod(\pxdb)$, is the same as computing the expectation of the polynomial $\query(\pxdb)(t)$.
\subsection{Proof of~\Cref{prop:expection-of-polynom}}
\label{subsec:expectation-of-polynom-proof}
We need to prove that for $\semN$-PDB $\pdb = (\idb,\pd)$ and $\semNX$-PDB $\pxdb = (\db',\pd')$ where $\rmod(\pxdb) = \pdb$, we have $\expct_{\db \sim \pd}[\query(\db)(t)] = \expct_{\vct{W} \sim \pd'}\pbox{\polyForTuple(\vct{W})}$.
By expanding $\polyForTuple$ and the expectation we have:
\begin{align*}
\expct_{\vct{W} \sim \pd'}\pbox{\polyForTuple(\vct{W})}
& = \sum_{\vct{w} \in \{0,1\}^n}\probOf'(\vct{w}) \cdot Q(\pxdb)(t)(\vct{w})\\
\intertext{From $\rmod(\pxdb) = \pdb$, we have that the range of $\assign_{\vct{w}}(\pxdb)$ is $\idb$, so}
& = \sum_{\db \in \idb}\;\;\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf'(\vct{w}) \cdot Q(\pxdb)(t)(\vct{w})\\
\intertext{In the inner sum, $\assign_{\vct{w}}(\pxdb) = \db$, so by distributivity of $\times$ over $+$}
& = \sum_{\db \in \idb}\query(\db)(t)\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf'(\vct{w})\\
\intertext{From the definition of $\probOf$, given $\rmod(\pxdb) = \pdb$, we get}
& = \sum_{\db \in \idb}\query(\db)(t) \cdot \probOf(\db) \quad = \expct_{\db \sim \pd}[\query(\db)(t)]
\end{align*}
\subsection{Supplementary Material for~\Cref{subsec:tidbs-and-bidbs}}\label{subsec:supp-mat-ti-bi-def}
Two important subclasses of $\semNX$-PDBs that are of interest to us are the bag versions of tuple-independent databases (\tis) and block-independent databases (\bis). Under set semantics, a \ti is a deterministic database $\db$ where each tuple $\tup$ is assigned a probability $\prob_\tup$. The set of possible worlds represented by a \ti $\db$ is all subsets of $\db$. The probability of each world is the product of the probabilities of all tuples that exist with one minus the probability of all tuples of $\db$ that are not part of this world, i.e., tuples are treated as independent random events. In a \bi, we also assign each tuple a probability, but additionally partition $\db$ into blocks. The possible worlds of a \bi $\db$ are all subsets of $\db$ that contain at most one tuple from each block. Note then that the tuples sharing the same block are disjoint, and the sum of the probabilities of all the tuples in the same block $\block$ is $1$. The probability of such a world is the product of the probabilities of all tuples present in the world. %and one minus the sum of the probabilities of all tuples from blocks for which no tuple is present in the world.
For bag \tis and \bis, we define the probability of a tuple to be the probability that the tuple exists with multiplicity at least $1$.
\AH{This part \emph{below} needs more work if we include it.}
Note that the main difference to the standard definitions of \tis and \bis is that we define them as subclasses of $\semNX$-PDBs and that we use bag semantics. Even though tuples cannot occur more than once in the input \ti or \bi, they can occur with a multiplicity larger than one in the result of a query. Since in \tis and \bis, there is a one-to-one correspondence between tuples in the database and variables, we can interpret a vector $\vct{w} \in \{0,1\}^n$ as denoting which tuples exist in the possible world $\assign_{\vct{w}}(\pxdb)$ (the ones where $\vct{w}[i] = 1$). Denote the vector $\vct{p}$ to be a vector whose elements are the individual probabilities $\prob_i$ of each tuple $\tup_i$. Let $\pd^{(\vct{p})}$ denote the distribution induced by $\vct{p}$.
%
\begin{align}\label{eq:tidb-expectation}
\expct_{\vct{W} \sim \pd^{(\vct{p})}}\pbox{\poly(\vct{W})} = \sum\limits_{\vct{w} \in \{0, 1\}^\numvar} \poly(\vct{w})\prod_{\substack{i \in [\numvar]\\ s.t. \wElem_i = 1}}\prob_i \prod_{\substack{i \in [\numvar]\\s.t. \wElem_i = 0}}\left(1 - \prob_i\right).
\end{align}
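The following short Python check (hypothetical, for illustration only) evaluates the right-hand side of \Cref{eq:tidb-expectation} by brute-force enumeration of world vectors for $\poly = X_1X_2 + X_2^2$ and compares it against the reduced polynomial evaluated at the tuple probabilities.
\begin{verbatim}
from itertools import product

p = [0.4, 0.7]                             # tuple probabilities
poly  = lambda w: w[0] * w[1] + w[1] ** 2  # Q = X1*X2 + X2^2
rpoly = lambda x: x[0] * x[1] + x[1]       # exponents collapsed to 1

expect = 0.0
for w in product([0, 1], repeat=2):        # all world vectors
    weight = 1.0
    for wi, pi in zip(w, p):
        weight *= pi if wi == 1 else 1 - pi
    expect += poly(w) * weight

assert abs(expect - rpoly(p)) < 1e-12      # 0.4*0.7 + 0.7 = 0.98
\end{verbatim}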
%
\BG{Do we need the BIDB formula?}
\BG{Oliver's conjecture: Bag-\tis + Q can express any finite bag-PDB:
A well-known result for set semantics PDBs is that while not all finite PDBs can be encoded as \tis, any finite PDB can be encoded using a \ti and a query. An analog result holds in our case: any finite $\semN$-PDB can be encoded as a bag \ti and a query (WHAT CLASS? ADD PROOF)
}
\subsection{~\Cref{lem:pre-poly-rpoly}}\label{app:subsec-pre-poly-rpoly}
\begin{Lemma}\label{lem:pre-poly-rpoly}
If
$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
then
$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$% \;\;\; for some $\eta \subseteq \{0,\ldots, B\}^\numvar$
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[Proof for~\Cref{lem:pre-poly-rpoly}]
Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}. \qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proposition ~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
\noindent Note the following fact:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \eta$, it holds that
$% \[
\poly(\vct{w}) = \rpoly(\vct{w}).
$% \]
\end{Proposition}
\begin{proof}[Proof for~\Cref{proposition:q-qtilde}]
Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \qed
\end{proof}
\subsection{Proof for Lemma ~\ref{lem:exp-poly-rpoly}}
Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numvar$ variables with highest degree $= B$: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Then, in expectation we have
\begin{align}
\expct_{\vct{W}}\pbox{\poly(\vct{W})} &= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \expct_{\vct{W}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar W_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{W_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{W_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5}
\end{align}
In step \cref{p1-s1} we use linearity of expectation, and in step \cref{p1-s2} we use the fact that the variables are independent to push the expectation inside the product. In \cref{p1-s3}, note that $W_i \in \{0, 1\}$, which implies that for any exponent $e \geq 1$, $W_i^e = W_i$. Next, in \cref{p1-s4}, the expectation of a tuple's variable is exactly its probability.
Finally, \cref{p1-s5} holds by the construction in \Cref{lem:pre-poly-rpoly}: $\rpoly(\prob_1,\ldots, \prob_\numvar)$ is exactly the sum, over all monomials, of the product of the probabilities of the variables in each monomial.
\subsection{Proof For Corollary ~\ref{cor:expct-sop}}
Note that \cref{lem:exp-poly-rpoly} shows that $\expct\pbox{\poly} =$ $\rpoly(\prob_1,\ldots, \prob_\numvar)$. Therefore, if $\poly$ is already in \abbrSMB form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numvar)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numvar)$), which indeed has $O(\smbOf{|\poly|})$ computations.\qed
\section{Missing details from Section~\ref{sec:hard}}
\label{app:single-mult-p}
We use~\Cref{lem:qEk-multi-p} to prove~\Cref{thm:mult-p-hard-result}:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof of Theorem~\ref{thm:mult-p-hard-result}}
For the sake of contradiction, assume we can solve our problem in $f(\kElem)\cdot m^c$ time for some absolute constant $c$. Then given a graph $G$ we can compute the query polynomial (or rather, its expression tree representation) $\rpoly_G^\kElem$ in the obvious way in $O(km)$ time. After running our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$ in additional $f(\kElem)\cdot m^c$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Thus, overall we have an algorithm for computing the number of $k$-matchings in time
\begin{align*}
O(km) + f(\kElem)\cdot m^c + O(\kElem^3)
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot m^{c+1} \\
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot n^{2c+2},
\end{align*}
which contradicts \Cref{thm:k-match-hard}.
\subsection{Proofs of~\cref{eq:1e}-\cref{eq:3p-3tri}}
\label{app:easy-counts}
The proofs for \cref{eq:1e},~\cref{eq:2p} and~\cref{eq:3s} are immediate.
\begin{proof}[Proof of \cref{eq:2m}]
For edge $(i, j)$ connecting arbitrary vertices $i$ and $j$, finding all other edges in $G$ disjoint to $(i, j)$ is equivalent to finding all edges that are not connected to either vertex $i$ or $j$. The number of such edges is $m - d_i - d_j + 1$, where we add $1$ since edge $(i, j)$ is removed twice when subtracting both $d_i$ and $d_j$. Since the summation is iterating over all edges such that a pair $\left((i, j), (k, \ell)\right)$ will also be counted as $\left((k, \ell), (i, j)\right)$, division by $2$ then eliminates this double counting. Note that $m$ and $d_i$ for all $i \in V$ can be computed in one pass over the set of edges by simply maintaining counts for each quantity. Finally, the summation is also one traversal through the set of edges where each operation is either a lookup ($O(1)$ time) or an addition operation (also $O(1)$) time.
\qed
\end{proof}
\begin{proof}[Proof of \cref{eq:2pd-3d}]
\Cref{eq:2pd-3d} is true for similar reasons. For edge $(i, j)$, it is necessary to find two additional edges, disjoint or connected. As in our argument for \cref{eq:2m}, once the number of edges disjoint to $(i, j)$ has been computed, we only need to consider all possible combinations of two edges from the set of disjoint edges, since it does not matter whether the two edges are connected or not. Note that the factor $3$ of $\threedis$ is necessary to account for the triple counting of $3$-matchings. Since the $2$-path in $\twopathdis$ is connected, there is no double counting: the summation automatically excludes the current edge, so a $2$-matching at the current vertex will not be counted. The sum over all such edge combinations is precisely $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$. Note that all $d_i$ and the required factorials can be computed in $O(m)$ time, and each binomial coefficient can then be computed with constant-time operations, yielding the claimed $O(m)$ run time.
\qed
\end{proof}
\begin{proof}[Proof of \cref{eq:3p-3tri}]
To compute $\numocc{G}{\threepath}$, note that for an arbitrary edge $(i, j)$, a 3-path exists for edge pair $(i, \ell)$ and $(j, k)$ where $i, j, k, \ell$ are distinct. Further, the quantity $(d_i - 1) \cdot (d_j - 1)$ represents the number of 3-edge subgraphs with middle edge $(i, j)$ and outer edges $(i, \ell), (j, k)$ such that $\ell \neq j$ and $k \neq i$. When $k = \ell$, the resulting subgraph is a triangle, and when $k \neq \ell$, the subgraph is a 3-path. Summing over all edges $(i, j)$ gives \cref{eq:3p-3tri} by observing that each triangle is counted thrice, while each 3-path is counted just once. For reasons similar to \cref{eq:2m}, all $d_i$ can be computed in $O(m)$ time and each summand can then be computed in $O(1)$ time, yielding an overall $O(m)$ run time.
\qed
\end{proof}
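The identity in \cref{eq:2m} is also easy to test directly; the following hypothetical snippet compares the degree-based formula against brute-force enumeration of disjoint edge pairs on a small example graph.
\begin{verbatim}
from itertools import combinations

E = [(0, 1), (1, 2), (2, 3), (3, 0), (0, 2)]
m, d = len(E), {}
for u, v in E:                       # one pass to collect degrees
    d[u] = d.get(u, 0) + 1
    d[v] = d.get(v, 0) + 1

formula = sum(m - d[u] - d[v] + 1 for u, v in E) // 2
brute = sum(1 for e, f in combinations(E, 2)
            if not set(e) & set(f))  # disjoint pairs = 2-matchings
assert formula == brute              # both count 2 on this graph
\end{verbatim}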
\subsection{Proofs for~\Cref{lem:3m-G2}-\Cref{lem:lin-sys}}\label{subsec:proofs-struc-lemmas}
Before proceeding, let us introduce a few more helpful definitions.
\begin{Definition}\label{def:ed-nota}
For $\ell > 1$, we use $E_\ell$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, its edges are denoted by a pair $(e, b)$, such that $b \in \{0,\ldots, \ell-1\}$ and $e\in E_1$, where $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$.
\end{Definition}
\begin{Definition}[$\eset{\ell}$]
Given an arbitrary subgraph $\sg{1}$ of $\graph{1}$, let $\eset{1}$ denote the set of edges in $\sg{1}$. Define then $\eset{\ell}$ for $\ell > 1$ as the set of edges in the generated subgraph $\sg{\ell}$ (i.e. when we apply \Cref{def:Gk} to $\sg{1})$.
\end{Definition}
For example, consider $\sg{1}$ with edges $\eset{1} = \{e_1\}$. Then the edge set of $\sg{2}$ is defined as $\eset{2} = \{(e_1, 0), (e_1, 1)\}$.
\begin{Definition}\label{def:ed-sub}
Let $\binom{E}{t}$ denote the set of subsets in $E$ with exactly $t$ edges. In a similar manner, $\binom{E}{\leq t}$ is used to mean the subsets of $E$ with $t$ or fewer edges.
\end{Definition}
The following function $f_\ell$ is a mapping from every $3$-edge shape in $\graph{\ell}$ to its `projection' in $\graph{1}$.
\begin{Definition}\label{def:fk}
Let $f_\ell: \binom{E_\ell}{3} \mapsto \binom{E_1}{\leq3}$ be defined as follows. For any element $s \in \binom{E_\ell}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
\[ f_\ell\left(\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}\right) = \pbrace{e_1, e_2, e_3}.\]
\end{Definition}
\begin{Definition}[$f_\ell^{-1}$]\label{def:fk-inv}
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with at most $m \leq 3$ edges, the inverse function $f_\ell^{-1}: \binom{E_1}{\leq 3}\mapsto 2^{\binom{E_\ell}{3}}$ takes $\eset{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that %each 3-edge set $T\in f_\ell^{-1}(S)$ is mapped to the input set $\eset{1}$ by $f_\ell$, i.e.
$f_\ell(s) = \eset{1}$. %The set returned by $f_\ell^{-1}$ is of size $h$, where $h$ depends on $\abs{s^{(1)}}$, such that $h \leq \binom{\abs{s^{(1)}} \cdot \ell}{3}$.
\end{Definition}
Note, importantly, that when we discuss $f_\ell^{-1}$, each \textit{edge} present in $\eset{1}$ must have an edge in $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(\eset{1})$ consists of the following set of edges: $\{ (e_i, b), (e_j, b'), (e_m, b'') \}$, where $i,j$ and $m$ are distinct.
We first note that $f_\ell$ is well-defined:
\begin{Lemma}\label{lem:fk-func}
$f_\ell$ is a function.
\end{Lemma}
\begin{proof}\label{subsubsec:proof-fk}
Note that $f_\ell$ is properly defined. For any $S \in \binom{E_\ell}{3}$, $|f_\ell(S)| \leq 3$, since any subset of $3$ edges in $E_\ell$ maps to at most three edges in $E_1$; thus all mappings are in the required range. Further, for any $b \in \{0,\ldots, \ell-1\}$, the map $(e, b) \mapsto e$ assigns exactly one image to each edge, which implies that $f_\ell$ is a function.\qed
\end{proof}
We are now ready to prove the structural lemmas. Note that $f_\ell$ maps subsets of three edges in $\graph{\ell}$ to a subset of at most three edges in $E_1$. To prove the structural lemmas, we will use the map $f_\ell^{-1}$. In particular, to count the number of occurrences of $\tri,\threepath,\threedis$ in $\graph{\ell}$ we count for each $S\in\binom{E_1}{\le 3}$, how many of $\tri/\threepath/\threedis$ subgraphs appear in $f_\ell^{-1}(S)$.
\subsubsection{Proof of Lemma \ref{lem:3m-G2}}
\begin{proof}[Proof of \Cref{lem:3m-G2}]
For each subset $\eset{1}\in \binom{E_1}{\le 3}$, we count the number of $3$-matchings in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. We first consider the case of $\eset{1} \in \binom{E_1}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set of all $3$-edge subsets $s \subseteq \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_2(s) = \{e_1, e_2, e_3\}$.
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$ (denoted $\eset{1} \equiv \sg{1}$):
\begin{itemize}
\item $3$-matching ($\threedis$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threedis$, the edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. For any choice of $b_1, b_2, b_3 \in \{0, 1\}$, the set $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ is a $3$-matching. Since we have two possible choices of $b_i$ for each edge $e_i$ of $\graph{1}$, this yields $2^3 = 8$ possible $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Disjoint Two-Path ($\twopathdis$)
\end{itemize}
For $\sg{1}$ isomorphic to $\twopathdis$, edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can pick either $(e_1, 0)$ or $(e_1, 1)$ for the disjoint edge, and then we need to pick a $2$-matching from the $4$-path formed by $e_2$ and $e_3$. The $4$-path admits exactly $3$ possible $2$-matchings, specifically,
\begin{equation*}
\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}.
\end{equation*}
Since these two selections can be made independently, there are $2 \cdot 3 = 6$ \emph{distinct} $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-star ($\oneint$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\eset{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid $3$-matching it must be the case that at most one inner edge can be part of the set of disjoint edges. For the case when exactly one inner edge is chosen, there exist $3$ possibilities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible $3$-matching occurs when all $3$ outer edges are chosen. Thus, there are four $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-path ($\threepath$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected. This means that the edges of $\eset{2}$ form a $6$-path. For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$ or both $(e_i, 1)$ and $(e_j, 0)$ where $j = i + 1$. % there must be at least one edge separating edges picked from a sequence.
There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$ $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$, a total of four 3-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Triangle ($\tri$)
\end{itemize}
For $\sg{1}$ isomorphic to $\tri$, the edges in $\eset{2}$ are again successively connected, but this time in a cycle, so that $(e_1, 0)$ and $(e_3, 1)$ are also connected. While this is similar to the $3$-path discussion above, here the first and last edges are not disjoint, since they are connected. This rules out both of the subsets $\{(e_1, 0), (e_2, 0), (e_3, 1)\}$ and $\{(e_1, 0), (e_2, 1), (e_3, 1)\}$, yielding two $3$-matchings.
Let us now consider when $\eset{1} \in \binom{E_1}{\leq 2}$, i.e. patterns among
\begin{itemize}
\item $2$-matching ($\twodis$), $2$-path ($\twopath$), $1$ edge ($\ed$)
\end{itemize}
When $|\eset{1}| = 2$, we can only pick one from each of two pairs, $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. This implies that a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. The same argument holds for $|\eset{1}| = 1$, where we can only pick one edge from the pair $\pbrace{(e_1, 0), (e_1, 1)}$. Trivially, no $3$-matching exists in $f_2^{-1}(\eset{1})$.
Observe that all of the arguments above depend solely on the shape to which the subgraph $\sg{1}$ is isomorphic. In other words, all $\eset{1}$ of a given ``shape'' yield the same number of $3$-matchings in $f_2^{-1}(\eset{1})$, and this is why we get the required identity from the above case analysis.
\end{proof}
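The case analysis above can also be verified mechanically; for instance, the following hypothetical brute-force script reproduces the two $3$-matchings found for the triangle case.
\begin{verbatim}
from itertools import combinations

tri = [("a", "b"), ("b", "c"), ("c", "a")]
edges = {}                         # (i, b) -> endpoints in G^2
for i, (u, v) in enumerate(tri):   # subdivide e_i with midpoint m_i
    edges[(i, 0)] = (u, f"m{i}")
    edges[(i, 1)] = (f"m{i}", v)

def is_matching(keys):
    seen = set()
    for k in keys:
        u, v = edges[k]
        if u in seen or v in seen:
            return False
        seen.update((u, v))
    return True

count = sum(1 for s in combinations(edges, 3)
            if is_matching(s) and {i for i, _ in s} == {0, 1, 2})
print(count)                       # prints 2, as in the proof
\end{verbatim}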
%
%\subsubsection{Proof of~\cref{lem:3m-G3}}
%
%For any $\eset{1} \in \binom{E_1}{\leq3}$, we again then count the number of $3$-matchings in $f_3^{-1}(\eset{1})$ via a case analysis:
%
%
%
%\begin{itemize}
% \item $1$ edge ($\ed$)
%\end{itemize}
%When $\eset{1} \equiv \ed$, $f_3^{-1}(\eset{1})$ has one subset, $(e_1, 0), (e_1, 1), (e_1, 2)$, which clearly does not contain a $3$-matching.
%\begin{itemize}
% \item $2$-path ($\twopath$)
%\end{itemize}
%When $\eset{1} \equiv \twopath$ and now we have all edges in $\eset{3}$ form a $6$-path, and similar to the discussion in the proof of \cref{lem:3m-G2} (when $\eset{1} \equiv \threepath$ in $\graph{2}$), this leads to four $3$-matchings in $f_3^{-1}(\eset{1})$.
%
%\begin{itemize}
% \item $2$-matching ($\twodis$)
%\end{itemize}
%For $\eset{1} \equiv \twodis$, all edges of $\eset{3}$ are predicated on the fact that $(e_i, b)$ is disjoint with $(e_j, b)$ for $i \neq j\in \{1,2\}$ and $b \in \{0, 1, 2\}$. Pick an aribitrary $e_i$ and note, that $(e_i, 0), (e_i, 2)$ is a $2$-matching, which can combine with any of the $3$ edges in $(e_j, 0), (e_j, 1), (e_j, 2)$ again for $i \neq j$. Since the selections are independent, it follows that there exist $2 \cdot 3 = 6$ $3$-matchings in $f_3^{-1}(\eset{1})$.
%
%Now, we consider the 3-edge subgraphs of $\graph{1}$, starting with $\eset{1} = \tri$.
%\begin{itemize}
% \item Triangle ($\tri$)
%\end{itemize}
%As discussed in proof of \cref{lem:3m-G2} for the case of $\tri$, the edges of $\eset{3}$ are a cyclic sequence, and we must be careful not to pair $(e_1, 0)$ with $(e_3, 2)$ in a $3$-matching. For any $T \in f_3^{-1}(\eset{1})$, $T$ is a $3$-matching when we have that for the edges $(e_1, b_1), (e_2, b_2), (e_3, b_3)$ where $b_1, b_2, b_3 \in \{0, 1, 2\}$, such that, for all $i \in [3]$ it is the case that if $b_i = 2$ then $b_{i \mod{3} + 1} \neq 0$. Iterating through all possible choices for $e_1$, we have
%\begin{itemize}
% \item For \textsc{$(e_1, 0)$}, there are five possibilities:
% \begin{itemize}
% \item $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$
% \item $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$
% \item $\pbrace{(e_1, 0), (e_2, 1), (e_3, 0)}$
% \item $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}$
% \item $\pbrace{(e_1, 0), (e_2, 2), (e_3, 1)}$
% \end{itemize}
% \item For \textsc{$(e_1, 1)$}, there are eight possibilities:
% \begin{itemize}
% \item $\pbrace{(e_1, 1), (e_2, 0), (e_3, 0)}, \ldots\pbrace{(e_1, 1), (e_2, 1), (e_3, 2)}$
% \item $\pbrace{(e_1, 1), (e_2, 2), (e_3, 1)}$
% \item $\pbrace{(e_1, 1), (e_2, 2), (e_3, 2)}$
% \end{itemize}
% \item For \textsc{$(e_1, 2)$}, there are five possibilities:
% \begin{itemize}
% \item $\pbrace{(e_1, 2), (e_2, 1), (e_3, 0)}$
% \item $\pbrace{(e_1, 2), (e_2, 1), (e_3, 1)}$
% \item $\pbrace{(e_1, 2), (e_2, 1), (e_3, 2)}$
% \item $\pbrace{(e_1, 2), (e_2, 2), (e_3, 1)}$
% \item $\pbrace{(e_1, 2), (e_2, 2), (e_3, 2)}$
% \end{itemize}
%\end{itemize}
%for a total of $18$ $3$-matchings in $f_3^{-1}(\eset{1})$.
%
%\begin{itemize}
% \item $3$-path ($\threepath$)
%\end{itemize}
%When $\eset{1} \equiv \threepath$ and all edges in $\eset{3}$ are successively connected to form a $9$-path. Since $(e_1, 0)$ is disjoint to $(e_3, 2)$, both of these edges can exist in a $3$-matching. This relaxation yields 3 other 3-matchings that couldn't be counted in the case of the $\eset{1} = \tri$, namely
%\begin{equation*}
%\pbrace{(e_1, 0), (e_2, 0), (e_3, 2)},\pbrace{(e_1, 0), (e_2, 1), (e_3, 2)}, \pbrace{(e_1, 0), (e_2, 2), (e_3, 2)}.
%\end{equation*}
%There are therefore $18 + 3 = 21$ $3$-matchings in $f_3^{-1}(\eset{1})$.
%
%\begin{itemize}
% \item Disjoint Two-Path ($\twopathdis$)
%\end{itemize}
%Assume $\eset{1} = \twopathdis$, then the edges of $\eset{3}$ have successive connectivity from $(e_1, 0)$ through $(e_1, 2)$, and successive connectivity from $(e_2, 0)$ through $(e_3, 2)$. It is the case that the edges in $\eset{3}$ form a 6-path with a disjoint 3-path. There exist $8$ distinct two matchings (with at least one $(e_2,\cdot)$ and at least one $(e_3,\cdot)$ edge) in the $6$-path $(e_2, 0),\ldots, (e_3, 2)$ of the form
%\begin{equation*}
%\pbrace{(e_2, 0), (e_3, 0)},\ldots, \pbrace{(e_2, 1), (e_3, 2)}, \pbrace{(e_2, 2), (e_3, 1)}, \pbrace{(e_2, 2), (e_3, 2)}.
%\end{equation*}
%These matchings can be paired independently with either of the $3$ remaining edges of $(e_1, b)$, for a total of $8 \cdot 3 = 24$ many 3-matchings in $f_3^{-1}(\eset{1})$.
%
%\begin{itemize}
% \item $3$-star ($\oneint$)
%\end{itemize}
%When $\eset{1} \equiv \oneint$, the edges of $\eset{3}$ are restricted such that the outer edges $(e_i, 0)$ are disjoint from another, the middle edges $(e_i, 1)$ are also disjoint to each other, and only the inner edges $(e_i, 2)$ intersect with one another at exactly one common endpoint. To be precise, any outer edge $(e_i, 0)$ is disjoint to every middle edge $(e_j, 1)$ for $i \neq j$. As previously mentioned in the proof of \cref{lem:3m-G2}, at most one inner edge may appear in a $3$-matching. For arbitrary inner edge $(e_i, 2)$, we have $4$ combinations of the middle and outer edges of $e_j, e_m$, where $i \neq j \neq m$. These choices are independent and we have $4 \cdot 3 = 12$ many 3-matchings. We are not done yet, as we need to consider the middle and outer edge combinations. Notice that for each $e_i$, we have $2$ choices, i.e. a middle or outer edge, contributing $2^3 = 8$ additional $3$-matchings, for a total of $8 + 12 = 20$ many $3$-matchings in $f_3^{-1}(\eset{1})$.
%
%\begin{itemize}
% \item $3$-matching ($\threedis$)
%\end{itemize}
%When $\eset{1} \equiv \threedis$ subgraph, we have the case that all edges in $\eset{3}$ have the property that $(e_i, b_i)$ is disjoint to $(e_j, b_j)$ for $i \neq j$. For each $e_i$, there are then $3$ choices, independent of each other, and it results that there are $3^3 = 27$ many 3-matchings in $f_3^{-1}(\eset{1})$.
%
%All of the observations above focused only on the shape of $\eset{1}$, and since we see that for fixed $\eset{1}$, we have a fixed number of $3$-matchings, this implies the identity.
%
%\subsubsection{Proof of~\cref{lem:3p-G2}}
%
%For $\mathcal{P} \in f_2^{-1}\inparen{ \eset{2}}$ such that $\mathcal{P} $ is a $3$-path, it \textit{must} be the case by definition of $f_2$ that (i)eall edges in $f_2(\mathcal{P} )$ have at least one mapping from an edge in $\mathcal{P} $ and recall that (ii) $\mathcal{P} $ is connected. These constraint rules out every pattern $\eset{1}$ consisting of $3$ edges (it can be verified that in each three-edge pattern at least one of (i) or (ii) is violated), as well as when $\eset{1} = \twodis$. For $\eset{1} = \ed$, note that $\eset{1}$ doesn't have enough edges to have any output in $f_2^{-1}(\eset{1})$, i.e., there exists no $\eset{1} \in \binom{E_2}{3}$ such that $f_2(\mathcal{P} ) = \eset{1}$. The only surviving pattern is $\eset{1} \equiv \twopath$, where the edges of $\eset{2}$ have successive connectivity from $(e_1, 0)$ to $(e_2, 1)$. There are then two $3$-paths sharing edges $e_1$ and $e_2$ in $f_2^{-1}(\eset{1}), \pbrace{(e_1, 0), (e_1, 1), (e_2, 0)} \text{ and }\{(e_1, 1)$,$ (e_2, 0), (e_2, 1)\}$.
%
%All of the observations above focused only on the shape of $\eset{1}$, and since we see that for fixed $\eset{1}$, we have a fixed number of $3$-paths, this implies the identity.
%
%
%\subsubsection{Proof of~\cref{lem:3p-G3}}
%The argument follows along the same lines as in the proof of \cref{lem:3p-G2}. Given $\mathcal{P} \in f_3^{-1}\inparen{\eset{1}}$, it \textit{must} be that every edge in $f_3(\mathcal{P})$ has at least one edge in $\mathcal{P}$ mapped to it (and $\mathcal{P}$ is connected). Notice again that this cannot be the case for any $\eset{1} \in \binom{E_1}{3}$, nor is it the case when $\eset{1} = \twodis$. This leaves us with two patterns, $\eset{1} = \twopath$ and $\eset{1} = \ed$. For the former, it is the case that we have two $3$-paths across $e_1$ and $e_2$, $\pbrace{(e_1, 1), (e_1, 2), (e_2, 0)}$ and $\pbrace{(e_1, 2), (e_2, 0), (e_2, 1)}$. For the latter pattern $\ed$, it it trivial to see that an edge in $\graph{1}$ becomes a $3$-path in $\graph{3}$, and this proves the identity.
%
%All of the observations above focused only on the shape of $\eset{1}$, and since we see that for fixed $\eset{1}$, we have a fixed number of $3$-paths, this implies the identity.
\subsubsection{Proof of~\cref{lem:tri}}
\begin{proof}[Proof of \Cref{lem:tri}]
The number of triangles in $\graph{\ell}$ for $\ell \geq 2$ will always be $0$ for the simple fact that all cycles in $\graph{\ell}$ will have at least six edges.
\end{proof}
\input{lin_sys}
\section{Missing Details from Section~\ref{sec:algo}}\label{sec:proofs-approx-alg}
This concludes the proof of the first claim of~\Cref{lem:mon-samp}; it remains to bound the runtime.
\paragraph{Run-time Analysis}
The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes time $O(\revision{\size(\circuit)})$) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot \revision{\depth(\circuit)})$ time) and~\Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time: we sort the $O(k)$ variables by their block IDs and then check whether any block ID occurs twice. Adding up all the times discussed here gives us the desired overall runtime.
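The duplicate-block check admits a direct implementation; a minimal sketch (the \texttt{block\_of} accessor is an assumption for illustration):
\begin{verbatim}
def has_duplicate_block(variables, block_of):
    # Sort the O(k) sampled variables by block ID and scan adjacent
    # pairs; O(k log k) time, as in the analysis above.
    ids = sorted(block_of(x) for x in variables)
    return any(a == b for a, b in zip(ids, ids[1:]))
\end{verbatim}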
\subsection{Proof of~\Cref{cor:approx-algo-const-p}}
The result follows by first noting that by definition of $\gamma$, we have
%\AH{Just wondering why you use $\geq$ as opposed to $=$?}
%\AR{Ah, right-- fixed}
\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1).\]
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1)$.
%\AH{This looks really nice!}
Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $\depth(\revision{\circuit})\le \size(\revision{\circuit})$ and absorbing all factors that depend only on $k$.
\subsection{$\onepass$ Pseudocode}
Note that it is \textit{assumed} that the original call to \onepass is on an input circuit \circuit such that the members \prt, \lwght, and \rwght have been initialized to Null across all gates.
\begin{Definition}[Equivalence ($\equiv$)]
A circuit \circuit is equivalent to a circuit \circuit' if and only if $\polyf(\circuit) = \polyf(\circuit')$.
\end{Definition}
For technical reasons, we require the invariant that every subcircuit \subcircuit corresponding to an internal gate of \circuit has $\degree\left(\subcircuit\right) \geq 1$. To ensure this, the auxiliary Algorithm~\ref{alg:reduce} (\reduce) first rewrites \circuit into an equivalent circuit \circuit', iteratively combining constant (non-variable) leaf nodes bottom-up until every remaining internal gate has an input whose subcircuit contains at least one leaf of type \var. Each such rewrite replaces a subcircuit \subcircuit by a constant \subcircuit' with $\subcircuit \equiv \subcircuit'$, which implies $\circuit \equiv \circuit'$.
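For intuition, consider a small (illustrative) instance: the circuit encoding $(2\times 3)\times X + (1 + 4)$ contains two internal gates of degree $0$. \reduce rewrites the gate computing $2\times 3$ into the constant gate $6$ and the gate computing $1 + 4$ into the constant gate $5$, yielding the equivalent circuit $6\times X + 5$, in which every remaining internal gate has degree at least $1$.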
\begin{Lemma}\label{lem:reduce}
In $O(\size(\circuit))$ time, algorithm \reduce inspects input circuit \circuit and outputs an equivalent version \circuit' of \circuit such that all subcircuits \subcircuit of \circuit' corresponding to internal gates have $\degree(\subcircuit) \geq 1$.
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:reduce}]
~\paragraph{\reduce correctness}
Note that for a source gate \gate, only when $\gate.\type = \var$ is it the case that $\degree(\gate) = 1$, and otherwise $\degree(\gate) = 0$. Lines~\ref{alg:reduce-add-deg} and~\ref{alg:reduce-no-deg} compute \gate.\degval.
We prove that \reduce outputs an equivalent circuit \circuit' by induction over the iterations of \topord. For the base case, \circuit consists of a single gate. In this case no rewriting occurs, and \reduce returns \circuit itself; trivially $\circuit \equiv \circuit$.
For the inductive hypothesis, we assume that for $k \geq 0$ nodes in \topord, the modified circuit $\circuit_k' \equiv \circuit_k$, where $\circuit_k'$ denotes the circuit at the end of iteration $k$. Similarly, when discussing \Cref{alg:reduce} pseudocode, $\gate_{k}$ denotes the gate in position $k$ of \topord, and $\gate_{k_\linput}$ ($\gate_{k_\rinput}$) denotes the left (right) input of $\gate_{k}$.
We now prove for $k + 1$ gates in \topord that $\circuit_{k + 1}' \equiv \circuit_{k + 1}$. Note that if the gate $\gate_{k + 1}$ is a source node, then this is again the base case and we are finished. If $\gate_{k + 1}$ is an internal node, then $\gate_{k + 1}.\type$ must either be $\circmult$ or $\circplus$.
When $\gate_{k + 1}$ is $\circmult$, then it is the case that either $\degree(\gate_{{k + 1}_\linput}) \geq 1$ or $\gate_{{k + 1}_\linput}.\type$ is $\tnum$ and likewise for $\gate_{{k + 1}_\rinput}$. There are then four possibilities, only one of which will prompt a rewrite, namely when we have that both inputs have $\degree(\gate_{{k + 1}_i}) = 0$. In such a case, $\gate_{k + 1}.\val \gets \gate_{{k + 1}_\linput}.\val \times \gate_{{k + 1}_\rinput}.\val$, and the inputs are deleted. Note that since $\gate_{{k + 1}_\linput}.\type = \gate_{{k + 1}_\rinput}.\type = \tnum$ that we have two constants being multiplied, and that for subcircuit $\subcircuit = (\times, \tnum_1, \tnum_2)$ and $\tnum' = \tnum_1 \times \tnum_2$, $\polyf(\subcircuit) = \polyf(\tnum')$ which implies that for the rewritten \subcircuit', $\subcircuit \equiv \subcircuit'$.
An analogous argument applies when $\gate_{k + 1}.\type$ is $\circplus$.\qed
\paragraph{\reduce Run-time Analysis}
$O(\size(\circuit))$ trivially follows by the single iterative pass over the \topord of \circuit, where, as can be seen in lines~\ref{alg:reduce-var},~\ref{alg:reduce-num},~\ref{alg:reduce-mult}, and~\ref{alg:reduce-plus} a constant number of operations are performed on each node.\qed
\end{proof}
\subsection{$\onepass$ Example}
\begin{Example}\label{example:one-pass}
Let $\etree$ encode the expression $(X_1 + X_2)(X_1 - X_2) + X_2^2$. After one pass, \cref{alg:one-pass-iter} will have computed the following weight distribution. For the two inputs of the root $+$ node of $\etree$, $\etree.\lwght = \frac{4}{5}$ and $\etree.\rwght = \frac{1}{5}$. Similarly, letting $\stree$ denote the left subtree of $\etree_{\lchild}$, we have $\stree.\lwght = \stree.\rwght = \frac{1}{2}$. This is depicted in~\Cref{fig:expr-tree-T-wght}.
\end{Example}
\begin{figure}[h!]
\begin{tikzpicture}[thick, every tree node/.style={default_node, thick, draw=black, black, circle, text width=0.3cm, font=\bfseries, minimum size=0.65cm}, every child/.style={black}, edge from parent/.style={draw, thick},
level 1/.style={sibling distance=0.95cm},
level 2/.style={sibling distance=0.7cm},
%level 2+/.style={sibling distance=0.625cm}
%level distance = 1.25cm,
%sibling distance = 1cm,
%every node/.append style = {anchor=center}
]
\Tree [.\node(root){$\boldsymbol{+}$};
\edge [wght_color] node[midway, auto= right, font=\bfseries, gray] {$\bsym{\frac{4}{5}}$}; [.\node[highlight_color](tl){$\boldsymbol{\times}$};
[.\node(s){$\bsym{+}$};
\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sl){$\bsym{x_1}$}; ]
\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sr){$\bsym{x_2}$}; ]
]
[.\node(sp){$\bsym{+}$};
\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spl){$\bsym{x_1}$}; ]
\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spr){$\bsym{\times}$};
[.$\bsym{-1}$ ] [.$\bsym{x_2}$ ]
]
]
]
\edge [wght_color] node[midway, auto=left, font=\bfseries, gray] {$\bsym{\frac{1}{5}}$}; [.\node[highlight_color](tr){$\boldsymbol{\times}$};
[.$\bsym{x_2}$
\edge [draw=none]; [.\node[draw=none]{}; ]
\edge [draw=none]; [.\node[draw=none]{}; ]
]
[.$\bsym{x_2}$ ] ]
]
% labels for plus node children, with arrows
\node[left=2pt of sl, highlight_color, inner sep=0pt] (sl-label) {$\stree_\lchild$};
\draw[highlight_color] (sl) -- (sl-label);
\node[right=2pt of sr, highlight_color, inner sep=0pt] (sr-label) {$\stree_\rchild$};
\draw[highlight_color] (sr) -- (sr-label);
\node[below left=2pt of spl, inner sep=0pt, highlight_color](spl-label) {$\stree_\lchild'$};
\draw[highlight_color] (spl) -- (spl-label);
\node[right=2pt of spr, highlight_color, inner sep=0] (spr-label) {$\stree_\rchild'$};
\draw[highlight_color] (spr) -- (spr-label);
\node[above left=2pt of tl, inner sep=0pt, highlight_color] (tl-label) {$\etree_\lchild$};
\draw[highlight_color] (tl) -- (tl-label);
\node[above right=2pt of tr, highlight_color, inner sep=0pt] (tr-label) {$\etree_\rchild$};
\node[above = 2pt of root, highlight_color, inner sep=0pt, font=\bfseries] (root-label) {$\etree$};
\node[above = 2pt of s, highlight_color, inner sep=0pt, font=\bfseries] (s-label) {$\stree$};
\node[above = 2pt of sp, highlight_color, inner sep=0pt, font=\bfseries] (sp-label) {$\stree'$};
\draw[highlight_color] (tr) -- (tr-label);
% \draw[<-|, highlight_color] (s) -- (s-label);
% \draw[<-|, highlight_color] (sp) -- (sp-label);
% \draw[<-|, highlight_color] (root) -- (root-label);
%\node[above right=0.7cm of TR, highlight_color, inner sep=0pt, font=\bfseries] (tr-comment) {$\etree_\rchild$};
% \draw[<-|, highlight_color] (TR) -- (tr-comment);
\end{tikzpicture}
\caption{Weights computed by $\onepass$ in ~\cref{example:one-pass}.}
\label{fig:expr-tree-T-wght}
\end{figure}
\subsection{\onepass}
\begin{algorithm}[h!]
\caption{\reduce$(\circuit)$}
\label{alg:reduce}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Reduced Circuit
\For{\gate in \topord(\circuit)}\label{alg:reduce-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\If{\gate.\type $=$ \var}\label{alg:reduce-var}
\State \gate.\degval $\gets 1$\label{alg:reduce-add-deg}
\ElsIf{\gate.\type $=$ \tnum}\label{alg:reduce-num}
\State \gate.\degval $\gets 0$\label{alg:reduce-no-deg}
\ElsIf{\gate.\type $= \circmult$}\label{alg:reduce-mult}
\State \gate.\degval $\gets \gate_\linput.\degval + \gate_\rinput.\degval$
\If{\gate.\degval $= 0$}
\State \gate.\type $\gets \tnum$
\State $\gate.\val \gets \gate_\linput.\val \times \gate_\rinput.\val$
\State $\gate_\linput, \gate_\rinput \gets \nullval$
\EndIf
\Else \label{alg:reduce-plus}
\State \gate.\degval $\gets \max(\gate_\linput.\degval, \gate_\rinput.\degval)$
\If{\gate.\degval $= 0$}
\State \gate.\type $\gets \tnum$
\State $\gate.\val \gets \gate_\linput.\val + \gate_\rinput.\val$
\State $\gate_\linput, \gate_\rinput \gets \nullval$
\EndIf
\EndIf
\EndFor
\State \Return $\circuit$
\end{algorithmic}
\end{algorithm}
\begin{algorithm}[h!]
\caption{\onepass$(\circuit)$}
\label{alg:one-pass-iter}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Annotated Circuit
\Ensure \vari{sum} $\in \reals$
\State $\circuit' \gets \reduce(\circuit)$
\For{\gate in \topord(\circuit')}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\If{\gate.\type $=$ \var}
\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
\ElsIf{\gate.\type $=$ \tnum}
\State \gate.\prt $\gets \abs{\gate.\val}$\label{alg:one-pass-num}
\ElsIf{\gate.\type $= \circmult$}
\State \gate.\prt $\gets \gate_\linput.\prt \times \gate_\rinput.\prt$\label{alg:one-pass-mult}
\Else
\State \gate.\prt $\gets \gate_\linput.\prt + \gate_\rinput.\prt$\label{alg:one-pass-plus}
\State \gate.\lwght $\gets \frac{\gate_\linput.\prt}{\gate.\prt}$\label{alg:one-pass-lwght}
\State \gate.\rwght $\gets \frac{\gate_\rinput.\prt}{\gate.\prt}$\label{alg:one-pass-rwght}
\EndIf
\State \vari{sum} $\gets \gate.\prt$
\EndFor
\State \Return (\vari{sum}, $\circuit'$)
\end{algorithmic}
\end{algorithm}
\subsection{Proof of ~\Cref{lem:one-pass}}\label{sec:proof-one-pass}
\paragraph{\onepass Correctness}
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in line ~\ref{alg:one-pass-loop} over the topological order \topord of the input circuit \circuit. Note that \topord is the standard definition of a topological ordering over the DAG structure of \circuit.
For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum. In this case, as per \Cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and $\abs{\circuit.\val}$ respectively.
For the inductive hypothesis, assume that \onepass correctly computes \gate.\prt, \gate.\lwght, and \gate.\rwght for all gates \gate processed in the first $k > 0$ iterations over \topord.
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit; by the hypothesis, the first $k$ gates have correctly computed values.
Note that $\gate_{k + 1}$ appears after all gates $\gate_\vari{i}$ for $i \in [k]$ in \topord. If $\gate_{k+1}$ is a source gate, we are back to the base case. Otherwise $\gate_{k + 1}$ is an internal gate with two inputs, and either $\gate_{k+1}.\type = \circplus$ or $\gate_{k+1}.\type = \circmult$.
When $\gate_{k+1}.\type = \circplus$, then by line ~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \Cref{eq:T-all-ones}. Further, lines ~\ref{alg:one-pass-lwght} and ~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$. Note that all values needed for each computation have been correctly computed by the I.H.
When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild}.\prt \times \gate_{{k+1}_\rchild}.\prt$, which indeed is correct, as per \Cref{eq:T-all-ones}.
\paragraph{\onepass Runtime}
It is known that $\topord(G)$ is computable in linear time. Next, each of the $\size(\circuit)$ iterations of the loop in~\Cref{alg:one-pass-loop} takes $O(1)$ arithmetic operations. In the RAM model with input size $N$, an arithmetic operation on $M$-bit numbers takes $O(\frac{M}{\log{N}})$ time. Since each of the arithmetic operations at a given gate is over numbers of $O(\log{\abs{\circuit}(1,\ldots, 1)})$ bits, we obtain the overall runtime of $O\left(\size(\circuit)\cdot \frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$.
\paragraph{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to fit in $O(\log{N})$ bits}
For our runtime results to be relevant, it must be the case that the sum of the coefficients computed by \onepass fits in $O(\log{N})$ bits, i.e., a constant number of machine words in the RAM model, where $N$ is the size of the input; here the input size is \size(\circuit). We show that this is indeed the case when \size$(\circuit_\linput) = N_\linput$ and \size$(\circuit_\rinput) = N_\rinput$ with $N_\linput + N_\rinput \leq N$.
\begin{proof}%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
To prove this result, we start by proving that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$ for \degree(\circuit) $= k$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient (or constant) of $1$. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{2^k} = 1^{2^0} = 1$.
Assume that for some $\ell \geq 0$ and an arbitrary circuit \circuit with $\depth(\circuit) \leq \ell$, it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or a $\circplus$ gate. Consider first the case where the sink node is $\circmult$. Let $k_\linput, k_\rinput$ denote $\degree(\circuit_\linput)$ and $\degree(\circuit_\rinput)$ respectively. Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \leq (N-1)^{2^{k_\linput}} \circmult (N - 1)^{2^{k_\rinput}}\nonumber\\
&= (N-1)^{2^{k_\linput} + 2^{k_\rinput}}\label{eq:sumcoeff-times-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
The final upper bound on \cref{eq:sumcoeff-times-upper} follows by noting that $2^{k_\linput} + 2^{k_\rinput} \leq 2^{k_\linput + k_\rinput} = 2^k$ whenever $k_\linput, k_\rinput \geq 1$ (when an input is a constant leaf, its $\abs{\cdot}(1,\ldots,1)$ value is $1$ by the base case, and the bound follows from the other input alone).
For the case when the sink node is a $\circplus$ node, then we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \leq
N_\linput^{2^{k_\linput}} + N_\rinput^{2^{k_\rinput}}\nonumber\\
&\leq N_\linput^{2^{k}} + N_\rinput^{2^{k}}\label{eq:sumcoeff-plus-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
\Cref{eq:sumcoeff-plus-upper} holds since $k_\linput, k_\rinput \leq k$, and the final inequality follows from the fact that $a^c + b^c \leq (a+b)^c$ for $a, b \geq 0$ and $c \geq 1$, together with the constraint $N_\linput + N_\rinput \leq N$ over the inputs.
Since $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for all circuits in which sibling subcircuits share at most one gate (so that the constraint $N_\linput + N_\rinput \leq N$ holds), we have $\log{N^{2^k}} = 2^k \cdot \log{N}$, which for fixed $k$ yields the desired $O(\log{N})$ bits, and hence $O(1)$ time per arithmetic operation.
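For a concrete sense of scale (illustrative only): with $N = \size(\circuit) = 2^{10}$ and $k = 2$, the bound gives $\abs{\circuit}(1,\ldots, 1) \leq N^{2^2}$, i.e., a number of at most $2^2\cdot\log{N} = 40$ bits, which fits in $\frac{2^k\log{N}}{\log{N}} = 4$ machine words of $\log{N} = 10$ bits each, a constant for fixed $k$.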
\end{proof}
\subsection{\sampmon Notes}
While we would like to take advantage of the space efficiency gained by using a circuit \circuit instead of an expression tree \etree, we do not know of a method that preserves these gains when sampling a monomial from the input polynomial representation.
The efficiency gains of circuits over trees lie in the ability of circuits to require space only for each \emph{distinct} term of the compressed representation. This saves space for polynomials that multiply or add non-distinct terms to each other, e.g., the repeated-squaring encoding of $x^4$. However, to avoid biased sampling, it is imperative to sample from both inputs of a multiplication gate independently, which is indeed the approach of \sampmon.
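To see why independent sampling at a $\circmult$ gate is necessary, consider the small (illustrative) circuit $\circuit = (X + Y)\times(X + Y)$, for which $\polyf(\circuit) = X^2 + 2XY + Y^2$ and $\abs{\circuit}(1,1) = 4$. Sampling each input of the $\circmult$ gate independently returns each of $XX$, $XY$, $YX$, and $YY$ with probability $\frac{1}{4}$; in particular, the monomial $XY$ is drawn with probability $\frac{2}{4} = \frac{\abs{\coef}}{\abs{\circuit}(1,1)}$ for its coefficient $\coef = 2$. Reusing one sample for both inputs would instead return only $X^2$ or $Y^2$, each with probability $\frac{1}{2}$, and the cross term $XY$ would never be sampled.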
\subsection{Proof of~\Cref{lem:sample}}\label{sec:proof-sample-monom}
We first need to show that $\sampmon$ indeed returns a monomial $\monom$,\footnote{Technically it returns $\var(\monom)$ but for less cumbersome notation we will refer to $\var(\monom)$ simply by $\monom$ in this proof.} such that $(\monom, \coef)$ is in $\expansion{\circuit}$, which we do by induction on the depth of $\circuit$.
For the base case, let the depth $d$ of $\circuit$ be $0$. We have that the root node is either a constant $\coef$, for which line~\ref{alg:sample-num-return} returns $\{~\}$, or a variable, i.e., $\circuit.\type = \var$ and $\circuit.\val = x$, for which line~\ref{alg:sample-var-return} returns $\{x\}$. Both cases sample a monomial, and the base case is proven.
For the inductive hypothesis, assume that for $d \leq k$ for some $k \geq 0$, that it is indeed the case that $\sampmon$ returns a monomial.
For the inductive step, let us take a circuit $\circuit$ with $d = k + 1$. Note that each input has depth $d \leq k$, and by the inductive hypothesis both of them return a valid monomial. The root can be either a $\circplus$ or a $\circmult$ node. For a $\circplus$ root node, line~\ref{alg:sample-plus-bsamp} of $\sampmon$ chooses one of the inputs of the root. By the inductive hypothesis, a monomial in $\expansion{\circuit}$ is returned from either input, so for a $\circplus$ root node a valid monomial is returned by $\sampmon$. When the root is a $\circmult$ node, line~\ref{alg:sample-times-union} computes the set union of the monomials returned by the two inputs of the root, and it follows from~\Cref{def:expand-circuit} that \monom is a valid monomial in some $(\monom, \coef) \in \expansion{\circuit}$.
We next prove, by induction on the depth $d$ of $\circuit$, that for $(\monom,\coef) \in \expansion{\circuit}$, \monom is returned by $\sampmon$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
For the base case $d = 0$, by~\Cref{def:express-tree} the root has to be either a coefficient or a variable. In either case, the value returned is sampled with probability $1$, since there is only one value to sample from. When the root is a variable $x$, the algorithm correctly returns $(\{x\}, 1)$. When the root is a coefficient $\coef$, \sampmon~correctly returns $(\{~\}, sign(\coef))$.
For the inductive hypothesis, assume that for $d \leq k$ with $k \geq 0$, $\sampmon$ indeed samples $\monom$ in $(\monom, \coef)$ in $\expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
We now prove that the inductive step holds for $d = k + 1$. The root of $\circuit$ has two inputs $\circuit_\linput$ and $\circuit_\rinput$. Since $\circuit_\linput$ and $\circuit_\rinput$ are both of depth $d \leq k$, by the inductive hypothesis $\sampmon$ samples monomial $\monom_\lchild$ in $(\monom_\lchild, \coef_\lchild)$ of $\expansion{\circuit_\linput}$ and $\monom_\rchild$ in $(\monom_\rchild, \coef_\rchild)$ of $\expansion{\circuit_\rinput}$ with probabilities $\frac{|\coef_\lchild|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_\rchild|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ respectively.
The root has to be either a $\circplus$ or $\circmult$ node.
Consider the case when the root is $\circmult$. Note that we are sampling a term from $\expansion{\circuit}$. Consider $(\monom, \coef)$ in $\expansion{\circuit}$, where $\monom$ is the sampled monomial. Notice also that it is the case that $\monom = \monom_\lchild \circmult \monom_\rchild$, where $\monom_\lchild$ is coming from $\circuit_\linput$ and $\monom_\rchild$ from $\circuit_\rinput$. The probability that \sampmon$(\circuit_{\lchild})$ returns $\monom_\lchild$ is $\frac{|\coef_{\monom_\lchild}|}{|\circuit_\linput|(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ for $\monom_\rchild$. Since both $\monom_\lchild$ and $\monom_\rchild$ are sampled with independent randomness, the final probability for sample $\monom$ is then $\frac{|\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|}{|\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)}$. For $(\monom, \coef)$ in \expansion{\circuit}, it is indeed the case that $|\coef| = |\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|$ and that $\abs{\circuit}(1,\ldots, 1) = |\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)$, and therefore $\monom$ is sampled with correct probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
For the case when $\circuit.\type = \circplus$, \sampmon~samples monomial $\monom$ from one of its inputs. By the inductive hypothesis we know that any $\monom_\lchild$ in $\expansion{\circuit_\linput}$ and any $\monom_\rchild$ in $\expansion{\circuit_\rinput}$ are sampled with correct probabilities $\frac{|\coef_{\monom_\lchild}|}{\abs{\circuit_\linput}(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}(1,\ldots, 1)}$, where either $\monom_\lchild$ or $\monom_\rchild$ will equal $\monom$, depending on whether $\circuit_\linput$ or $\circuit_\rinput$ is sampled. Assume that $\monom$ is sampled from $\circuit_\linput$; a symmetric argument holds when $\monom$ is sampled from $\circuit_\rinput$. Note that the probability of choosing $\circuit_\linput$ from $\circuit$ is $\frac{\abs{\circuit_\linput}\polyinput{1}{1}}{\abs{\circuit_\linput}\polyinput{1}{1} + \abs{\circuit_\rinput}\polyinput{1}{1}}$, as computed by $\onepass$. Then, since $\sampmon$ proceeds top-down and each sampling choice is independent (which follows from the randomness in the root of $\circuit$ being independent from the randomness used in its subtrees), the probability for $\monom$ to be sampled from $\circuit$ is equal to the product of the probability that $\circuit_\linput$ is sampled from $\circuit$ and that $\monom$ is sampled in $\circuit_\linput$:
\begin{align*}
&\probOf(\sampmon(\circuit) = \monom) = \\
&\probOf(\sampmon(\circuit_\linput) = \monom) \cdot \probOf(SampledChild(\circuit) = \circuit_\linput)\\
&= \frac{|\coef_\monom|}{|\circuit_\linput|(1,\ldots, 1)} \cdot \frac{\abs{\circuit_\linput}(1,\ldots, 1)}{|\circuit_\linput|(1,\ldots, 1) + |\circuit_\rinput|(1,\ldots, 1)}\\
&= \frac{|\coef_\monom|}{\abs{\circuit}(1,\ldots, 1)},
\end{align*}
and we obtain the desired result.
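As a concrete instance (using the circuit of~\Cref{example:one-pass}): consider the monomial $\monom = \{X_2\}$ arising from the term $X_2^2$ with coefficient $\coef = 1$. \sampmon chooses the right input of the root with probability $\etree.\rwght = \frac{1}{5}$ and then descends deterministically through the $\circmult$ gate over the two $x_2$ leaves, so this $(\monom, \coef)$ pair is returned with probability $\frac{1}{5} = \frac{\abs{\coef}}{\abs{\etree}\polyinput{1}{1}}$, matching the claim.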
\paragraph{Run-time Analysis}
It is easy to check that except for lines~\ref{alg:sample-times-union} and~\ref{alg:sample-plus-bsamp}, all lines take $O(1)$ time. Consider an execution of~\Cref{alg:sample-times-union}: we add a given set of variables to some set at most once, and since the sum of the sizes of the sets at a given level is at most $\degree(\circuit)$, each gate visited takes $O(\log{\degree(\circuit)})$ time. For~\cref{alg:sample-plus-bsamp}, we need more than $O(1)$ time when $\abs{\circuit}(1,\ldots, 1) > \size(\circuit)$: in this case each sample requires $O\left(\frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$ operations, since we need to read in and then compare numbers of $\log{\abs{\circuit}(1,\ldots, 1)}$ bits. Let \cost(\circuit) (\Cref{eq:cost-sampmon}) be an upper bound on the number of gates visited by \sampmon. Then the runtime is $O\left(\cost(\circuit)\cdot \log{\degree(\circuit)}\cdot\frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$.
We now bound the number of recursive calls in $\sampmon$ by $O\left(\left(\degree(\circuit) + 1\right)\cdot\depth(\circuit)\right)$.
Let \cost$(\cdot)$ be a function that models an upper bound on the number of gates that can be visited in the run of \sampmon. We define \cost$(\cdot)$ recursively as follows.
\begin{equation}
\cost(\circuit) =
\begin{cases}
1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) & \textbf{if } \text{\circuit.\type = }\circmult\\
1 + \max\left(\cost(\circuit_\linput), \cost(\circuit_\rinput)\right) & \textbf{if } \text{\circuit.\type = \circplus}\\
1 & \textbf{otherwise}
\end{cases}\label{eq:cost-sampmon}
\end{equation}
First note that the number of gates visited in \sampmon is at most $\cost(\circuit)$. To show that \Cref{eq:cost-sampmon} upper bounds the number of gates visited by \sampmon, note that when \sampmon visits a gate with \circuit.\type $= \circmult$, line~\ref{alg:sample-times-for-loop} visits each input of \circuit, as accounted for in \cref{eq:cost-sampmon}. When \circuit.\type $= \circplus$, line~\ref{alg:sample-plus-bsamp} visits exactly one of the input gates, which may or may not be the subcircuit with the maximum number of gates traversed; taking the maximum makes \cost$(\cdot)$ an upper bound. Finally, when \circuit.\type $\in \{\var, \tnum\}$, i.e., a source gate, only one gate is visited.
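As a quick (illustrative) check of \Cref{eq:cost-sampmon}: for $\circuit = (X + Y)\times(U + V)$ we get $\cost(\circuit) = 1 + \cost(X + Y) + \cost(U + V) = 1 + 2 + 2 = 5$, matching the gates \sampmon visits: the root, both $\circplus$ gates, and one leaf under each. Here $\degree(\circuit) = 2$ and $\depth(\circuit) = 2$, so the bound proved next evaluates to $2\cdot 2\cdot 2 + 1 = 9 \geq 5$.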
We prove the following inequality holds.
\begin{equation}
2\degree(\circuit) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)\label{eq:strict-upper-bound}
\end{equation}
Note that \Cref{eq:strict-upper-bound} implies the claimed runtime. We prove \Cref{eq:strict-upper-bound} for the number of gates traversed in \sampmon using induction over $\depth(\circuit)$. Recall that \reduce has imposed the invariant that all subcircuits \subcircuit in \circuit must have $\subcircuit.\degval \geq 1$.
For the base case $\degree(\circuit) = \depth(\circuit) = 0$, $\cost(\circuit) = 1$, and it is trivial to see that the inequality $2\degree(\circuit) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)$ holds.
For the inductive hypothesis, we assume the bound holds for every circuit with $1 \leq \depth(\circuit) \leq \ell$.
Now consider the case when \sampmon has an arbitrary circuit \circuit input with $\depth(\circuit) = \ell + 1$. By definition \circuit.\type $\in \{\circplus, \circmult\}$. Note that since $\depth(\circuit) \geq 2$, \circuit must have inputs. Further we know that by the inductive hypothesis the inputs $\circuit_i$ for $i \in \{\linput, \rinput\}$ of the sink gate \circuit uphold the bound
\begin{equation}
2\degree(\circuit_i)\cdot \depth(\circuit_i) + 1 \geq \cost(\circuit_i).\label{eq:ih-bound-cost}
\end{equation}
It is also true that $\depth(\circuit_\linput) \leq \depth(\circuit) - 1$ and $\depth(\circuit_\rinput) \leq \depth(\circuit) - 1$.
If \circuit.\type $= \circplus$, then $\degree(\circuit) = \max\left(\degree(\circuit_\linput), \degree(\circuit_\rinput)\right)$. Otherwise \circuit.\type = $\circmult$ and $\degree(\circuit) = \degree(\circuit_\linput) + \degree(\circuit_\rinput)$. In either case it is true that $\depth(\circuit) = \max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1$.
If \circuit.\type $= \circmult$, then, substituting values, the following must hold:
\begin{align}
&2\left(\degree(\circuit_\linput) + \degree(\circuit_\rinput)\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) + 1 \nonumber\\%\label{eq:times-lhs}\\
&\qquad\geq 2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 2 \degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) + 3\label{eq:times-middle} \\
&\qquad\geq 1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) = \cost(\circuit)\label{eq:times-rhs}.
\end{align}
To prove (\ref{eq:times-middle}), note first that the LHS expands to
\begin{equation}
%(\ref{eq:times-lhs})
2\degree(\circuit_\linput)\depth_{\max} + 2\degree(\circuit_\rinput)\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1\label{eq:times-lhs-expanded}
\end{equation}
where $\depth_{\max}$ is used to denote the maximum depth of the two input subcircuits.
Let us now simplify the inequality (\ref{eq:times-middle}).
\begin{align}
&2\degree(\circuit_\linput)\depth_{\max} + 2\degree(\circuit_\rinput)\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1 \nonumber\\
&\qquad \geq 2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 2 \degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) + 3\nonumber\\
&\qquad\Longleftarrow 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 1 \geq 3,\label{eq:times-lhs-middle-step1}
\end{align}
since $\depth_{\max} \geq \depth(\circuit_i)$ for each input $i \in \{\linput, \rinput\}$.
Note that by the \emph{reduced} invariant of \reduce, a circuit \circuit with $\depth(\circuit) \geq 1$ will always have at least one input with $\degree(\circuit_i) \geq 1$. Thus, \Cref{eq:times-lhs-middle-step1} follows, and the inequality is upheld.
We now justify (\ref{eq:times-rhs}): its RHS is exactly \Cref{eq:cost-sampmon} for the case $\circuit.\type = \circmult$, and its LHS is obtained by substituting the upper bound of (\ref{eq:ih-bound-cost}) for each $\cost(\circuit_i)$. This proves \Cref{eq:strict-upper-bound} for the $\circmult$ case.
For the case when \circuit.\type $= \circplus$, substituting values yields
\begin{align}
&2\max(\degree(\circuit_\linput), \degree(\circuit_\rinput)) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) +1\nonumber\\%\label{eq:plus-lhs-inequality}\\
&\qquad \geq \max\left(2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 1, 2\degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) +1\right) + 1\label{eq:plus-middle}\\
&\qquad \geq 1 + \max(\cost(\circuit_\linput), \cost(\circuit_\rinput)) = \cost(\circuit)\label{eq:plus-rhs}
\end{align}
To prove (\ref{eq:plus-middle}), we can rewrite its LHS as
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 1.\label{eq:plus-lhs-expanded}
\end{equation}
Since $\degree_{\max} \cdot \depth_{\max} \geq \degree(\circuit_i)\cdot \depth(\circuit_i),$ the following upper bound holds for the RHS of (\ref{eq:plus-middle}):
\begin{equation}
2\degree_{\max}\depth_{\max} + 2 \geq \max\left(2\degree(\circuit_\linput) \cdot \depth(\circuit_\linput) + 1, 2\degree(\circuit_\rinput) \cdot \depth(\circuit_\rinput) +1\right) + 1.\label{eq:plus-middle-expanded}
\end{equation}
Combining (\ref{eq:plus-lhs-expanded}) with (\ref{eq:plus-middle-expanded}), inequality (\ref{eq:plus-middle}) reduces to showing:
\begin{align}
&2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 1 \geq 2\degree_{\max}\depth_{\max} + 2\nonumber\\
&\implies 2\degree_{\max} + 1 \geq 2\label{eq:plus-upper-bound-final}.
\end{align}
As in the $\circmult$ case, the \emph{reduced} invariant of \reduce implies that $\degree_{\max} \geq 1$, and (\ref{eq:plus-upper-bound-final}) follows. This proves (\ref{eq:plus-middle}).
Similar to the case of $\circuit.\type = \circmult$, (\ref{eq:plus-rhs}) follows by equations $(\ref{eq:cost-sampmon})$ and $(\ref{eq:ih-bound-cost})$.
This proves (\ref{eq:strict-upper-bound}) for the $\circplus$ case, and thus the claimed $O\left(k\log{k}\cdot \frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\cdot\depth(\circuit)\right)$ runtime for $k = \degree(\circuit)$ follows.
\subsection{Experimental Results}\label{app:subsec:experiment}
\input{experiments}
\section{Circuits}\label{app:sec-cicuits}
\subsection{Representing Polynomials with Circuits}\label{app:subsec-rep-poly-lin-circ}
\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
Each vertex $v \in V_{Q,\pxdb}$ in the arithmetic circuit for
\[\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}\]
encodes a polynomial, realized as
\[\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = +\\
\prod_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
\ell(v) & \textbf{otherwise}
\end{cases}\]
We define the circuit for a select-union-project-join $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$.
\caseheading{Base Relation}
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
This circuit has $|R|$ vertices.
\caseheading{Selection}
Let $Q = \sigma_\theta \inparen{Q_1}$.
We re-use the circuit for $Q_1$, discarding the tuples that fail the selection predicate.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb}$, let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$, let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$, and define
$$\phi_{Q,\pxdb}(t) =
\phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t).$$
Dead sinks are iteratively removed, and so this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
\caseheading{Projection}
Let $Q = \pi_{\vct A} {Q_1}$.
We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
$$E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
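For example (illustrative only): if four tuples $t'_1,\ldots,t'_4 \in Q_1$ all project to the same output tuple $t$, the naive construction gives $v_t$ in-degree $4$; replacing $v_t$ by a balanced binary fan-in tree of $\circplus$ vertices uses $4 - 1 = 3$ sum vertices for this group, in line with the count above.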
\caseheading{Union}
Let $Q = {Q_1} \cup {Q_2}$.
We merge graphs and produce a sum vertex for all tuples in both sides of the union.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
\[
\phi_{Q,\pxdb}(t) = \begin{cases}
v_t & \textbf{if } t \in {Q_1} \cap {Q_2}\\
\phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
\phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
\end{cases}\]
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\caseheading{$k$-ary Join}
Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
We merge graphs and produce a multiplication vertex for each tuple resulting from the join.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
{\small
\begin{multline*}
E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
\left\{\;
(\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
\ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
\end{multline*}
}
Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$.
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
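As a quick (illustrative) instance of these counts: for a binary join ($k = 2$) with $|V_{Q_1,\pxdb}| = |V_{Q_2,\pxdb}| = 4$ and $|{Q_1} \bowtie {Q_2}| = 3$, the corrected circuit has $4 + 4 + (2-1)\cdot 3 = 11$ vertices; each output tuple contributes a single $\times$ vertex of in-degree $2$, so no additional fan-in vertices are needed.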
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof for~\Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
The proof is by induction over the structure of the query. The base case is a base relation $Q = R$, for which the claim is trivially true since $|V_{R,\pxdb}| = |R|$.
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i,\pxdb}| \leq (k_i-1)\qruntime{Q_i,\pxdb}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
In the circuit for $Q$ we have $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$, so from the inductive assumption and $\qruntime{Q,\pxdb} = \qruntime{Q_1,\pxdb}$ (by definition), we have $|V_{Q,\pxdb}| \leq (k-1) \qruntime{Q,\pxdb}$.
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
\intertext{(From the inductive assumption)}
& \leq (k-1)\qruntime{Q_1,\pxdb} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \le (k-1)\qruntime{Q,\pxdb}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
\intertext{(From the inductive assumption)}
& \leq (k-1)(\qruntime{Q_1,\pxdb} + \qruntime{Q_2,\pxdb}) + (\abs{Q_1} + \abs{Q_2})
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \leq (k-1)(\qruntime{Q,\pxdb}).
\end{align*}
\caseheading{$k$-ary Join}
Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
\intertext{From the inductive assumption and noting that $\forall i: k_i \leq k$}
& \leq (k-1)\qruntime{Q_1,\pxdb}+\ldots+(k-1)\qruntime{Q_k,\pxdb}+\\
&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
& \leq (k-1)(\qruntime{Q_1,\pxdb}+\ldots+\qruntime{Q_k,\pxdb}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& = (k-1)\qruntime{Q,\pxdb}.
\end{align*}
The property thus holds for all queries formed from these operators, completing the proof.
\subsection{Proof of~\Cref{lem:qEk-multi-p}}
\begin{proof}%[Proof of \Cref{lem:qEk-multi-p}]
We first argue that $\rpoly_{G}^\kElem(\prob,\ldots, \prob) = \sum\limits_{i = 0}^{2\kElem} c_i \cdot \prob^i$. First, since $\poly_G(\vct{X})$ has degree $2$, it follows that $\poly_G^\kElem(\vct{X})$ has degree $2\kElem$. By definition, $\rpoly_{G}^{\kElem}(\vct{X})$ sets every exponent $e > 1$ to $e = 1$, which means that $\degree(\rpoly_{G}^\kElem)\le \degree(\poly_G^\kElem)= 2\kElem$. Thus, if we think of $\prob$ as a variable, then $\rpoly_{G}^{\kElem}(\prob,\dots,\prob)$ is a univariate polynomial of degree at most $\degree(\rpoly_{G}^\kElem)\le 2\kElem$, and we can write
\begin{equation*}
\rpoly_{G}^{\kElem}(\prob,\ldots, \prob) = \sum_{i = 0}^{2\kElem} c_i \prob^i.
\end{equation*}
We note that $c_i$ is {\em exactly} the number of monomials in the SMB expansion of $\poly_{G}^{\kElem}(\vct{X})$ composed of $i$ distinct variables.\footnote{Since $\rpoly_G^\kElem(\vct{X})$ does not have any monomial with degree $< 2$, it is the case that $c_0 = c_1 = 0$, but for the sake of simplicity we will ignore this observation.}
Given the $2\kElem + 1$ distinct values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq2\kElem$, we obtain a linear system $\vct{M} \cdot \vct{c} = \vct{b}$, where the $i$th row of $\vct{M}$ is $\inparen{\prob_i^0\ldots\prob_i^{2\kElem}}$, $\vct{c}$ is the coefficient vector $\inparen{c_0,\ldots, c_{2\kElem}}$, and $\vct{b}$ is the vector with $\vct{b}[i] = \rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$. In other words, $\vct{M}$ is a Vandermonde matrix and hence has full rank (the $\prob_i$'s are distinct), so we can solve the linear system in $O(\kElem^3)$ time (e.g., using Gaussian elimination) to determine $\vct{c}$ exactly.
Thus, after $O(\kElem^3)$ work, we know $\vct{c}$, and in particular $c_{2\kElem}$, exactly.
Next, we show why we can compute $\numocc{G}{\kmatch}$ from $c_{2\kElem}$ in $O(1)$ additional time.
We claim that $c_{2\kElem}$ is $\kElem! \cdot \numocc{G}{\kmatch}$. This can be seen intuitively by looking at the original factorized representation
\[\poly_{G}^\kElem(\vct{X}) = \sum_{\substack{(i_1, j_1),\cdots,(i_\kElem, j_\kElem) \in E}}X_{i_1}X_{j_1}\cdots X_{i_\kElem}X_{j_\kElem},\]
where across each of the $\kElem$ products, an arbitrary $\kElem$-matching can be selected $\prod_{i = 1}^\kElem i = \kElem!$ times.
First, note that each $\kElem$-matching $(i_1, j_1)\ldots$ $(i_\kElem, j_\kElem)$ in $G$ corresponds to the monomial $\prod_{\ell = 1}^\kElem X_{i_\ell}X_{j_\ell}$ in $\poly_{G}^\kElem(\vct{X})$, with distinct indexes. Second, the only surviving monomials $\prod_{\ell = 1}^\kElem X_{i_\ell}X_{j_\ell}$ of degree exactly $2\kElem$ in $\rpoly_{G}^{\kElem}(\vct{X})$ must have that all of $i_1,j_1,\dots,i_\kElem,j_\kElem$ are distinct in $\poly_{G}^{\kElem}(\vct{X})$.
By the last two statements, only monomials composed of $2\kElem$ distinct variables in $\poly_{G}^{\kElem}(\vct{X})$ (and hence of degree $2\kElem$ in $\rpoly_{G}^{\kElem}(\vct{X})$) correspond to a $\kElem$-matching in $G$.
Notice that each of the $\kElem!$ permutations of an arbitrary monomial maps to the same distinct $\kElem$-matching in $G$, and this implies a $\kElem!$ to $1$ mapping between degree $2\kElem$ monomials in $\rpoly_{G}^{\kElem}(\vct{X})$ and $\kElem$-matchings in $G$.
It then follows that $c_{2\kElem}= \kElem! \cdot \numocc{G}{\kmatch}$.
Thus, simply dividing $c_{2\kElem}$ by $\kElem!$ gives us $\numocc{G}{\kmatch}$, as needed. \qed
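As a sanity check (illustrative only): for $\kElem = 1$ and $G$ a triangle, $\poly_G^1(\vct{X}) = X_1X_2 + X_2X_3 + X_1X_3$ is already multilinear, so $\rpoly_G^1(\prob,\ldots,\prob) = 3\prob^2$ and $c_2 = 3$. Indeed, $\numocc{G}{\kmatch} = \frac{c_2}{1!} = 3$: one $1$-matching per edge.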
\end{proof}

\subsubsection{Proof of \Cref{lem:lin-sys}}
\begin{proof}%[Proof of \Cref{lem:lin-sys}]
The proof consists of two parts. First we need to show that a vector $\vct{b}$ satisfying the linear system exists and further can be computed in $O(m)$ time. Second we need to show that $\numocc{G}{\tri}, \numocc{G}{\threedis}$ can indeed be computed in time $O(1)$.
The lemma claims that for $\vct{M} =
We follow the same process in deriving an equality for $G^{(2)}$.
&-\left[\numocc{\graph{2}}{\threepath}\prob+3\numocc{\graph{2}}{\tri}\prob\right]-\left[\numocc{\graph{2}}{\twopathdis}\prob^2-3\numocc{\graph{2}}{\threedis}\prob^2\right]\nonumber\\
&+\left(4\numocc{G}{\oneint}+\left[6\numocc{G}{\twopathdis}+18\numocc{G}{\threedis}\right]+\left[4\numocc{G}{\threepath}+12\numocc{G}{\tri}\right]\right)(3\prob^2 - \prob^3)\label{eq:b2-final}
\end{align}
As in the previous equality derivation for $G$, note that the LHS of \Cref{eq:b2-final} is the same as $\vct{M}[2]\cdot \vct{x}[2]$. The RHS of \Cref{eq:b2-final} has terms all computable (by equations (\ref{eq:1e})-(\ref{eq:3p-3tri})) in $O(m)$ time. Setting $\vct{b}[2]$ to the RHS then completes the proof of step 1.
Note that if $\vct{M}$ has full rank then one can compute $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ in $O(1)$ using Gaussian elimination.
To show that $\vct{M}$ indeed has full rank, we will show that $\dtrm{\vct{M}}\ne 0$ for every $\prob\in (0,1)$.
Let $\vct{M} = $
\begin{align}
&\begin{vmatrix}
1-3\prob &-(3\prob^2 - \prob^3)\\
From ~\cref{eq:det-final} it can easily be seen that the roots of $\dtrm{\vct{M}}$ are $0, 1,$ and $3$. Hence there are no roots in $(0, 1)$ and ~\cref{lem:lin-sys} follows.
\qed
\end{proof}

\clearpage
\appendix
\normalsize
\input{appendix}
\input{related-work-extra}
\end{document}

In this section, we will prove that computing $\expct\limits_{\vct{W} \sim \pd}\pbox{\poly(\vct{W})}$ exactly for a \ti-lineage polynomial $\poly(\vct{X})$ generated from a project-join query is \sharpwonehard. Note that this implies hardness for \bis and general $\semNX$-PDBs. Furthermore, we demonstrate in \Cref{sec:single-p} that the problem remains hard, even if $\probOf(X_i) = \prob$ for all $X_i$ and any fixed value $\prob \in (0, 1)$, as long as certain popular hardness conjectures in fine-grained complexity hold.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Preliminaries}
Our hardness results are based on (exactly) counting the number of occurrences of a subgraph $H$ in $G$. Let $\numocc{G}{H}$ denote the number of occurrences of $H$ in graph $G$. We can think of $H$ as being of constant size and $G$ as growing. In query processing, $H$ can be viewed as the query while $G$ as the database instance.
In particular, we will consider the problems of computing the following counts (given $G$ as an input and its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threepath}$ (the number of $3$-paths), $\numocc{G}{\threedis}$ (the number of $3$-matchings) and its generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). Our hardness result in \Cref{sec:multiple-p} is based on the following result:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}[\cite{k-match}]
Here, we present a reduction from the problem of counting $\kElem$-matchings in $G$:
\begin{Lemma}\label{lem:qEk-multi-p}
Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$. Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $O\inparen{\kElem^3}$ time.
\end{Lemma}