Changes to Appendix C.

master
Aaron Huber 2021-06-15 16:57:32 -04:00
parent 200da71a90
commit 45488b5396
10 changed files with 91 additions and 139 deletions

View File

@ -28,28 +28,19 @@ For any $\circuit$ with $\degree(poly(|\circuit|)) = k$, algorithm \ref{alg:mon-
Before proving \Cref{lem:mon-samp}, we use it to argue our main result, \Cref{lem:approx-alg}.
%The algorithm to prove \Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circuit over $\bi$, we can exactly represent $\rpoly(\vct{X})$ as follows:
%\begin{equation}
%\label{eq:tilde-Q-bi}
%\rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} \hspace*{-2mm} \indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \var\inparen{\monom}}\hspace*{-2mm} X_i
%\end{equation}
%Given the above, the algorithm is a sampling based algorithm for the above sum: we sample $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.} to $\abs{\coef}$ and compute $Y=\indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{\monom}} p_i$. Taking $\numsamp$ samples and computing the average of $Y$ gives us our final estimate.
%The number of samples is computed by (see \Cref{app:subsec-th-mon-samp}):
%\begin{equation*}
%2\exp{\left(-\frac{\samplesize\error^2}{2}\right)}\leq \conf \implies\samplesize \geq \frac{2\log{\frac{2}{\conf}}}{\error^2}.
%\end{equation*}
\begin{proof}
Set $\mathcal{E}=\approxq({\circuit}, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{{\circuit}}(1,\ldots, 1)},\]
which achieves the claimed accuracy bound on $\mathcal{E}$ due to \Cref{lem:mon-samp}.
\AH{Where did the $1 - \gamma$ term go to?}
The claim on the runtime follows from \Cref{lem:mon-samp} since
\AH{I think $\error'$ causes more confusion than necessary. Any way we can clean this up?}
\begin{align*}
\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1-\gamma)}{\abs{{\circuit}}(1,\ldots, 1)}\right)^2}\\
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot (1-\gamma)^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)}.
\end{align*}
%and the runtime then follows, thus upholding \Cref{lem:approx-alg}.
This completes the proof.
\qed
\end{proof}
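For intuition, the sampling scheme implemented by \approxq can be sketched in a few lines of Python. This is a minimal sketch under our own modeling assumptions: it samples directly from a materialized $\expansion{\circuit}$ rather than from the circuit via \onepass and \sampmon, and the inputs \texttt{expansion} and \texttt{probs} are hypothetical encodings of $\expansion{\circuit}$ and $(\prob_1,\dots,\prob_\numvar)$.
\begin{verbatim}
import math, random

def approximate_rpoly(expansion, probs, eps, delta):
    # expansion: list of (monomial, coef) pairs; a monomial is a list of
    # (variable, block_id) pairs. probs[variable] = p_i.
    total = sum(abs(c) for _, c in expansion)   # plays the role of |C|(1,...,1)
    weights = [abs(c) for _, c in expansion]    # sample proportional to |coef|
    n_samples = math.ceil(2 * math.log(2 / delta) / eps ** 2)
    acc = 0.0
    for _ in range(n_samples):
        monom, coef = random.choices(expansion, weights=weights, k=1)[0]
        blocks = [b for _, b in monom]
        if len(blocks) == len(set(blocks)):     # indicator: no duplicate block
            y = math.copysign(1, coef)
            for var, _ in monom:
                y *= probs[var]
            acc += y
    return (acc / n_samples) * total            # estimate of rpoly(p_1,...,p_n)
\end{verbatim}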
@ -57,6 +48,7 @@ We now return to the proof of \Cref{lem:mon-samp}:
\subsection{Proof of Lemma \ref{lem:mon-samp}}\label{app:subsec-th-mon-samp}
\begin{proof}
Consider now the random variables $\randvar_1,\dots,\randvar_\samplesize$, where each $\randvar_i$ is the value of $\vari{Y}_{\vari{i}}$ after \Cref{alg:mon-sam-product} is executed. In particular, note that we have
\AH{I have two comments. 1) Is it too much to have different r.v.s? Why not consolidate and use the same in the algo and analysis? 2)Here again, we are using $\monom$ as a monomial rather than a set of variables.}
\[Y_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_j\in \var\inparen{\monom}} p_j,\]
where the indicator variable handles the check in \Cref{alg:check-duplicate-block}.
Then for the random variable $\randvar_i$, it is the case that
@ -82,15 +74,15 @@ Using Hoeffding's inequality, we then get:
\begin{equation*}
\probOf\left(~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
\end{equation*}
where the last inequality follows from our choice of $\samplesize$ in \Cref{alg:mon-sam-global2}.
where the last inequality dictates our choice of $\samplesize$ in \Cref{alg:mon-sam-global2}.
For the claimed probability bound of $\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf$, note that in the algorithm, \vari{acc} is exactly $\empmean \cdot \abs{\circuit}(1,\ldots, 1)$. Multiplying the rest of the terms by the same factor yields the said bound.
For the claimed probability bound of $\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf$, note that in the algorithm, \vari{acc} is exactly $\empmean \cdot \abs{\circuit}(1,\ldots, 1)$. Multiplying the rest of the terms by the additional factor $\abs{\circuit}(1,\ldots, 1)$ yields the said bound.
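As a quick numeric sanity check (the example numbers are ours), the bound $2\exp\left(-\frac{\samplesize\error^2}{2}\right)\leq \conf$ inverts to $\samplesize \geq \frac{2\log{\frac{2}{\conf}}}{\error^2}$:
\begin{verbatim}
import math

def required_samples(eps, delta):
    # smallest N with 2*exp(-N*eps^2/2) <= delta, i.e. N >= 2*ln(2/delta)/eps^2
    return math.ceil(2 * math.log(2 / delta) / eps ** 2)

print(required_samples(0.1, 0.1))  # 600; check: 2*exp(-600*0.01/2) ~ 0.0996 <= 0.1
\end{verbatim}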
This concludes the proof of the first claim of \Cref{lem:mon-samp}. We prove the claim on the runtime next.
\paragraph*{Run-time Analysis}
The runtime of the algorithm is dominated by \Cref{alg:mon-sam-onepass} (which by \Cref{lem:one-pass} takes time $O\left({\size(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$) and the $\samplesize$ iterations of the loop in \Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to \Cref{alg:mon-sam-sample} (which by \Cref{lem:sample} takes $O\left(\log{k} \cdot k \cdot {\depth(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$
) and \Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Adding up all the times discussed here gives us the desired overall runtime.
The runtime of the algorithm is dominated first by \Cref{alg:mon-sam-onepass} (which by \Cref{lem:one-pass} takes time $O\left({\size(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$) and then by $\samplesize$ iterations of the loop in \Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to \sampmon in \Cref{alg:mon-sam-sample} (which by \Cref{lem:sample} takes $O\left(\log{k} \cdot k \cdot {\depth(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$
) and the check in \Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Combining all the times discussed here gives us the desired overall runtime.
\qed
\end{proof}
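The $O(k\log{k})$ duplicate-block test used in the runtime analysis above amounts to sorting the $O(k)$ block IDs and scanning adjacent entries; a sketch (representing block IDs as plain integers is our assumption):
\begin{verbatim}
def has_duplicate_block(block_ids):
    # O(k log k): sort the O(k) block IDs, then scan adjacent entries.
    ids = sorted(block_ids)
    return any(ids[j] == ids[j + 1] for j in range(len(ids) - 1))
\end{verbatim}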
@ -98,7 +90,8 @@ The runtime of the algorithm is dominated by \Cref{alg:mon-sam-onepass} (which b
\begin{proof}
The result follows by first noting that by definition of $\gamma$, we have
\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1).\]
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$,
\AH{degree $k$ assumes that $k = \degree(\circuit)$, correct? Further it can be confusing with the analysis of the $\poly_G$ in section 3, where we have degreed $2k$. It would be nice to be able to deliniate these further.} we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1)$.
Applying this bound in the runtime bound in \Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$ follows by noting that $\depth({\circuit})\le \size({\circuit})$ and absorbing all factors that just depend on $k$.
@ -117,15 +110,14 @@ We will prove \Cref{lem:val-ub} by considering the three cases separately. We st
Let $\circuit$ be a tree (i.e. the sub-circuits corresponding to two children of a node in $\circuit$ are completely disjoint). Then we have
\[\abs{\circuit}(1,\dots,1)\le \left(\size(\circuit)\right)^{\degree(\circuit)+1}.\]
\end{Lemma}
\begin{proof}%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
\begin{proof}[Proof of \Cref{lem:C-ub-tree}]%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
For notational simplicity define $N=\size(\circuit)$ and $k=\degree(\circuit)$.
To prove this result, we by prove by induction on $\depth(\circuit)$ that $\abs{\circuit}(1,\ldots, 1) \leq N^{k+1 }$.
We use induction on $\depth(\circuit)$ to show that $\abs{\circuit}(1,\ldots, 1) \leq N^{k+1 }$.
For the base case, we have that $\depth(\circuit) = 0$, so there can only be one node, which must contain a coefficient (or constant) of $1$. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$ and $\size(\circuit) = 1$, so indeed $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{k+1} = 1^{1} = 1$.
Assume that for $\ell > 0$, an arbitrary circuit \circuit with $\depth(\circuit) \leq \ell$ satisfies $\abs{\circuit}(1,\ldots, 1) \leq N^{\degree(\circuit)+1}$.% for $k \geq 1$ when \depth(C) $\geq 1$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or $\circplus$ gate. Consider when sink node is $\circmult$. Let $k_\linput, k_\rinput$ denote \degree($\circuit_\linput$) and \degree($\circuit_\rinput$) respectively. %Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
%In this case we do not use the fact that $\circuit$ is a tree and just assume that $N_\linput,N_\rinput\le N-1$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or a $\circplus$ gate. Let $k_\linput, k_\rinput$ denote $\degree(\circuit_\linput)$ and $\degree(\circuit_\rinput)$ respectively. Consider first the case when the sink node is $\circmult$.
Then note that
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\cdot \abs{\circuit_\rinput}(1,\ldots, 1) \nonumber\\
@ -134,7 +126,7 @@ For the inductive step we consider a circuit \circuit such that $\depth(\circuit
&\leq N^{k + 1}.\nonumber
\end{align}
%We derive the upperbound of \Cref{eq:sumcoeff-times-upper} by noting that the maximum value of the LHS occurs when both the base and exponent are maximized.
In the above the first inequality follows from the inductive hypothesis (and the fact that the size of either subtree is at most $N-1$) and \Cref{eq:sumcoeff-times-upper} follows by nothing that for a $\times$ gate we have $k=k_\linput+k_\rinput+1$.
In the above, the first inequality follows from the inductive hypothesis (and the fact that the size of either subtree is at most $N-1$), and \Cref{eq:sumcoeff-times-upper} follows by noting that, by \cref{def:degree}, for $k = \degree(\circuit)$ we have $k=k_\linput+k_\rinput+1$.
For the case when the sink gate is a $\circplus$ gate, for $N_\linput = \size(\circuit_\linput)$ and $N_\rinput = \size(\circuit_\rinput)$ we have
\begin{align}
@ -144,7 +136,7 @@ N_\linput^{k+1} + N_\rinput^{k+1}\nonumber\\
&\leq (N-1)^{k+1 } \label{eq:sumcoeff-plus-upper}\\
&\leq N^{k+1}.\nonumber
\end{align}
In the above, the first inequality follows from the inductive hypothesis (and the fact that $k_\linput,k_\rinput\le k$). Note that the RHS of this inequality is maximized when the base and exponent of one of the terms is maximized. The second inequality follows from this fact as well as the fact that since $\circuit$ is a tree we have $N_\linput+N_\rinput=N-1$ and, lastly, the fact that $k\ge 0$. This completes the proof.
In the above, the first inequality follows from the inductive hypothesis and \cref{def:degree} (which implies the fact that $k_\linput,k_\rinput\le k$). Note that the RHS of this inequality is maximized when the base and exponent of one of the terms are maximized. The second inequality follows from this fact as well as the fact that, since $\circuit$ is a tree, we have $N_\linput+N_\rinput=N-1$ and, lastly, the fact that $k\ge 0$. This completes the proof.
\end{proof}
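The bound of \Cref{lem:C-ub-tree} can also be checked mechanically on small random expression trees. This is a sketch under our own encoding (gates as nested tuples; a leaf stands for a variable or a coefficient of $1$ and has degree $0$, matching the base case above):
\begin{verbatim}
import random

def build_tree(d):
    # random binary expression tree of depth d
    if d == 0:
        return ('leaf',)
    return (random.choice(['+', '*']), build_tree(d - 1), build_tree(d - 1))

def size(c):
    return 1 if c[0] == 'leaf' else 1 + size(c[1]) + size(c[2])

def degree(c):
    # the paper's notion of degree: k_L + k_R + 1 at a multiplication gate,
    # max(k_L, k_R) at an addition gate, 0 at a source gate (cf. def:degree)
    if c[0] == 'leaf':
        return 0
    kl, kr = degree(c[1]), degree(c[2])
    return kl + kr + 1 if c[0] == '*' else max(kl, kr)

def abs_all_ones(c):
    # |C|(1,...,1)
    if c[0] == 'leaf':
        return 1
    l, r = abs_all_ones(c[1]), abs_all_ones(c[2])
    return l * r if c[0] == '*' else l + r

for _ in range(1000):
    t = build_tree(random.randint(0, 6))
    assert abs_all_ones(t) <= size(t) ** (degree(t) + 1)
\end{verbatim}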
%\revision{\textbf{THE PART BELOW NEEDS WORK. --Atri}}
@ -155,7 +147,7 @@ Let $\circuit$ be a (general) circuit. % tree (i.e. the sub-circuits correspondi
Then we have
\[\abs{\circuit}(1,\dots,1)\le 2^{2^{\degree(\circuit)}\cdot \size(\circuit)}.\]
\end{Lemma}
\begin{proof}[Proof Sketch]
\begin{proof}[Proof Sketch of \Cref{lem:C-ub-gen}]
We use the same notation as in the proof of \Cref{lem:C-ub-tree}. We will prove by induction on $\depth(\circuit)$ that $\abs{\circuit}(1,\ldots, 1) \leq 2^{2^k\cdot N }$. The base case argument is similar to that in the proof of \Cref{lem:C-ub-tree}. In the inductive case we have that $N_\linput,N_\rinput\le N-1$.
For the case when the sink node is $\times$, we get that
@ -165,7 +157,7 @@ For the case when the sink node is $\times$, we get that
&\leq 2^{2\cdot 2^{k-1}\cdot (N-1)}\\
&\leq 2^{2^k N}.
\end{align*}
In the above the first inequality follows from inductive hypothesis while the second inequality follows from the fact that $k_\linput,k_\rinput\le k-1$ and $N_\linput, N_\rinput\le N-1$.
In the above, the first inequality follows from the inductive hypothesis while the second inequality follows from the fact that $k_\linput,k_\rinput\le k-1$ and $N_\linput, N_\rinput\le N-1$, where we substitute the upper bound into every respective term.
%$k_\linput+k_\rinput=k$ (and hence $\max(k_\linput,k_\rinput)\le k$) as well as the fact that $k\ge 0$.
Now consider the case when the sink node is $+$; we get that
@ -179,6 +171,7 @@ In the above the first inequality follows from the inductive hypothesis while th
\qed
\end{proof}
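To see why reuse of gates forces a doubly exponential bound (an illustrative example of ours, not from the above proof): squaring the output of a single $1+1$ addition gate $k$ times reaches the value $2^{2^k}$ with only $O(k)$ gates, which no bound polynomial in $\size(\circuit)$ alone can capture.
\begin{verbatim}
val = 1 + 1            # one addition gate, evaluated at the all-ones point
for _ in range(5):     # five multiplication gates, each squaring its input
    val = val * val
print(val)             # 2**(2**5) = 4294967296, from a circuit of ~8 gates
\end{verbatim}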
Finally, we consider the case when $\circuit$ encodes the run of the algorithm from~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query. We cannot handle the full generality of an FAQ query but we can handle an FAQ query that has a ``core'' join query on $k$ relations and then a subset of the $k$ attributes are ``summed'' out (e.g. the sum could be because of projecting out a subset of attributes from the join query). While the algorithm~\cite{DBLP:conf/pods/KhamisNR16} essentially figures out when to `push in' the sums, in our case since we only care about $\abs{\circuit}(1,\dots,1)$ we will consider the obvious circuit that computes the ``inner join'' using a worst-case optimal join (WCOJ) algorithm like~\cite{NPRR} and then adding in the addition gates. The basic idea is very simple: we will argue that the there are at most $\size(\circuit)^k$ tuples in the join output (each with having a value of $1$ in $\abs{\circuit}(1,\dots,1)$). Then the largest value we can see in $\abs{\circuit}(1,\dots,1)$ is by summing up these at most $\size(\circuit)^k$ values of $1$. Note that this immediately implies the claimed bound in \Cref{lem:val-ub}.
\AH{I really didn't follow the rest of this...}
Finally, we consider the case when $\circuit$ encodes the run of the algorithm from~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query. We cannot handle the full generality of an FAQ query but we can handle an FAQ query that has a ``core'' join query on $k$ relations and then a subset of the $k$ attributes are ``summed'' out (e.g. the sum could be because of projecting out a subset of attributes from the join query). While the algorithm of~\cite{DBLP:conf/pods/KhamisNR16} essentially figures out when to `push in' the sums, in our case, since we only care about $\abs{\circuit}(1,\dots,1)$, we will consider the obvious circuit that computes the ``inner join'' using a worst-case optimal join (WCOJ) algorithm like~\cite{NPRR} and then adds in the addition gates. The basic idea is very simple: we will argue that there are at most $\size(\circuit)^k$ tuples in the join output (each having a value of $1$ in $\abs{\circuit}(1,\dots,1)$). Then the largest value we can see in $\abs{\circuit}(1,\dots,1)$ is obtained by summing up these at most $\size(\circuit)^k$ values of $1$. Note that this immediately implies the claimed bound in \Cref{lem:val-ub}.
We now sketch the argument for the claim about the join query above. First, we note that the computation of a WCOJ algorithm like~\cite{NPRR} can be expressed as a circuit with {\em multiple} sinks (one for each output tuple). Note that the annotation corresponding to $\mathbf{t}$ in $\circuit$ is the polynomial $\prod_{e\in E} R(\pi_e(\mathbf{t}))$ (where $E$ indexes the set of relations). It is easy to see that in this case the value of $\mathbf{t}$ in $\abs{\circuit}(1,\dots,1)$ will be $1$ (by multiplying $1$ $k$ times). The claim on the number of output tuples follows from the trivial bound of multiplying the input size bounds (each relation has at most $n\le \size(\circuit)$ tuples), and hence we get an overall bound of $n^k\le\size(\circuit)^k$. Note that we did not really use anything about the WCOJ algorithm except for the fact that the part of $\circuit$ for the join is built only of multiplication gates. In fact, we do not need the better WCOJ join size bounds either (since we used the trivial $n^k$ bound). As a final remark, we note that we can build the circuit for the join part by running, say, the algorithm from~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query that just has the join query but where each tuple is annotated with the corresponding variable $X_i$ (i.e. the semi-ring for the FAQ query is $\mathbb{N}[\mathbf{X}]$).
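A toy version of this counting argument, with a hypothetical triangle query and relation instances of our own (we only check the trivial $n^k$ bound used above):
\begin{verbatim}
# Hypothetical triangle join Q(a,b,c) :- R(a,b), S(b,c), T(a,c), so k = 3.
R = {(0, 1), (1, 2)}
S = {(1, 2), (2, 0)}
T = {(0, 2), (1, 0)}
n, k = max(len(R), len(S), len(T)), 3

# Each output tuple is annotated by a product of k variables, hence it
# contributes exactly 1 to |C|(1,...,1).
out = [(a, b, c) for (a, b) in R for (b2, c) in S if b2 == b if (a, c) in T]
print(len(out), "output tuples, so |C|(1,...,1) =", len(out), "<=", n ** k)
\end{verbatim}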

View File

@ -8,7 +8,7 @@ In the following definitions and examples, we use the following polynomial as an
\begin{Definition}[Pure Expansion]
The pure expansion of a polynomial $\poly$ is formed by computing all product of sums occurring in $\poly$, without combining like monomials. The pure expansion of $\poly$ generalizes \Cref{def:smb} by allowing monomials $m_i = m_j$ for $i \neq j$.
\end{Definition}
Note that similar in spirit to \Cref{def:reduced-bi-poly}, $\expansion{\circuit}$ \Cref{def:expand-circuit} reduces all variable exponents $e > 1$ to $e = 1$.
Note that, similar in spirit to \Cref{def:reduced-bi-poly}, $\expansion{\circuit}$ (\Cref{def:expand-circuit}) reduces all variable exponents $e > 1$ to $e = 1$. Further, it is true that $\expansion{\circuit}$ is the pure expansion of $\circuit$.
In the following, we abuse notation and write $\monom$ to denote the monomial obtained as the products of the variables in the set.
@ -17,16 +17,18 @@ Consider the factorized representation $(X+ 2Y)(2X - Y)$ of the polynomial in \C
Its circuit $\circuit$ is illustrated in \Cref{fig:circuit}.
The pure expansion of the product is $2X^2 - XY + 4XY - 2Y^2$ and the $\expansion{\circuit}$ is $[(X, 2), (XY, -1), (XY, 4), (Y, -2)]$.
\end{Example}
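For concreteness, $\expansion{\circuit}$ can be computed by a single recursion over the circuit. The following sketch (our own tuple encoding, on expression trees for simplicity) represents monomials as variable \emph{sets}, so exponents $e > 1$ collapse to $e = 1$ exactly as described above:
\begin{verbatim}
def expand(c):
    # returns E(c): a list of (monomial, coefficient) pairs, where a
    # monomial is a frozenset of variables
    kind = c[0]
    if kind == 'var':
        return [(frozenset([c[1]]), 1)]
    if kind == 'num':
        return [(frozenset(), c[1])]
    left, right = expand(c[1]), expand(c[2])
    if kind == '+':
        return left + right
    # kind == '*': pairwise products, without combining like monomials
    return [(m1 | m2, c1 * c2) for (m1, c1) in left for (m2, c2) in right]

# (X + 2Y)(2X - Y) from the example above:
C = ('*', ('+', ('var', 'X'), ('*', ('num', 2), ('var', 'Y'))),
         ('+', ('*', ('num', 2), ('var', 'X')), ('*', ('num', -1), ('var', 'Y'))))
print(expand(C))  # [({X}, 2), ({X,Y}, -1), ({X,Y}, 4), ({Y}, -2)]
\end{verbatim}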
$\expansion{\circuit}$ effectively\footnote{The minor difference here is that $\expansion{\circuit}$ encodes the \emph{reduced} form over the SOP expansion of the compressed representation, as opposed to the \abbrSMB representation} encodes the \emph{reduced} form of $\polyf\inparen{\circuit}$, decoupling each monomial into a set of variables $\monom$ and a real coefficient $\coef$.
However, unlike the constraint on the input to compute $\rpoly$, the input circuit $\circuit$ does not need to be in \abbrSMB/SOP form.
$\expansion{\circuit}$ effectively\footnote{The minor difference here is that $\expansion{\circuit}$ encodes the \emph{reduced} form over the SOP pure expansion of the compressed representation, as opposed to the \abbrSMB representation} encodes the \emph{reduced} form of $\polyf\inparen{\circuit}$, decoupling each monomial into a set of variables $\monom$ and a real coefficient $\coef$.
However, unlike the constraint on the input $\poly$ to compute $\rpoly$, the input circuit $\circuit$ does not need to be in \abbrSMB/SOP form.
\begin{Example}[Example for \Cref{def:positive-circuit}]\label{ex:def-pos-circ}
Using the same factorization from \Cref{example:expr-tree-T}, $\polyf(\abs{\circuit}) = (X + 2Y)(2X + Y) = 2X^2 +XY +4XY + 2Y^2 = 2X^2 + 5XY + 2Y^2$. Note that this \textit{is not} the same as the polynomial from \Cref{eq:poly-eg}.
\end{Example}
\begin{Definition}[Evaluation]\label{def:exp-poly-eval}
Given a circuit $\circuit$ and a valuation $\vct{a} \in \mathbb{R}^\numvar$, we define the evaluation of $\circuit$ on $\vct{a}$ as $\circuit(\vct{a}) = \polyf(\circuit)(\vct{a})$.
\end{Definition}
%\begin{Definition}[Evaluation]\label{def:exp-poly-eval}
%Given a circuit $\circuit$ and a valuation $\vct{a} \in \mathbb{R}^\numvar$, we define the evaluation of $\circuit$ on $\vct{a}$ as $\circuit(\vct{a}) = \polyf(\circuit)(\vct{a})$.
%\end{Definition}
%
%\AH{Do we use this anywhere \cref{def:exp-poly-eval}?}
\begin{Definition}[Subcircuit]
A subcircuit of a circuit $\circuit$ is a circuit \subcircuit such that \subcircuit is a DAG \textit{subgraph} of the DAG representing \circuit. \subcircuit has exactly one sink, the gate \gate.

View File

@ -42,67 +42,59 @@ To compute $\numocc{G}{\threepath}$, note that for an arbitrary edge $(i, j)$,
\subsection{Proofs for \Cref{lem:3m-G2}, \Cref{lem:tri}, and \Cref{lem:lin-sys}}\label{subsec:proofs-struc-lemmas}
Before proceeding, let us introduce a few more helpful definitions.
\AH{Main concerns: More consistent and less-bulky (if possible) notation. Make sure types match. Remove extraneous information.}
\begin{Definition}\label{def:ed-nota}
For $\ell > 1$, we use $E_\ell$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, its edges are denoted by a pair $(e, b)$, such that $b \in \{0,\ldots, \ell-1\}$ and $e\in E_1$, where $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$.
\end{Definition}
\AH{It might be helpful to keep the subscripts the same between the above and below definition.}
\begin{Definition}[$\esetType{\ell}$]\label{def:ed-nota}
For $\ell > 1$, we use $\esetType{\ell}$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, its edges are denoted by a pair $(e, b)$, such that $b \in \{0,\ldots, \ell-1\}$ and $e\in \esetType{1}$, where $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$.
\end{Definition}
\begin{Definition}[$\eset{\ell}$]
Given an arbitrary subgraph $\sg{1}$ of $\graph{1}$, let $\eset{1}$ denote the set of edges in $\sg{1}$. Define then $\eset{\ell}$ for $\ell > 1$ as the set of edges in the generated subgraph $\sg{\ell}$ (i.e. when we apply \Cref{def:Gk} to $\sg{1})$.
\end{Definition}
For example, consider $\sg{1}$ with edges $\eset{1} = \{e_1\}$. Then the edge set of $\sg{2}$ is defined as $\eset{2} = \{(e_1, 0), (e_1, 1)\}$.
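The construction of $\eset{\ell}$ is mechanical; a sketch (our own encoding, with fresh internal vertices for each replaced edge):
\begin{verbatim}
def g_ell(edges, ell):
    # edges: dict from edge name e to its endpoints (u, v) in G_1. Returns
    # the labeled edges (e, 0), ..., (e, ell-1) of G_ell with endpoints,
    # forming the ell-path that replaces e.
    out = {}
    for e, (u, v) in edges.items():
        path = [u] + [('mid', e, j) for j in range(ell - 1)] + [v]
        for b in range(ell):
            out[(e, b)] = (path[b], path[b + 1])
    return out

print(sorted(g_ell({'e1': (0, 1)}, 2)))  # [('e1', 0), ('e1', 1)], as above
\end{verbatim}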
\begin{Definition}\label{def:ed-sub}
\begin{Definition}[$\binom{\edgeSet}{t}$ and $\binom{\edgeSet}{\leq t}$]\label{def:ed-sub}
Let $\binom{\edgeSet}{t}$ denote the set of subsets of $\edgeSet$ with exactly $t$ edges. In a similar manner, $\binom{\edgeSet}{\leq t}$ is used to mean the subsets of $\edgeSet$ with $t$ or fewer edges.
\end{Definition}
The following function $f_\ell$ is a mapping from every $3$-edge shape in $\graph{\ell}$ to its `projection' in $\graph{1}$.
\begin{Definition}\label{def:fk}
Let $f_\ell: \binom{E_\ell}{3} \mapsto \binom{E_1}{\leq3}$ be defined as follows. For any element $s \in \binom{E_\ell}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
Let $f_\ell: \binom{\esetType{\ell}}{3} \rightarrow \binom{\esetType{1}}{\leq3}$ be defined as follows. For any element $s \in \binom{\esetType{\ell}}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
\[ f_\ell\left(\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}\right) = \pbrace{e_1, e_2, e_3}.\]
\end{Definition}
\begin{Definition}[$f_\ell^{-1}$]\label{def:fk-inv}
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with at most $m \leq 3$ edges, the inverse function $f_\ell^{-1}: \binom{E_1}{\leq 3}\mapsto 2^{\binom{E_\ell}{3}}$ takes $\eset{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with $m \leq 3$ edges, the inverse function $f_\ell^{-1}: \binom{\esetType{1}}{\leq 3}\rightarrow 2^{\binom{\esetType{\ell}}{3}}$ takes the edge set $\eset{1}$ of $\sg{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that
$f_\ell(s) = \eset{1}$.
\end{Definition}
\AH{The above definition seems choppy. Perhaps saying ``takes the set of edges in $\sg{1}$ and outputs...''}
Note, importantly, that when we discuss $f_\ell^{-1}$, each \textit{edge} present in $\eset{1}$ must have an edge in $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(S)$ consists of the following set of edges: $\{ (e_i, b), (e_j, b'), (e_m, b'') \}$, where $i,j$ and $m$ are distinct.
Note, importantly, that when we discuss $f_\ell^{-1}$, each \textit{edge} present in $\eset{1}$ must have an edge in $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(\eset{1})$ consists of the following set of edges: $\{ (e_i, b), (e_j, b'), (e_m, b'') \}$, where $i,j$ and $m$ are distinct.
\AH{The type for the input seems inconsistent. Does it take in a subgraph, or a set of edges? We need to be precise and consistent with this.}
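Both maps are straightforward to implement, which also makes the input/output types concrete (in this sketch of ours, both $f_\ell$ and $f_\ell^{-1}$ operate on plain sets of (labeled) edges):
\begin{verbatim}
from itertools import combinations

def f_ell(s):
    # project {(e1,b1), (e2,b2), (e3,b3)} down to {e1, e2, e3}
    return frozenset(e for (e, b) in s)

def f_ell_inverse(E1, ell):
    # all 3-element sets of labeled edges over E1 projecting onto exactly E1
    labeled = [(e, b) for e in E1 for b in range(ell)]
    return [set(s) for s in combinations(labeled, 3)
            if f_ell(s) == frozenset(E1)]

print(len(f_ell_inverse({'e1', 'e2', 'e3'}, 2)))  # 8: one b_i per edge
\end{verbatim}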
We first note that $f_\ell$ is well-defined:
\begin{Lemma}\label{lem:fk-func}
$f_\ell$ is a function.
\end{Lemma}
\AH{I think that only the third sentence is necessary to prove the lemma's claim.}
\begin{proof}\label{subsubsec:proof-fk}
Note that $f_\ell$ is properly defined. For any $S \in \binom{E_\ell}{3}$, $|f(S)| \leq 3$, since it has to be the case that any subset of $3$ edges in $E_\ell$ will map to at most three edges in $E_1$. All mappings are in the required range. Then, since for any $b \in \{0,\ldots, \ell-1\}$ the map $(e, b) \mapsto e$ is a function and has exactly one mapping, which %` mapping for which $(e, b)$ maps to no other edge than $e$, and this
implies that $f_\ell$ is a function.\qed
For any $b \in \{0,\ldots, \ell-1\}$, the map $(e, b) \mapsto e$ is a function, since each $(e, b)$ has exactly one image. It then follows that $f_\ell$ is a function.\qed
\end{proof}
We are now ready to prove the structural lemmas. Note that $f_\ell$ maps subsets of three edges in $\graph{\ell}$ to a subset of at most three edges in $E_1$. To prove the structural lemmas, we will use the map $f_\ell^{-1}$. In particular, to count the number of occurrences of $\tri,\threepath,\threedis$ in $\graph{\ell}$ we count for each $S\in\binom{E_1}{\le 3}$, how many $\threedis$ and $\tri$ subgraphs appear in $f_\ell^{-1}(S)$.
We are now ready to prove the structural lemmas. Note that $f_\ell$ maps subsets of three edges in $\graph{\ell}$ to a subset of at most three edges in $\esetType{1}$. To prove the structural lemmas, we will use the map $f_\ell^{-1}$. In particular, to count the number of occurrences of $\tri$ and $\threedis$ in $\graph{\ell}$ we count, for each $S\in\binom{\esetType{1}}{\le 3}$, how many $\threedis$ and $\tri$ subgraphs appear in $f_\ell^{-1}(S)$.
\subsubsection{Proof of Lemma \ref{lem:3m-G2}}
\begin{proof}%[Proof of \Cref{lem:3m-G2}]
For each subset $\eset{1}\in \binom{E_1}{\le 3}$, we count the number of $3$-matchings in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. We first consider the case of $\eset{1} \in \binom{E_1}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set of all $3$-edge subsets $s \in \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_\ell(s) = \{e_1, e_2, e_3\}$.
For each subset $\eset{1}\in \binom{\esetType{1}}{\le 3}$, we count the number of \emph{$3$-matchings} in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. We first consider the case of $\eset{1} \in \binom{\esetType{1}}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set of all $3$-edge subsets $s \in \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_2(s) = \{e_1, e_2, e_3\}$. Note that the size of the output, denoted $\abs{f_2^{-1}(\eset{1})}$, is $8$ in this case: each set of edges of the form $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ for $b_i \in \{0,1\}$, $i \in [3]$, is present. We count the number of $3$-matchings from the set $f_2^{-1}(\eset{1})$.
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$ (denoted $\eset{1} \equiv \sg{1}$):
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$. %(denoted $\eset{1} \equiv \sg{1}$):
\begin{itemize}
\item $3$-matching ($\threedis$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threedis$, it is the case that edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. All choices for $b_1, b_2, b_3 \in \{0, 1\}$, $(e_1, b_1), (e_2, b_2), (e_3, b_3)$ will compose a 3-matching. One can see that we have a total of two possible choices for $b_i$ for each edge $e_i$ in $\graph{1}$ yielding $2^3 = 8$ possible 3-matchings in $f_2^{-1}(\eset{1})$.
When $\sg{1}$ is isomorphic to $\threedis$, it is the case that edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. By definition, each set of edges $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ is thus a $3$-matching. One can see that we have a total of two possible choices for $b_i$ for each edge $e_i$ in $\graph{1}$, yielding $2^3 = 8$ possible $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Disjoint Two-Path ($\twopathdis$)
\end{itemize}
For $\sg{1}$ isomorphic to $\twopathdis$ edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can only pick either $(e_1, 0)$ or $(e_1, 1)$ for $f_2^{-1}(\eset{1})$, and then we need to pick a $2$-matching from $e_2$ and $e_3$. Note that the four path allows there to be 3 possible 2 matchings, specifically,
For $\sg{1}$ isomorphic to $\twopathdis$, edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that in $\sg{2}$ edges $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can pick either $(e_1, 0)$ or $(e_1, 1)$ for the first edge of the $3$-matching, while it is necessary to have a $2$-matching from the $4$-path $(e_2, 0),\ldots,(e_3, 1)$. Note that the $4$-path allows for three possible $2$-matchings, specifically,
\begin{equation*}
\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}.
\end{equation*}
@ -112,7 +104,7 @@ Since these two selections can be made independently, there are $2 \cdot 3 = 6$
\begin{itemize}
\item $3$-star ($\oneint$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\eset{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid 3 matching it must be the case that at most one inner edge can be part of the set of disjoint edges. For the case when exactly one inner edge is chosen, there exist $3$ possibilities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible 3-matching occurs when all 3 outer edges are chosen. Thus, there are four 3-matchings in $f_2^{-1}(\eset{1})$.
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\sg{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid $3$-matching it must be the case that at most one inner edge can be part of the set of disjoint edges. For the case when exactly one inner edge is chosen, there exist $3$ possibilities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible $3$-matching occurs when all $3$ outer edges are chosen. Thus, there are four $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-path ($\threepath$)
@ -125,11 +117,11 @@ When $\sg{1}$ is isomorphic to $\threepath$ it is the case that all edges beginn
\end{itemize}
For $\sg{1}$ isomorphic to $\tri$, note that the edges in $\eset{2}$ are again connected in a successive manner, but this time in a cycle, such that $(e_1, 0)$ and $(e_3, 1)$ are also connected. While this is similar to the discussion of the $3$-path above, the first and last edges are not disjoint, since they share a vertex. This rules out the two subsets $\{(e_1, 0), (e_2, 0), (e_3, 1)\}$ and $\{(e_1, 0), (e_2, 1), (e_3, 1)\}$, yielding two $3$-matchings.
Let us now consider when $\eset{1} \in \binom{E_1}{\leq 2}$, i.e. patterns among
Let us now consider when $\eset{1} \in \binom{\esetType{1}}{\leq 2}$, i.e. fixed subgraphs among
\begin{itemize}
\item $2$-matching ($\twodis$), $2$-path ($\twopath$), $1$ edge ($\ed$)
\end{itemize}
When $|\eset{1}| = 2$, we can only pick one from each of two pairs, $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. This implies that a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. The same argument holds for $|\eset{1}| = 1$, where we can only pick one edge from the pair $\pbrace{(e_1, 0), (e_1, 1)}$. Trivially, no $3$-matching exists in $f_2^{-1}(\eset{1})$ either.
When $|\eset{1}| = 2$, we can only pick one edge from each of the two pairs $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. The third edge of any set in $f_2^{-1}(\eset{1})$ must then be the remaining copy of one of these pairs, and the two copies of an edge share a vertex, breaking the disjointness required of a $3$-matching. Thus, a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. A similar argument holds for $|\eset{1}| = 1$, where $f_2^{-1}(\eset{1}) = \emptyset$ since $\eset{2}$ contains only two edges and hence no $3$-edge subsets at all. %we can only pick one edge from the pair $\pbrace{(e_1, 0), (e_1, 1)}$. Trivially, no $3$-matching exists in $f_2^{-1}(\eset{1})$ either.
Observe that all of the arguments above focused solely on the isomorphism class of the subgraph $\sg{1}$. In other words, all $\eset{1}$ of a given ``shape'' yield the same number of $3$-matchings in $f_2^{-1}(\eset{1})$, and this is why we get the required identity using the above case analysis.
\qed
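The entire case analysis above can be verified by brute force; a sketch with our own encodings of the five shapes (the printed counts match the counts derived above):
\begin{verbatim}
from itertools import combinations

def g2(g1):
    # g1: dict from edge name to endpoints (u, v); each edge becomes a
    # 2-path through a fresh midpoint, with labeled edges (e, 0), (e, 1)
    e2 = {}
    for e, (u, v) in g1.items():
        e2[(e, 0)] = (u, ('mid', e))
        e2[(e, 1)] = (('mid', e), v)
    return e2

def matchings_in_f2_inverse(g1):
    e2 = g2(g1)
    count = 0
    for s in combinations(e2, 3):
        if {e for (e, b) in s} != set(g1):   # s must project onto all of g1
            continue
        verts = [x for le in s for x in e2[le]]
        if len(verts) == len(set(verts)):    # pairwise vertex-disjoint
            count += 1
    return count

shapes = {
    '3-matching':      {'e1': (0, 1), 'e2': (2, 3), 'e3': (4, 5)},        # 8
    'disjoint 2-path': {'e1': (0, 1), 'e2': (2, 3), 'e3': (3, 4)},        # 6
    '3-star':          {'e1': (1, 'c'), 'e2': (2, 'c'), 'e3': (3, 'c')},  # 4
    '3-path':          {'e1': (0, 1), 'e2': (1, 2), 'e3': (2, 3)},
    'triangle':        {'e1': (0, 1), 'e2': (1, 2), 'e3': (2, 0)},        # 2
}
for name, g1 in shapes.items():
    print(name, matchings_in_f2_inverse(g1))
\end{verbatim}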

View File

@ -101,38 +101,7 @@ level 2/.style={sibling distance=0.7cm},
\end{figure}
\subsection{\onepass}
%%%%%%%%%%%%%%%%REDUCE ALGO PSEUDOCODE%%%%%%%%%%%
%\begin{algorithm}[h!]
% \caption{\reduce$(\circuit)$}
% \label{alg:reduce}
% \begin{algorithmic}[1]
% \Require \circuit: Circuit
% \Ensure \circuit: Reduced Circuit
% \For{\gate in \topord(\circuit)}\label{alg:reduce-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
% \If{\gate.\type $=$ \var}\label{alg:reduce-var}
% \State \gate.\degval $\gets 1$\label{alg:reduce-add-deg}
% \ElsIf{\gate.\type $=$ \tnum}\label{alg:reduce-num}
% \State \gate.\degval $\gets 0$\label{alg:reduce-no-deg}
% \ElsIf{\gate.\type $= \circmult$}\label{alg:reduce-mult}
% \State \gate.\degval $\gets \gate_\linput.\degval + \gate_\rinput.\degval$
% \If{\gate.\degval $= 0$}
% \State \gate.\type $\gets \tnum$
% \State $\gate.\val \gets \gate_\linput.\val \times \gate_\rinput.\val$
% \State $\gate_\linput, \gate_\rinput \gets \nullval$
% \EndIf
% \Else \label{alg:reduce-plus}
% \State \gate.\degval $\gets \max(\gate_\linput.\degval, \gate_\rinput.\degval)$
% \If{\gate.\degval $= 0$}
% \State \gate.\type $\gets \tnum$
% \State $\gate.\val \gets \gate_\linput.\val + \gate_\rinput.\val$
% \State $\gate_\linput, \gate_\rinput \gets \nullval$
% \EndIf
% \EndIf
% \EndFor
% \State \Return $\circuit$
% \end{algorithmic}
%\end{algorithm}
%\subsection{\onepass}
\begin{algorithm}[h!]
\caption{\onepass$(\circuit)$}
@ -140,9 +109,8 @@ level 2/.style={sibling distance=0.7cm},
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Annotated Circuit
\Ensure \vari{sum} $\in \domR$
\State $\circuit' \gets \reduce(\circuit)$
\For{\gate in \topord(\circuit')}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\Ensure \vari{sum} $\in \domN$
\For{\gate in \topord(\circuit)}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\If{\gate.\type $=$ \var}
\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
\ElsIf{\gate.\type $=$ \tnum}
@ -156,24 +124,24 @@ level 2/.style={sibling distance=0.7cm},
\EndIf
\State \vari{sum} $\gets \gate.\prt$
\EndFor
\State \Return (\vari{sum}, $\circuit'$)
\State \Return (\vari{sum}, $\circuit$)
\end{algorithmic}
\end{algorithm}
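A direct Python transcription of \onepass may be helpful (a sketch with our own \texttt{Gate} encoding; the bit-complexity bookkeeping of \Cref{lem:one-pass} is ignored, and coefficients enter through their absolute values, as in $\abs{\circuit}$):
\begin{verbatim}
class Gate:
    def __init__(self, typ, val=None, left=None, right=None):
        self.typ, self.val, self.left, self.right = typ, val, left, right
        self.prt = self.lwght = self.rwght = None

def onepass(gates_in_topological_order):
    # computes gate.prt = |C_gate|(1,...,1) bottom-up and, for each + gate,
    # the sampling weights of its two inputs
    for g in gates_in_topological_order:
        if g.typ == 'var':
            g.prt = 1
        elif g.typ == 'num':
            g.prt = abs(g.val)
        elif g.typ == '*':
            g.prt = g.left.prt * g.right.prt
        else:  # '+'
            g.prt = g.left.prt + g.right.prt
            g.lwght = g.left.prt / g.prt
            g.rwght = g.right.prt / g.prt
    return gates_in_topological_order[-1].prt

# |C|(1,1) for (X + 2Y)(2X - Y):
x, y = Gate('var'), Gate('var')
c2a, c2b, cm1 = Gate('num', 2), Gate('num', 2), Gate('num', -1)
l = Gate('+', left=x, right=Gate('*', left=c2a, right=y))
r = Gate('+', left=Gate('*', left=c2b, right=x),
             right=Gate('*', left=cm1, right=y))
root = Gate('*', left=l, right=r)
order = [x, y, c2a, c2b, cm1, l.right, r.left, r.right, l, r, root]
print(onepass(order))  # 9 = (1 + 2) * (2 + 1)
\end{verbatim}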
\subsection{Proof of \Cref{lem:one-pass}}\label{sec:proof-one-pass}
\begin{proof}
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in line~\ref{alg:one-pass-loop} over the topological order \topord of the input circuit \circuit. Note that \topord is the standard definition of a topological ordering over the DAG structure of \circuit.
\subsection{\onepass Proof}\label{sec:proof-one-pass}
\begin{proof}[Proof of \Cref{lem:one-pass}]
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in the topological order \topord (line~\ref{alg:one-pass-loop}) of the input circuit \circuit. Note that \topord follows the standard definition of a topological ordering over the DAG structure of \circuit.
For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum. In this case, as per \Cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and \circuit.\val respectively.
For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum. In this case, as per \cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and \circuit.\val respectively.
For the inductive hypothesis, assume that \onepass correctly computes \subcircuit.\prt, \subcircuit.\lwght, and \subcircuit.\rwght for all gates \gate in \circuit with $k \geq 0$ iterations over \topord.
\AH{Notes above: Algo uses Reduce, but we don't use that anymore. The figure needs to change to a circuit.}
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit for $i \in [k + 1]$.
Note that the $\gate_\vari{k + 1}$ must be in the last ordering of all gates $\gate_\vari{i}$. It is also the case that $\gate_{k+1}$ has two inputs. Finally, note that for \size(\circuit) > 1, if $\gate_{k+1}$ is a leaf node, we are back to the base case. Otherwise $\gate_{k + 1}$ is an internal node $\gate_\vari{s}.\type = \circplus$ or $\gate_\vari{s}.\type = \circmult$.
Note that $\gate_\vari{k + 1}$ must be the last gate in the ordering of all gates $\gate_\vari{i}$. Note that for $\size(\circuit) > 1$, if $\gate_{k+1}$ is a leaf node, we are back to the base case. Otherwise $\gate_{k + 1}$ is an internal node with $\gate_\vari{k+1}.\type = \circplus$ or $\gate_\vari{k+1}.\type = \circmult$, both of which require binary input.
When $\gate_{k+1}.\type = \circplus$, then by line~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \Cref{eq:T-all-ones}. Further, lines~\ref{alg:one-pass-lwght} and~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$. Note that all values needed for each computation have been correctly computed by the inductive hypothesis.
When $\gate_{k+1}.\type = \circplus$, then by line~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \cref{eq:T-all-ones}. Further, lines~\ref{alg:one-pass-lwght} and~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$. Note that all values needed for each computation have been correctly computed by the inductive hypothesis.
When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild.\prt} \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed is correct, as per \Cref{eq:T-all-ones}.
When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild.\prt} \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed by \cref{eq:T-all-ones} is correct.
\paragraph*{Runtime Analysis}
It is known that $\topord(\circuit)$ is computable in linear time. Next, each of the $\size(\circuit)$ iterations of the loop in \Cref{alg:one-pass-loop} takes $O\left( \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$ time. It is easy to see that each of the numbers which the algorithm computes is at most $\abs{\circuit}(1,\dots,1)$. Hence, by definition each such operation takes $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}$ time, which proves the claimed runtime.

View File

@ -22,26 +22,20 @@ For the base case, let the depth $d$ of $\circuit$ be $0$. We have that the roo
For the inductive hypothesis, assume that for $d \leq k$, for some $k \geq 0$, it is indeed the case that $\sampmon$ returns a monomial.
For the inductive step, let us take a circuit $\circuit$ with $d = k + 1$. Note that each input has depth $d \leq k$, and by inductive hypothesis both of them return a valid monomial. Then the root can be either a $\circplus$ or $\circmult$ node. For the case of a $\circplus$ root node, line~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the inputs of the root. By inductive hypothesis it is the case that a monomial in \expansion{\circuit} is being returned from either input. Then it follows that for the case of $+$ root node a valid monomial is returned by $\sampmon$. When the root is a $\circmult$ node, line~\ref{alg:sample-times-union} %and~\ref{alg:sample-times-product} multiply
computes the set union of the monomials returned by the two inputs of the root, and it is trivial to see
%by definition~\ref{def:monomial}
%the product of two monomials is also a monomial, and
by \Cref{def:expand-circuit} that \monom is a valid monomial in some $(\monom, \coef) \in \expansion{\circuit}$.
For the inductive step, let us take a circuit $\circuit$ with $d = k + 1$. Note that each input has depth $d \leq k$, and by inductive hypothesis both of them return a valid monomial. Then the sink can be either a $\circplus$ or $\circmult$ gate. For the case when $\circuit.\type = \circplus$, line~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the inputs of the sink. By inductive hypothesis it is the case that a monomial $\monom$ in some $(\monom, \coef)$ of $\expansion{\circuit}$ is being returned from either input. Then it follows when $\circuit.\type = \circplus$ that a valid monomial is returned by $\sampmon$. When $\circuit.\type = \circmult$, line~\ref{alg:sample-times-union} computes the set union of the monomials returned by the two inputs of the sink, and it is trivial to see by \cref{def:expand-circuit} that \monom is a valid monomial in some $(\monom, \coef)$ of $\expansion{\circuit}$.
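The top-down sampling itself is equally short; a sketch of ours, reusing the hypothetical \texttt{Gate} class and \onepass sketch given earlier, that returns the sampled variable set together with the sign of its coefficient:
\begin{verbatim}
import random

def sampmon(g):
    # assumes onepass has already filled g.prt and g.lwght for every + gate
    if g.typ == 'var':
        return {g}, 1
    if g.typ == 'num':
        return set(), (1 if g.val >= 0 else -1)
    if g.typ == '*':
        ml, sl = sampmon(g.left)
        mr, sr = sampmon(g.right)
        return ml | mr, sl * sr          # set union: exponents collapse to 1
    # '+': descend into exactly one input, chosen by the onepass weights
    child = g.left if random.random() < g.lwght else g.right
    return sampmon(child)
\end{verbatim}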
We will next prove by induction on the depth $d$ of $\circuit$ that, for $(\monom,\coef) \in \expansion{\circuit}$, the monomial \monom is returned by $\sampmon$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
For the base case $d = 0$, by definition~\ref{def:circuit} we know that the root has to be either a coefficient or a variable. For either case, the probability of the value returned is $1$ since there is only one value to sample from. When the root is a variable $x$ the algorithm correctly returns $(\{x\}, 1 )$. When the root is a coefficient, \sampmon ~correctly returns $(\{~\}, sign(\coef_i))$.
For the base case $d = 0$, by definition~\ref{def:circuit} we know that the source has to be either a coefficient or a variable. For either case, the probability of the value returned is $1$ since there is only one value to sample from. When the root is a variable $x$ the algorithm correctly returns $(\{x\}, 1 )$. When the root is a coefficient, \sampmon ~correctly returns $(\{~\}, sign(\coef_i))$.
For the inductive hypothesis, assume that for $d \leq k$ and $k \geq 0$, $\sampmon$ indeed samples $\monom$ in $(\monom, \coef)$ in $\expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
We prove now for $d = k + 1$ the inductive step holds. It is the case that the root of $\circuit$ has up to two inputs $\circuit_\linput$ and $\circuit_\rinput$. Since $\circuit_\linput$ and $\circuit_\rinput$ are both depth $d \leq k$, by inductive hypothesis, $\sampmon$ will sample both monomials $\monom_\lchild$ in $(\monom_\lchild, \coef_\lchild)$ of $\expansion{\circuit_\linput}$ and $\monom_\rchild$ in $(\monom_\rchild, \coef_\rchild)$ of $\expansion{\circuit_\rinput}$, from $\circuit_\linput$ and $\circuit_\rinput$ with probability $\frac{|\coef_\lchild|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_\rchild|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$.
We prove now for $d = k + 1$ the inductive step holds. It is the case that the sink of $\circuit$ has up to two inputs $\circuit_\linput$ and $\circuit_\rinput$. Since $\circuit_\linput$ and $\circuit_\rinput$ are both depth $d \leq k$, by inductive hypothesis, $\sampmon$ will sample both monomials $\monom_\lchild$ in $(\monom_\lchild, \coef_\lchild)$ of $\expansion{\circuit_\linput}$ and $\monom_\rchild$ in $(\monom_\rchild, \coef_\rchild)$ of $\expansion{\circuit_\rinput}$, from $\circuit_\linput$ and $\circuit_\rinput$ with probability $\frac{|\coef_\lchild|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_\rchild|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$.
The sink has to be either a $\circplus$ or a $\circmult$ gate.
Consider the case when $\circuit.\type = \circmult$. Note that we are sampling a term from $\expansion{\circuit}$. Consider $(\monom, \coef)$ in $\expansion{\circuit}$, where $\monom$ is the sampled monomial. Notice also that it is the case that $\monom = \monom_\lchild \circmult \monom_\rchild$, where $\monom_\lchild$ is coming from $\circuit_\linput$ and $\monom_\rchild$ from $\circuit_\rinput$. The probability that \sampmon$(\circuit_{\lchild})$ returns $\monom_\lchild$ is $\frac{|\coef_{\monom_\lchild}|}{|\circuit_\linput|(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ for $\monom_\rchild$. Since both $\monom_\lchild$ and $\monom_\rchild$ are sampled with independent randomness, the final probability for sample $\monom$ is then $\frac{|\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|}{|\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)}$. For $(\monom, \coef)$ in $\expansion{\circuit}$, by \cref{def:expand-circuit} it is indeed the case that $|\coef| = |\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|$ and that (as shown in \cref{eq:T-all-ones}) $\abs{\circuit}(1,\ldots, 1) = |\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)$, and therefore $\monom$ is sampled with correct probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
Consider the case when the root is $\circmult$. Note that we are sampling a term from $\expansion{\circuit}$. Consider $(\monom, \coef)$ in $\expansion{\circuit}$, where $\monom$ is the sampled monomial. Notice also that it is the case that $\monom = \monom_\lchild \circmult \monom_\rchild$, where $\monom_\lchild$ is coming from $\circuit_\linput$ and $\monom_\rchild$ from $\circuit_\rinput$. The probability that \sampmon$(\circuit_{\lchild})$ returns $\monom_\lchild$ is $\frac{|\coef_{\monom_\lchild}|}{|\circuit_\linput|(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ for $\monom_\rchild$. Since both $\monom_\lchild$ and $\monom_\rchild$ are sampled with independent randomness, the final probability for sample $\monom$ is then $\frac{|\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|}{|\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)}$. For $(\monom, \coef)$ in \expansion{\circuit}, it is indeed the case that $|\coef| = |\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|$ and that $\abs{\circuit}(1,\ldots, 1) = |\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)$, and therefore $\monom$ is sampled with correct probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
For the case when $\circuit.\val = \circplus$, \sampmon ~will sample monomial $\monom$ from one of its inputs. By inductive hypothesis we know that any $\monom_\lchild$ in $\expansion{\circuit_\linput}$ and any $\monom_\rchild$ in $\expansion{\circuit_\rinput}$ will both be sampled with correct probability $\frac{|\coef_{\monom_\lchild}|}{\circuit_{\lchild}(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{|\circuit_\rinput|(1,\ldots, 1)}$, where either $\monom_\lchild$ or $\monom_\rchild$ will equal $\monom$, depending on whether $\circuit_\linput$ or $\circuit_\rinput$ is sampled. Assume that $\monom$ is sampled from $\circuit_\linput$, and note that a symmetric argument holds for the case when $\monom$ is sampled from $\circuit_\rinput$. Notice also that the probability of choosing $\circuit_\linput$ from $\circuit$ is $\frac{\abs{\circuit_\linput}\polyinput{1}{1}}{\abs{\circuit_\linput}\polyinput{1}{1} + \abs{\circuit_\rinput}\polyinput{1}{1}}$ as computed by $\onepass$. Then, since $\sampmon$ goes top-down, and each sampling choice is independent (which follows from the randomness in the root of $\circuit$ being independent from the randomness used in its subtrees), the probability for $\monom$ to be sampled from $\circuit$ is equal to the product of the probability that $\circuit_\linput$ is sampled from $\circuit$ and $\monom$ is sampled in $\circuit_\linput$, and
For the case when $\circuit.\type = \circplus$, \sampmon ~will sample monomial $\monom$ from one of its inputs. By inductive hypothesis we know that any $\monom_\lchild$ in $\expansion{\circuit_\linput}$ and any $\monom_\rchild$ in $\expansion{\circuit_\rinput}$ will both be sampled with correct probability $\frac{|\coef_{\monom_\lchild}|}{\circuit_{\lchild}(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{|\circuit_\rinput|(1,\ldots, 1)}$, where either $\monom_\lchild$ or $\monom_\rchild$ will equal $\monom$, depending on whether $\circuit_\linput$ or $\circuit_\rinput$ is sampled. Assume that $\monom$ is sampled from $\circuit_\linput$, and note that a symmetric argument holds for the case when $\monom$ is sampled from $\circuit_\rinput$. Notice also that the probability of choosing $\circuit_\linput$ from $\circuit$ is $\frac{\abs{\circuit_\linput}\polyinput{1}{1}}{\abs{\circuit_\linput}\polyinput{1}{1} + \abs{\circuit_\rinput}\polyinput{1}{1}}$ as computed by $\onepass$. Then, since $\sampmon$ goes top-down, and each sampling choice is independent (which follows from the randomness in the root of $\circuit$ being independent from the randomness used in its subtrees), the probability for $\monom$ to be sampled from $\circuit$ is equal to the product of the probability that $\circuit_\linput$ is sampled from $\circuit$ and $\monom$ is sampled in $\circuit_\linput$, and
\begin{align*}
&\probOf(\sampmon(\circuit) = \monom) = \\
&\probOf(\sampmon(\circuit_\linput) = \monom) \cdot \probOf(SampledChild(\circuit) = \circuit_\linput)\\
@ -53,7 +47,7 @@ and we obtain the desired result.
\paragraph*{Run-time Analysis}
It is easy to check that except for lines~\ref{alg:sample-times-union} and~\ref{alg:sample-plus-bsamp}, all lines take $O(1)$ time. For \Cref{alg:sample-times-union}, consider an execution of \Cref{alg:sample-times-union}. We note that we will be adding a given set of variables to some set at most once: since the sum of the sizes of the sets at a given level is at most $\degree(\circuit)$, each gate visited takes $O(\log{\degree(\circuit)})$. For \Cref{alg:sample-plus-bsamp}, note that we pick $\circuit_\linput$ with probability $\frac a{a+b}$ where $a=\circuit.\vari{Lweight}$ and $b=\circuit.\vari{Rweight}$. We can implement this step by picking a random number $r\in[a+b]$ and then checking if $r\le a$. It is easy to check that $a+b\le \abs{\circuit}(1,\dots,1)$. This means we need to add and compare $\log{\abs{\circuit}(1,\ldots, 1)}$-bit numbers, which can certainly be done in time $\multc{\log\left(\abs{\circuit(1\ldots, 1)}\right)}{\log{\size(\circuit)}}$ (note that this is an over-estimate).
It is easy to check that except for lines~\ref{alg:sample-plus-bsamp} and~\ref{alg:sample-times-union}, all lines take $O(1)$ time. Consider an execution of \cref{alg:sample-times-union}. We note that we will be adding a given set of variables to some set at most once: since the sum of the sizes of the sets at a given level is at most $\degree(\circuit)$, each gate visited takes $O(\log{\degree(\circuit)})$. For \Cref{alg:sample-plus-bsamp}, note that we pick $\circuit_\linput$ with probability $\frac a{a+b}$ where $a=\circuit.\vari{Lweight}$ and $b=\circuit.\vari{Rweight}$. We can implement this step by picking a random number $r\in[a+b]$ and then checking if $r\le a$. It is easy to check that $a+b\le \abs{\circuit}(1,\dots,1)$. This means we need to add and compare $\log{\abs{\circuit}(1,\ldots, 1)}$-bit numbers, which can certainly be done in time $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}$ (note that this is an over-estimate).
% we have $> O(1)$ time when $\abs{\circuit}(1,\ldots, 1) > \size(\circuit)$. when this is the case that for each sample, we have $\frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}$ operations, since we need to read in and then compare numbers of of $\log{{\abs{\circuit}(1,\ldots, 1)}}$ bits.
Let \cost(\circuit) (\Cref{eq:cost-sampmon}) denote an upper bound on the number of nodes visited by \sampmon. Then the runtime is $O\left(\cost(\circuit)\cdot \log{\degree(\circuit)}\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$.
@ -70,14 +64,14 @@ Let \cost$(\cdot)$ be a function that models an upper bound on the number of gat
\end{cases}\label{eq:cost-sampmon}
\end{equation}
First note that the number of gates visited in \sampmon is $\leq\cost(\circuit)$. To show that \Cref{eq:cost-sampmon} upper bounds the number of nodes visited by \sampmon, note that when \sampmon visits a gate such that \circuit.\type $ =\circmult$, line~\ref{alg:sample-times-for-loop} visits each input of \circuit, as defined in (\ref{eq:cost-sampmon}). For the case when \circuit.\type $= \circplus$, line~\ref{alg:sample-plus-bsamp} visits exactly one of the input gates, which may or may not be the subcircuit with the maximum number of gates traversed, which makes \cost$(\cdot)$ an upperbound. Finally, it is trivial to see that when \circuit.\type $\in \{\var, \tnum\}$, i.e., a source gate, that only one gate is visited.
First note that the number of gates visited in \sampmon is $\leq\cost(\circuit)$. To show that \cref{eq:cost-sampmon} upper bounds the number of nodes visited by \sampmon, note that when \sampmon visits a gate such that \circuit.\type $ =\circmult$, line~\ref{alg:sample-times-for-loop} visits each input of \circuit, as defined in (\ref{eq:cost-sampmon}). For the case when \circuit.\type $= \circplus$, line~\ref{alg:sample-plus-bsamp} visits exactly one of the input gates, which may or may not be the subcircuit with the maximum number of gates traversed, which makes \cost$(\cdot)$ an upper bound. Finally, it is trivial to see that when \circuit.\type $\in \{\var, \tnum\}$, i.e., a source gate, only one gate is visited.
We prove that the following inequality holds.
\begin{equation}
2\left(\degree(\circuit) + 1\right) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)\label{eq:strict-upper-bound}
\end{equation}
Note that \Cref{eq:strict-upper-bound} implies the claimed runtime. We prove \Cref{eq:strict-upper-bound} for the number of gates traversed in \sampmon using induction over $\depth(\circuit)$. Recall how degree is defined in \Cref{def:degree}.
Note that \cref{eq:strict-upper-bound} implies the claimed runtime. We prove \cref{eq:strict-upper-bound} for the number of gates traversed in \sampmon using induction over $\depth(\circuit)$. Recall how degree is defined in \cref{def:degree}.
For the base case $\degree(\circuit) = \depth(\circuit) = 0$, we have $\cost(\circuit) = 1$, and it is trivial to see that the inequality $2\left(\degree(\circuit) + 1\right) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)$ holds.
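The inequality can also be tested mechanically on random expression trees (our own encoding; \texttt{cost} follows \Cref{eq:cost-sampmon}):
\begin{verbatim}
import random

def build(d):
    if d == 0:
        return ('leaf',)
    return (random.choice(['+', '*']), build(d - 1), build(d - 1))

def degree(c):
    if c[0] == 'leaf':
        return 0
    kl, kr = degree(c[1]), degree(c[2])
    return kl + kr + 1 if c[0] == '*' else max(kl, kr)

def depth(c):
    return 0 if c[0] == 'leaf' else 1 + max(depth(c[1]), depth(c[2]))

def cost(c):
    # eq:cost-sampmon: 1 at a source gate; both inputs of a x gate;
    # the more expensive input of a + gate
    if c[0] == 'leaf':
        return 1
    if c[0] == '*':
        return 1 + cost(c[1]) + cost(c[2])
    return 1 + max(cost(c[1]), cost(c[2]))

for _ in range(1000):
    c = build(random.randint(0, 8))
    assert 2 * (degree(c) + 1) * depth(c) + 1 >= cost(c)
\end{verbatim}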
@ -86,24 +80,23 @@ Now consider the case when \sampmon has an arbitrary circuit \circuit input with
\begin{equation}
2\left(\degree(\circuit_i) + 1\right)\cdot \depth(\circuit_i) + 1 \geq \cost(\circuit_i).\label{eq:ih-bound-cost}
\end{equation}
In particular, since \cref{eq:ih-bound-cost} holds for each input $\circuit_i$, summing the corresponding inequalities yields another valid inequality. This is exactly the step from \cref{eq:times-middle} to \cref{eq:times-rhs}: $2\inparen{\degree(\circuit_\linput) + 1}\cdot \depth(\circuit_\linput) + 1 \geq \cost(\circuit_\linput)$, likewise for $\circuit_\rinput$, and $1\geq 1$.
It is also true that $\depth(\circuit_\linput) \leq \depth(\circuit) - 1$ and $\depth(\circuit_\rinput) \leq \depth(\circuit) - 1$.
If \circuit.\type $= \circplus$, then $\degree(\circuit) = \max\left(\degree(\circuit_\linput), \degree(\circuit_\rinput)\right)$. Otherwise \circuit.\type = $\circmult$ and $\degree(\circuit) = \degree(\circuit_\linput) + \degree(\circuit_\rinput) + 1$. In either case it is true that $\depth(\circuit) = \max\inparen{\depth(\circuit_\linput), \depth(\circuit_\rinput)} + 1$.
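Read as code against the hypothetical Gate fields from the earlier sketch, these recurrences become (a sketch, assuming source gates have degree and depth $0$ as in the base case above):
\begin{verbatim}
def degree(gate) -> int:
    if gate.gate_type in ("var", "num"):
        return 0   # assumption: base case as used in this proof
    if gate.gate_type == "plus":
        return max(degree(gate.left), degree(gate.right))
    return degree(gate.left) + degree(gate.right) + 1  # times gate

def depth(gate) -> int:
    if gate.gate_type in ("var", "num"):
        return 0
    return max(depth(gate.left), depth(gate.right)) + 1
\end{verbatim}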
If \circuit.\type $= \circmult$, then by \cref{eq:cost-sampmon} and substituting values, the following must hold:
\begin{align}
&2\left(\degree(\circuit_\linput) + \degree(\circuit_\rinput) + 2\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) + 1 \label{eq:times-lhs}\\
&\qquad\geq 2\left(\degree(\circuit_\linput) + 1\right) \cdot \depth(\circuit_\linput) + 2\left(\degree(\circuit_\rinput) + 1\right)\cdot \depth(\circuit_\rinput) + 3\label{eq:times-middle} \\
&\qquad\geq 1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) = \cost(\circuit) \label{eq:times-rhs}.
\end{align}
To prove (\ref{eq:times-middle}), first note that \cref{eq:times-lhs} expands to
\begin{equation}
2\degree(\circuit_\linput)\cdot\depth_{\max} + 2\degree(\circuit_\rinput)\cdot\depth_{\max} + 4\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 4 + 1\label{eq:times-lhs-expanded}
\end{equation}
where $\depth_{\max}$ denotes the maximum depth of the two input subcircuits. \Cref{eq:times-middle} expands to
\begin{equation}
2\degree(\circuit_\linput)\cdot\depth(\circuit_\linput) + 2\depth(\circuit_\linput) + 2\degree(\circuit_\rinput)\cdot\depth(\circuit_\rinput) + 2\depth(\circuit_\rinput) + 3\label{eq:times-middle-expanded}
\end{equation}
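The inequality between \cref{eq:times-lhs-expanded} and \cref{eq:times-middle-expanded} can also be sanity-checked mechanically; the following Python snippet (a verification aid only, not part of the proof) tests it exhaustively over small degree and depth values:
\begin{verbatim}
from itertools import product

def lhs(dl, dr, depl, depr):   # eq:times-lhs-expanded
    dmax = max(depl, depr)
    return 2*dl*dmax + 2*dr*dmax + 4*dmax + 2*dl + 2*dr + 5

def mid(dl, dr, depl, depr):   # eq:times-middle-expanded
    return 2*dl*depl + 2*depl + 2*dr*depr + 2*depr + 3

assert all(lhs(*v) >= mid(*v) for v in product(range(8), repeat=4))
\end{verbatim}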
@ -132,22 +125,22 @@ then it is the case that \Cref{eq:times-lhs-middle} is \emph{always} true.
We now justify \cref{eq:times-rhs}, which holds for the following reasons. First, \cref{eq:times-rhs} is the result of \Cref{eq:cost-sampmon} when $\circuit.\type = \circmult$. \Cref{eq:times-middle} is then produced by substituting the upper bound of (\ref{eq:ih-bound-cost}) for each $\cost(\circuit_i)$, trivially establishing the upper bound of (\ref{eq:times-rhs}). This proves \cref{eq:strict-upper-bound} for the $\circmult$ case.
For the case when \circuit.\type $= \circplus$, substituting values yields
\begin{align}
&2\left(\max(\degree(\circuit_\linput), \degree(\circuit_\rinput)) + 1\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) +1\label{eq:plus-lhs-inequality}\\
&\qquad \geq \max\left(2\left(\degree(\circuit_\linput) + 1\right) \cdot \depth(\circuit_\linput) + 1, 2\left(\degree(\circuit_\rinput) + 1\right) \cdot \depth(\circuit_\rinput) +1\right) + 1\label{eq:plus-middle}\\
&\qquad \geq 1 + \max(\cost(\circuit_\linput), \cost(\circuit_\rinput)) = \cost(\circuit)\label{eq:plus-rhs}
\end{align}
To prove (\ref{eq:plus-middle}), \cref{eq:plus-lhs-inequality} expands to
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 2\depth_{\max} + 2 + 1.\label{eq:plus-lhs-expanded}
\end{equation}
where $\degree_{\max}$ is defined analogously to $\depth_{\max}$. Since $\degree_{\max} \cdot \depth_{\max} \geq \degree(\circuit_i)\cdot \depth(\circuit_i)$ for each input $\circuit_i$, the following upper bound holds for the expansion of \cref{eq:plus-middle}:
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\depth_{\max} + 2.
\label{eq:plus-middle-expanded}
\end{equation}
Since \cref{eq:plus-lhs-expanded} exceeds \cref{eq:plus-middle-expanded} by $2\degree_{\max} + 1 \geq 0$, \cref{eq:plus-middle} follows.
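As with the $\circmult$ case, the $\circplus$ inequality admits a mechanical sanity check (again a verification aid only):
\begin{verbatim}
from itertools import product

def lhs(dl, dr, depl, depr):   # eq:plus-lhs-expanded
    dmax, depmax = max(dl, dr), max(depl, depr)
    return 2*dmax*depmax + 2*dmax + 2*depmax + 3

def mid(dl, dr, depl, depr):   # expansion of eq:plus-middle
    return max(2*(dl+1)*depl + 1, 2*(dr+1)*depr + 1) + 1

assert all(lhs(*v) >= mid(*v) for v in product(range(8), repeat=4))
\end{verbatim}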

View File

@ -21,7 +21,7 @@ We now introduce useful definitions and notation related to circuits and polynom
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
For a circuit $\circuit$, we define $\expansion{\circuit}$ as a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef \in \domN$.
$\expansion{\circuit}$ has the following recursive definition ($\circ$ is list concatenation).
$\expansion{\circuit} =
@ -83,6 +83,7 @@ To get linear runtime results from \Cref{lem:approx-alg}, we will need to define
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
Given an expression tree $\circuit$, define
\AH{Technically, $\monom$ is a set of variables rather than a monomial. Perhaps we don't need the $\var(\cdot)$ function and can replace it with a function that returns the monomial represented by a set of variables.}
\AH{To add, this is an issue on line 1073, 1117 of app C.}
\[\gamma(\circuit)=\frac{\sum_{(\monom, \coef)\in \expansion{\circuit}} \abs{\coef}\cdot \indicator{\monom\mod{\mathcal{B}}\equiv 0}}{\abs{\circuit}(1,\ldots, 1)}\]
\end{Definition}
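A small Python sketch may help make $\gamma$ concrete. It assumes the recursive list semantics of $\expansion{\circuit}$ from \cref{def:expand-circuit}, the hypothetical gate fields used earlier, a map block_of from variables to their $\bi$ blocks, and the standard reading that $\monom \mod \mathcal{B} \equiv 0$ iff $\monom$ contains two distinct variables from the same block:
\begin{verbatim}
from itertools import product

def expansion(gate):
    """List of (variable-set, coefficient) pairs, i.e. E(C)."""
    if gate.gate_type == "num":
        return [(frozenset(), gate.val)]
    if gate.gate_type == "var":
        return [(frozenset([gate.val]), 1)]
    lefts, rights = expansion(gate.left), expansion(gate.right)
    if gate.gate_type == "plus":                 # list concatenation
        return lefts + rights
    return [(m1 | m2, c1 * c2)                   # times: pairwise products
            for (m1, c1), (m2, c2) in product(lefts, rights)]

def gamma(gate, block_of) -> float:
    total = cancelled = 0
    for monom, coef in expansion(gate):
        total += abs(coef)
        blocks = [block_of[x] for x in monom]
        if len(blocks) != len(set(blocks)):      # monom mod B = 0
            cancelled += abs(coef)
    return cancelled / total
\end{verbatim}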

View File

@ -5,9 +5,9 @@ Recall that by definition of $\bi$, a query result cannot be derived by a self-j
We ran our experiments on the Windows 10 WSL operating system with an Intel Core i7 2.40GHz processor and 16GB RAM. All experiments used the PostgreSQL 13.0 database system.
For the data, we used the MayBMS data generator~\cite{pdbench} tool to randomly generate uncertain versions of TPC-H tables. The queries computed over the database instance are $\query_1$, $\query_2$, and $\query_3$ from~\cite{Antova_fastand}, all of which are modified versions of TPC-H queries $\query_3$, $\query_6$, and $\query_7$ with all aggregations dropped.
As written, the queries disallow $\bi$ cross terms. We first ran all queries, noting the result size for each. Next, the queries were rewritten so as not to filter out the cross terms. Comparing the sizes of the two result sets then indicates whether many cross terms arise in practice. As seen, the experimental query results contain few or no cancelling terms. \Cref{fig:experiment-bidb-cancel} shows the result sizes of the queries, where column CF is the result size when all cross terms are filtered out, column CI is the number of output tuples when the cancelled tuples are included in the result, and the last column is the value of $\gamma$. The experiments show $\gamma$ to lie in the range $[0, 0.1]\%$, indicating that only a negligible or constant (compare the result sizes of $\query_1 < \query_2$ and their respective $\gamma$ values) number of tuples is cancelled in practice when running queries over a typical $\bi$ instance. Interestingly, only one of the three queries had tuples that violated the $\bi$ constraint.
To conclude, the results in \Cref{fig:experiment-bidb-cancel} show experimentally that $\gamma$ is negligible in practice for BIDB queries. We also observe that (i) tuple presence is independent across blocks, so the corresponding probabilities (and hence $\prob_0$) are independent of the number of blocks, and (ii) \bis model uncertain attributes, so block size (and hence $\gamma$) is a function of the ``messiness'' of a dataset, rather than its size.
Thus, we expect the corollary to hold in general.
@ -24,3 +24,4 @@ Thus, we expect the corollary to hold in general.
\label{fig:experiment-bidb-cancel}
\end{figure}
\AH{The main to-dos for App C are to fix $\sampmon$ in the same way we choose to fix the precise identification of the monomial contained in $\monom$. The layout could be streamlined further.}

View File

@ -1,5 +1,5 @@
%root: main.tex
\AH{Just wanted to go over the math calculation in this before moving on.}
\begin{proof}%[Proof of \Cref{lem:lin-sys}]
The proof consists of two parts. First, we need to show that a vector $\vct{b}$ satisfying the linear system exists and, further, can be computed in $O(m)$ time. Second, we need to show that $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ can indeed be computed in time $O(1)$.
@ -22,13 +22,13 @@ To prove the first step, we use \Cref{lem:qE3-exp} to derive the following equal
&= \frac{\rpoly_{G}^3(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath}-\numocc{G}{\twodis}\prob-\numocc{G}{\oneint}\prob\label{eq:b1-alg-1}\\
\numocc{G}{\tri}(1-3p) &- \numocc{G}{\threedis}(3\prob^2 -\prob^3) = \nonumber\\
\frac{\rpoly_{G}^3(\prob,\ldots, \prob)}{6\prob^3} &- \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath}-\numocc{G}{\twodis}\prob-\numocc{G}{\oneint}\prob\nonumber\\
&-\left[\numocc{G}{\threepath}\prob+3\numocc{G}{\tri}\prob\right]-\left[\numocc{G}{\twopathdis}\prob^2+3\numocc{G}{\threedis}\prob^2\right]\label{eq:b1-alg-2}
\end{align}
\Cref{eq:lem-qE3-exp} is the result of \Cref{lem:qE3-exp}. We obtain the remaining equations through standard algebraic manipulations.
Note that the LHS of \Cref{eq:b1-alg-2} is obtained using \cref{eq:2pd-3d} and \cref{eq:3p-3tri} and is indeed the product $\vct{M}[1] \cdot \vct{x}[1]$. Further note that this product is equal to the RHS of \Cref{eq:b1-alg-2}, where every term is computable in $O(m)$ time (by equations (\ref{eq:1e})-(\ref{eq:3p-3tri})). We set $\vct{b}[1]$ to the RHS of \Cref{eq:b1-alg-2}.
We follow the same process in deriving an equality for $G^{(2)}$. Replacing occurrences of $G$ with $G^{(2)}$, we obtain an equation (below) of the form of \cref{eq:b1-alg-2} for $G^{(2)}$. Substituting identities from \cref{lem:3m-G2} and \cref{lem:tri}, we obtain
\begin{align}
0-\left(8\numocc{G}{\threedis}\right.&\left.+6\numocc{G}{\twopathdis}+4\numocc{G}{\oneint}+4\numocc{G}{\threepath}+2\numocc{G}{\tri}(3\prob^2 -\prob^3)\right)=\nonumber\\
&\frac{\rpoly_{\graph{2}}^3(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{\graph{2}}{\ed}}{6\prob} - \numocc{\graph{2}}{\twopath}-\numocc{\graph{2}}{\twodis}\prob-\numocc{\graph{2}}{\oneint}\prob\nonumber\\
@ -38,7 +38,7 @@ We follow the same process in deriving an equality for $G^{(2)}$. Replacing occ
&-\left[\numocc{\graph{2}}{\threepath}\prob+3\numocc{\graph{2}}{\tri}\prob\right]-\left[\numocc{\graph{2}}{\twopathdis}\prob^2-3\numocc{\graph{2}}{\threedis}\prob^2\right]\nonumber\\
&+\left(4\numocc{G}{\oneint}+\left[6\numocc{G}{\twopathdis}+18\numocc{G}{\threedis}\right]+\left[4\numocc{G}{\threepath}+12\numocc{G}{\tri}\right]\right)(3\prob^2 - \prob^3)\label{eq:b2-final}
\end{align}
The steps to obtaining \cref{eq:b2-final} are analogous to the preceding derivation. Note that the LHS of \Cref{eq:b2-final} is the same as $\vct{M}[2]\cdot \vct{x}[2]$. The RHS of \Cref{eq:b2-final} has terms all computable (by equations (\ref{eq:1e})-(\ref{eq:3p-3tri})) in $O(m)$ time. Setting $\vct{b}[2]$ to the RHS then completes the proof of step 1.
Note that if $\vct{M}$ has full rank then one can compute $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ in $O(1)$ using Gaussian elimination.
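To illustrate the second step: once $\vct{M}$ and $\vct{b}$ are assembled from the $O(m)$-computable quantities above, recovering the two counts is a constant-size solve. A sketch (numpy is used for brevity; the entries of M and b are whatever the derivation above produces):
\begin{verbatim}
import numpy as np

def solve_counts(M: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Solve M x = b for x = [#triangles, #3-matchings]; O(1) for a
    fixed 2x2 system, e.g. via Gaussian elimination."""
    assert M.shape == (2, 2) and abs(np.linalg.det(M)) > 1e-12  # full rank
    return np.linalg.solve(M, b)
\end{verbatim}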

View File

@ -198,6 +198,8 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\vset}{V}
\newcommand{\edgeSet}{E}
\newcommand{\gtype}[1]{\inparen{#1}}
\newcommand{\esetType}[1]{\edgeSet^{\gtype{#1}}}%edge set for induced graph G^{\inparen{\ell}}
\newcommand{\graph}[1]{G^{(#1)}}
\newcommand{\numocc}[2]{\#\left(#1,#2\right)}
\newcommand{\eset}[1]{E^{(#1)}_S} %edge set for arbitrary subgraph

View File

@ -11,7 +11,7 @@ We represent query polynomials via {\em arithmetic circuits}~\cite{arith-complex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source gates (in degree of $0$) consist of elements in either $\domN$ or $\vct{X}$. The internal gates and (the single) sink gate of $\circuit$ (corresponding to the result tuple $t$) have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
%
Each node in a circuit $\circuit$ has the following members: \type, \val, \vpartial, \vari{input}, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the type of value stored in the gate (one of $\{\circplus, \circmult, \var, \tnum\}$), \val is the value stored (a constant or variable), and \vari{input} is the list of the gate's inputs. We use $\circuit_\linput$ to denote the left input and $\circuit_\rinput$ the right input of the sink of circuit $\circuit$.
%The member \degval holds the degree of \circuit.
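A minimal Python sketch of this gate structure (field names mirror the definition; how \vari{Lweight} and \vari{Rweight} are populated belongs to the sampling preprocessing and is only indicated here):
\begin{verbatim}
from dataclasses import dataclass, field

@dataclass
class Gate:
    gate_type: str     # one of "plus", "times", "var", "num"
    val: object = None # constant or variable name, for source gates
    inputs: list = field(default_factory=list)  # binary for internal gates
    lweight: int = 0   # weight of the left input, e.g. |C_L|(1,...,1)
    rweight: int = 0   # weight of the right input

    @property
    def left(self):
        return self.inputs[0]

    @property
    def right(self):
        return self.inputs[1]
\end{verbatim}
This is the same shape assumed by the earlier sketches in this appendix.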