Done through correctness of the main approximation algorithm.

Still need to make a pass on the auxiliary algorithms.
Atri Rudra 2020-12-14 23:24:09 -05:00
parent fd23ea69f9
commit cba9d6adc5


@@ -258,7 +258,7 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
\State $(\vari{\etree}_\vari{mod}, \vari{size}) \gets $ \onepass($\etree$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\cref{alg:one-pass}}
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\etree_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon \; is ~\cref{alg:sample}}
@@ -280,7 +280,7 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}%\newline
@@ -345,91 +345,112 @@ In proving correctness of~\Cref{alg:mon-sam}, we will only use the following fac
The function $\sampmon$ completes in $O(\log{k} \cdot k \cdot depth(\etree))$ time, where $k = \degree(\polyf(\abs{\etree}))$. Upon completion, every $\left(\monom, sign(\coef)\right)\in \expandtree{\abs{\etree}}$ is returned with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$.
\end{Lemma}
Armed with the above two lemmas, we are ready to argue the following result:
\begin{Theorem}\label{lem:mon-samp}
For any $\etree$ with $\degree(\polyf(\abs{\etree})) = k$, \Cref{alg:mon-sam} outputs an estimate $\vari{acc}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that
\[P\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right| > \error \cdot \abs{\etree}(1,\ldots, 1)\right) \leq \conf,\]
in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot depth(\etree)\right)$ time.
\end{Theorem}
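For intuition, the following is a minimal Python sketch of the sampling loop of~\Cref{alg:mon-sam}; the helpers \texttt{one\_pass} and \texttt{sample\_monomial} (standing in for $\onepass$ and $\sampmon$) and the representation of a monomial as \texttt{(block\_id, var\_id)} pairs are assumptions of this sketch, not part of the formal pseudocode.
\begin{verbatim}
import math

def approximate_rpoly(etree, probs, delta, epsilon,
                      one_pass, sample_monomial):
    # one_pass is assumed to return the annotated tree and |etree|(1,...,1);
    # sample_monomial is assumed to return a sampled monomial as a list of
    # (block_id, var_id) pairs together with its sign in {-1, +1}.
    etree_mod, abs_etree_at_ones = one_pass(etree)
    n = math.ceil(2 * math.log(4 / delta) / epsilon ** 2)  # number of samples
    acc = 0.0
    for _ in range(n):
        monomial, sign = sample_monomial(etree_mod)
        block_ids = sorted(b for (b, _) in monomial)
        # A repeated block id means a same-block cross-term: it contributes 0.
        if all(x != y for x, y in zip(block_ids, block_ids[1:])):
            y = float(sign)
            for var in monomial:
                y *= probs[var]  # probs: dict keyed by (block_id, var_id)
            acc += y
    return (acc / n) * abs_etree_at_ones  # rescale by |etree|(1,...,1)
\end{verbatim}
The sorted scan over block IDs is the same $O(k \log{k})$ duplicate-block check used in the run-time analysis below.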
Before proving~\Cref{lem:mon-samp}, we use it to argue our main result:
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
Set $\mathcal{E}=\approxq(\etree, (p_1,\dots,p_\numvar), \conf, \error')$, where
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},\]
which by~\Cref{lem:mon-samp} achieves the claimed accuracy bound on $\mathcal{E}$.
The claim on the runtime follows since
\begin{align*}
\frac{1}{\inparen{\error'}^2}\cdot \log\inparen{\frac{1}{\conf}} =&\ \frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
=&\ \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
\end{align*}
which completes the proof.
\end{proof}
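For intuition on the choice of $\error'$ (an illustrative calculation): if $\rpoly(\prob_1,\ldots, \prob_\numvar) = \frac{1}{10}\cdot\abs{\etree}(1,\ldots, 1)$, then $\error' = \frac{\error}{10}$, and since the number of samples scales as $\frac{1}{(\error')^2}$, $\approxq$ draws $100$ times as many samples as in the additive-error setting of~\Cref{lem:mon-samp}.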
We now return to the proof of~\Cref{lem:mon-samp}:
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
Consider now the random variables $\randvar_1,\dots,\randvar_\samplesize$, where each $\randvar_i$ is the value of $\vari{Y}_{\vari{i}}$ after~\Cref{alg:mon-sam-product} is executed. In particular, note that we have
\[\randvar_i = \vari{sgn}_\vari{i}\cdot\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_j\in \var\inparen{\monom}} p_j,\]
where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
Then for random variable $\randvar_i$, it is the case that
\[\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree}}\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_j\in \var\inparen{\monom}} p_j}{\abs{\etree}(1,\dots,1)} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},\]
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi}.
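As a sanity check, consider a toy example (with hypothetical variables and probabilities, writing $p_{\block,i}$ for the probability of $X_{\block,i}$): if $\expandtree{\etree}$ consists exactly of the tuples $(X_{1,1}X_{2,1}, 1)$ and $(X_{1,1}X_{1,2}, 1)$, then $\abs{\etree}(1,\ldots, 1) = 2$; the second monomial has two variables from block $1$, so its indicator is $0$, and $\expct\pbox{\randvar_i} = \frac{p_{1,1}\, p_{2,1} + 0}{2}$, which is exactly $\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}$ since $\rpoly$ drops precisely the monomials with more than one variable per block.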
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
\[\expct\pbox{\empmean} = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}.\]
Hoeffding's inequality states that if each $\randvar_i$ (where the $\randvar_i$ are mutually independent) always lies in the interval $[a_i, b_i]$, then it is true that
\begin{equation*}
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
\end{equation*}
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ factors $p_j \in [0, 1]$; hence, each $\randvar_i$ lies in the interval $[-1, 1]$.
Using Hoeffding's inequality, we then get:
\begin{equation*}
P\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2}\right)}\leq \conf,
\end{equation*}
where the last inequality follows from our choice of $\samplesize = \ceil{\frac{2 \log{\frac{4}{\conf}}}{\error^2}}$.
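For concreteness, an illustrative calculation (taking $\log$ to be the natural logarithm): for $\error = 0.05$ and $\conf = 0.01$, this choice yields $\samplesize = \ceil{\frac{2\log{400}}{0.0025}} = 4{,}794$ samples, independent of the size of $\etree$.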
This concludes the proof of the first claim of Theorem~\ref{lem:mon-samp}.
\paragraph{Run-time Analysis}
The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes $O(\treesize(\etree))$ time) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call in~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot depth(\etree))$ time) and the check in~\Cref{alg:check-duplicate-block}, which by the following argument takes $O(k\log{k})$ time: we sort the $O(k)$ variables by their block IDs and then check whether any two adjacent entries share a block ID. Adding up all the times discussed here gives us the desired overall runtime.
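Spelling this out (a restatement of the bounds just discussed), the total cost is
\[O\big(\treesize(\etree)\big) + O\left(\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot \left(k\log{k} + k \cdot\log{k} \cdot depth(\etree)\right)\right) = O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot depth(\etree)\right).\]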
\end{proof}
\AH{Why did we drop the $k \cdot \log{k} \cdot depth(\etree)$ factor in what follows below?}
\subsection{OnePass Algorithm}