Done till correctness of main approx algo.
Still need to make pass on auxiliary algos
This commit is contained in:
parent
fd23ea69f9
commit
cba9d6adc5
127
approx_alg.tex
127
approx_alg.tex
|
@ -258,7 +258,7 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
||||||
\State $(\vari{\etree}_\vari{mod}, \vari{size}) \gets $ \onepass($\etree$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\cref{alg:one-pass}}
|
\State $(\vari{\etree}_\vari{mod}, \vari{size}) \gets $ \onepass($\etree$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\cref{alg:one-pass}}
|
||||||
%\newline
|
%\newline
|
||||||
%\State $\vari{i} \gets 1$
|
%\State $\vari{i} \gets 1$
|
||||||
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\Comment{Perform the required number of samples}
|
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
|
||||||
%\State $\bivec \gets [0]^{\abs{\block}}$\Comment{$\bivec$ is an array whose size is the number of blocks, used to check for cross-terms}\newline
|
%\State $\bivec \gets [0]^{\abs{\block}}$\Comment{$\bivec$ is an array whose size is the number of blocks, used to check for cross-terms}\newline
|
||||||
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\etree_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon \; is ~\cref{alg:sample}}
|
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\etree_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon \; is ~\cref{alg:sample}}
|
||||||
%\For{$\vari{x}_\vari{\block,i}$ \text{ in } $\vari{M}$}
|
%\For{$\vari{x}_\vari{\block,i}$ \text{ in } $\vari{M}$}
|
||||||
|
@ -280,7 +280,7 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
||||||
% \If{$\vari{sample}_{\vari{next}} = 1$}\label{alg:mon-sam-drop}
|
% \If{$\vari{sample}_{\vari{next}} = 1$}\label{alg:mon-sam-drop}
|
||||||
% \State $\vari{sample}_{\vari{next}} \gets 0$\label{alg:mon-sam-resamp}
|
% \State $\vari{sample}_{\vari{next}} \gets 0$\label{alg:mon-sam-resamp}
|
||||||
% \Else
|
% \Else
|
||||||
\If{$\vari{M}$ has at most one variable from each block}
|
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
|
||||||
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}%\newline
|
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}%\newline
|
||||||
%\For{$\vari{x}_{\vari{j}}$ \text{ in } $\vari{M}$}%_{\vari{i}}$}
|
%\For{$\vari{x}_{\vari{j}}$ \text{ in } $\vari{M}$}%_{\vari{i}}$}
|
||||||
% \State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times \; \vari{\prob}_\vari{j}$\label{alg:mon-sam-product2} \Comment{$\vari{p}_\vari{j}$ is the assignment to $\vari{x}_\vari{j}$ from input $\vct{p}$}
|
% \State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times \; \vari{\prob}_\vari{j}$\label{alg:mon-sam-product2} \Comment{$\vari{p}_\vari{j}$ is the assignment to $\vari{x}_\vari{j}$ from input $\vct{p}$}
|
||||||
|
@ -345,91 +345,112 @@ In proving correctness of~\Cref{alg:mon-sam}, we will only use the following fac
|
||||||
The function $\sampmon$ completes in $O(\log{k} \cdot k \cdot depth(\etree))$ time, where $k = \degree(poly(\abs{\etree}))$. Upon completion, every $\left(\monom, sign(\coef)\right)\in \expandtree{\abs{\etree}}$ is returned with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$. %, $\sampmon$ returns the sampled term $\left(\monom, sign(\coef)\right)$ from $\expandtree{\abs{\etree}}$.
|
The function $\sampmon$ completes in $O(\log{k} \cdot k \cdot depth(\etree))$ time, where $k = \degree(poly(\abs{\etree}))$. Upon completion, every $\left(\monom, sign(\coef)\right)\in \expandtree{\abs{\etree}}$ is returned with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$. %, $\sampmon$ returns the sampled term $\left(\monom, sign(\coef)\right)$ from $\expandtree{\abs{\etree}}$.
|
||||||
\end{Lemma}
|
\end{Lemma}
|
||||||
|
|
||||||
|
Armed with the above two lemmas, we are ready to argue the following result:
|
||||||
\begin{Theorem}\label{lem:mon-samp}
|
\begin{Theorem}\label{lem:mon-samp}
|
||||||
If the contracts for $\onepass$ and $\sampmon$ hold, then for any $\etree$ with $\degree(poly(|\etree|)) = k$, algorithm \ref{alg:mon-sam} outputs an estimate $\empmean$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that $\expct\pbox{\empmean} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$. %within an additive $\error \cdot \abs{\etree}(1,\ldots, 1)$ error with
|
%If the contracts for $\onepass$ and $\sampmon$ hold, then
|
||||||
$\empmean$ has bounds $P\left(\left|\empmean - \expct\pbox{\empmean}\right|\geq \error \cdot \frac{\abs{\etree}(1,\ldots, 1)}{1 - \gamma}\right) \leq \conf$, in $O\left(\treesize(\etree)\right.$ $+$ $\left.\left(\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot depth(\etree)\right)\right)$ time.
|
For any $\etree$ with $\degree(poly(|\etree|)) = k$, algorithm \ref{alg:mon-sam} outputs an estimate $\vari{acc}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that %$\expct\pbox{\empmean} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$. %within an additive $\error \cdot \abs{\etree}(1,\ldots, 1)$ error with
|
||||||
|
$\empmean$ has bounds
|
||||||
|
\[P\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\etree}(1,\ldots, 1)\right) \leq \conf,\]
|
||||||
|
in $O\left(\treesize(\etree)\right.$ $+$ $\left.\left(\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot depth(\etree)\right)\right)$ time.
|
||||||
\end{Theorem}
|
\end{Theorem}
|
||||||
|
|
||||||
|
Before proving~\Cref{lem:mon-samp}, we use it to argue our main result:
|
||||||
|
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
|
||||||
|
%\begin{Corollary}\label{cor:adj-err}
|
||||||
|
Set $\mathcal{E}=\approxq(\etree, (p_1,\dots,p_\numvar), \conf, \error')$, where
|
||||||
|
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)},\]
|
||||||
|
which achieves the claimed accuracy bound on $\mathcal{E}$.
|
||||||
|
% achieves $1 \pm \epsilon$ multiplicative error bounds, in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)(1 - \gamma)^2}\right)$.
|
||||||
|
%\end{Corollary}
|
||||||
|
|
||||||
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
|
%Since it is the case that we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
|
||||||
As previously noted, by lines ~\ref{alg:mon-sam-check} and ~\ref{alg:mon-sam-drop} the algorithm will resample when it encounters a sample with variables from the same block. The probability of sampling such a monomial is $\gamma$.
|
|
||||||
|
|
||||||
Now, consider $\expandtree{\etree}$ and let $(\monom, \coef)$ be an arbitrary tuple in $\expandtree{\etree}$. For convenience, over an alphabet $\Sigma$ of size $\numvar$, define
|
|
||||||
\begin{equation*}
|
|
||||||
\evalmp: \left(\left\{\monom^a~|~\monom \in \Sigma^b, a \in \mathbb{N}, b \in [k]\right\}, [0, 1]^\numvar\right)\mapsto \mathbb{R},
|
|
||||||
\end{equation*}
|
|
||||||
a function that takes a monomial $\monom$ in $\left\{\monom^a ~|~ \monom \in \Sigma^b, a \in \mathbb{N}, b \in [k]\right\}$ and probability vector $\vct{p}$ (introduced in ~\cref{subsec:def-data}) as input and outputs the evaluation of $\monom$ over $\vct{p}$. By ~\cref{lem:sample}, the sampling scheme samples $(\monom, \coef)$ in $\expandtree{\etree}$ with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$. Note that $\coef \cdot \evalmp(\monom, \vct{p})$ is the value of $(\monom, \coef)$ in $\expandtree{\etree}$ when all variables in $\monom$ are assigned their corresponding probabilities.
|
|
||||||
|
|
||||||
Let $Vars(\monom) = \{X_{\block, i} \st X_{\block, i} \in \monom\}$. Define the set of elements containing no cross-terms in $\expandtree{\etree}$ as $\expandtree{\etree}' = \{(\monom, \coef) \st \forall (\monom, \coef) \in \expandtree{\etree}, \forall X_{\block, i}, X_{\block', j} \in Vars(\monom), \block \neq \block'\}$.
|
|
||||||
|
|
||||||
Note again that the sum of $\coef \cdot \evalmp(\monom, \vct{p})$ over $\expandtree{\etree}'$ is equivalently $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
|
|
||||||
|
|
||||||
Consider now a set of $\samplesize$ random variables $\vct{\randvar}$, where each $\randvar_i$ is distributed as described above. Then for random variable $\randvar_i$, it is the case that
|
|
||||||
|
|
||||||
$\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree}' }\frac{\coef \cdot \evalmp(\monom, p)}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}'}|\coef|} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot \frac{1}{1 - \gamma}} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$. Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
|
|
||||||
|
|
||||||
|
%The derivation over the number of samples is then
|
||||||
|
The claim on the runtime follows since
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
&\expct\pbox{\empmean} = \expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i} = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}\nonumber\\
|
\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
|
||||||
&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{(\monom, \coef) \in \expandtree{\etree}'}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}'}} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}.
|
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
%and the runtime then follows, thus upholding ~\cref{lem:approx-alg}.
|
||||||
|
which completes the proof.
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
Hoeffding's inequality can be used to compute an upper bound on the number of samples $\samplesize$ needed to establish the $(\error, \conf)$-bound. The inequality states that if we know that each $\randvar_i$ is strictly bounded by the intervals $[a_i, b_i]$, then it is true that
|
\qed
|
||||||
|
|
||||||
|
We now return to the proof of~\Cref{lem:mon-samp}:
|
||||||
|
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
|
||||||
|
%As previously noted, by lines ~\ref{alg:mon-sam-check} and ~\ref{alg:mon-sam-drop} the algorithm will resample when it encounters a sample with variables from the same block. The probability of sampling such a monomial is $\gamma$.
|
||||||
|
|
||||||
|
%Now, consider $\expandtree{\etree}$ and let $(\monom, \coef)$ be an arbitrary tuple in $\expandtree{\etree}$. For convenience, over an alphabet $\Sigma$ of size $\numvar$, define
|
||||||
|
%\begin{equation*}
|
||||||
|
%\evalmp: \left(\left\{\monom^a~|~\monom \in \Sigma^b, a \in \mathbb{N}, b \in [k]\right\}, [0, 1]^\numvar\right)\mapsto \mathbb{R},
|
||||||
|
%\end{equation*}
|
||||||
|
%a function that takes a monomial $\monom$ in $\left\{\monom^a ~|~ \monom \in \Sigma^b, a \in \mathbb{N}, b \in [k]\right\}$ and probability vector $\vct{p}$ (introduced in ~\cref{subsec:def-data}) as input and outputs the evaluation of $\monom$ over $\vct{p}$. By ~\cref{lem:sample}, the sampling scheme samples $(\monom, \coef)$ in $\expandtree{\etree}$ with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$. Note that $\coef \cdot \evalmp(\monom, \vct{p})$ is the value of $(\monom, \coef)$ in $\expandtree{\etree}$ when all variables in $\monom$ are assigned their corresponding probabilities.
|
||||||
|
|
||||||
|
%Let $Vars(\monom) = \{X_{\block, i} \st X_{\block, i} \in \monom\}$. Define the set of elements containing no cross-terms in $\expandtree{\etree}$ as $\expandtree{\etree}' = \{(\monom, \coef) \st \forall (\monom, \coef) \in \expandtree{\etree}, \forall X_{\block, i}, X_{\block', j} \in Vars(\monom), \block \neq \block'\}$.
|
||||||
|
|
||||||
|
%Note again that the sum of $\coef \cdot \evalmp(\monom, \vct{p})$ over $\expandtree{\etree}'$ is equivalently $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
|
||||||
|
|
||||||
|
Consider now the random variables $\randvar_1,\dots,\randvar_\numvar$, where each $\randvar_i$ is the value of $\vari{Y}_{\vari{i}}$ after~\Cref{alg:mon-sam-product} is executed. In particular, note that we have
|
||||||
|
\[Y_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{\monom}} p_i,\]
|
||||||
|
where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
|
||||||
|
Then for random variable $\randvar_i$, it is the case that
|
||||||
|
|
||||||
|
\[\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_i\in \var\inparen{\monom}} p_i }{\abs{\etree}(1,\dots,1)} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},\]
|
||||||
|
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi}.
|
||||||
|
% = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}.\]
|
||||||
|
|
||||||
|
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
|
||||||
|
|
||||||
|
\[\expct\pbox{\empmean} %\expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i}
|
||||||
|
= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}
|
||||||
|
%&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{(\monom, \coef) \in \expandtree{\etree}'}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}'}}
|
||||||
|
= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}.\]
|
||||||
|
|
||||||
|
Hoeffding's inequality %can be used to compute an upper bound on the number of samples $\samplesize$ needed to establish the $(\error, \conf)$-bound. The inequality
|
||||||
|
states that if we know that each $\randvar_i$ (which are all independent) always lie in the intervals $[a_i, b_i]$, then it is true that
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
|
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
|
||||||
\end{equation*}
|
\end{equation*}
|
||||||
|
|
||||||
As implied above, Hoeffding's inequality assumes that the sum of random variables is divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma) = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize \cdot (1 - \gamma)}$. This computation is performed in~\cref{alg:mon-sam-global3}.
|
%As implied above, Hoeffding is assuming the sum of random variables be divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma) = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, then our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize \cdot (1 - \gamma)}$. This computation is performed on ~\cref{alg:mon-sam-global3}.
|
||||||
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
|
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
|
||||||
|
|
||||||
Line ~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}) such that each $p_i$ is in $[0, 1]$, the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the pseudo code) is then strictly bounded by $[-1, 1]$. Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upper bounding Hoeffding with $\frac{\conf}{2}$ (since we take an additional estimate of $\gamma$), it is the case that
|
Line ~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ %at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}) such that each
|
||||||
|
$p_i\in [0, 1]$, the range for each $\randvar_i$ is $[-1, 1]$. % Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upper bounding Hoeffding with $\frac{\conf}{2}$ (since we take an additional estimate of $\gamma$), it is the case that
|
||||||
|
Using Hoeffding's inequality, we then get:
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
P\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} \leq \frac{\conf}{2}.
|
P\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
|
||||||
\end{equation*}
|
\end{equation*}
|
||||||
|
where the last inequality follows from our choice of $\samplesize$.
|
||||||
Solving for the number of samples $\samplesize$ we get
|
%Solving for the number of samples $\samplesize$ we get
|
||||||
\begin{align}
|
%\begin{align}
|
||||||
&\frac{\conf}{2} \geq 2\exp{-\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-1}\\
|
%&\frac{\conf}{2} \geq 2\exp{-\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-1}\\
|
||||||
%&\frac{\conf}{2} \geq \exp{-\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-2}\\
|
%&\frac{\conf}{2} \geq \exp{-\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-2}\\
|
||||||
%&\frac{2}{\conf} \leq \exp{\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-3}\\
|
%&\frac{2}{\conf} \leq \exp{\left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)}\label{eq:hoeff-3}\\
|
||||||
%&\log{\frac{2}{\conf}} \leq \left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)\label{eq:hoeff-4}\\
|
%&\log{\frac{2}{\conf}} \leq \left(\frac{2\samplesize^2\error^2}{4\samplesize}\right)\label{eq:hoeff-4}\\
|
||||||
%&\log{\frac{2}{\conf}} \leq \frac{\samplesize\error^2}{2}\label{eq:hoeff-5}\\
|
%&\log{\frac{2}{\conf}} \leq \frac{\samplesize\error^2}{2}\label{eq:hoeff-5}\\
|
||||||
&\frac{2\log{\frac{4}{\conf}}}{\error^2} \leq \samplesize.\label{eq:hoeff-6}
|
%&\frac{2\log{\frac{4}{\conf}}}{\error^2} \leq \samplesize.\label{eq:hoeff-6}
|
||||||
\end{align}
|
%\end{align}
|
||||||
|
|
||||||
By Hoeffding we obtain the number of samples necessary to achieve the claimed additive error bounds.
|
%By Hoeffding we obtain the number of samples necessary to achieve the claimed additive error bounds.
|
||||||
|
|
||||||
This concludes the proof for the first claim of Theorem~\ref{lem:mon-samp}.
|
This concludes the proof for the first claim of Theorem~\ref{lem:mon-samp}.
|
||||||
|
|
||||||
\paragraph{Run-time Analysis}
|
\paragraph{Run-time Analysis}
|
||||||
%For a $\bi$ instance, it is possible that cancellations can occur as seen in ~\cref{alg:mon-sam-drop}, and by ~\cref{alg:mon-sam-resamp} the algorithm will then re-sample. This affects the overall runtime. Let us denote by $\gamma$ the number of cancellations.
|
%For a $\bi$ instance, it is possible that cancellations can occur as seen in ~\cref{alg:mon-sam-drop}, and by ~\cref{alg:mon-sam-resamp} the algorithm will then re-sample. This affects the overall runtime. Let us denote by $\gamma$ the number of cancellations.
|
||||||
|
|
||||||
Note that lines ~\ref{alg:mon-sam-global1}, ~\ref{alg:mon-sam-global2}, and ~\ref{alg:mon-sam-global3} are $O(1)$ global operations. The call to $\onepass$ in line ~\ref{alg:mon-sam-onepass} by lemma ~\ref{lem:one-pass} is $O(\treesize(\etree))$ time.
|
%Note that lines ~\ref{alg:mon-sam-global1}, ~\ref{alg:mon-sam-global2}, and ~\ref{alg:mon-sam-global3} are $O(1)$ global operations. The call to $\onepass$ in line ~\ref{alg:mon-sam-onepass} by lemma ~\ref{lem:one-pass} is $O(\treesize(\etree))$ time.
|
||||||
%First, algorithm ~\ref{alg:mon-sam} calls \textsc{OnePass} which takes $O(|\etree|)$ time.
|
%First, algorithm ~\ref{alg:mon-sam} calls \textsc{OnePass} which takes $O(|\etree|)$ time.
|
||||||
Then for $\numsamp = \ceil{\frac{2 \log{\frac{4}{\conf}}}{\error^2}}$, the $O(1)$ assignment, product, and addition operations occur. Over the same $\numsamp$ iterations, $\sampmon$ is called, with a runtime of $O(\log{k}\cdot k \cdot depth(\etree))$ by lemma ~\ref{lem:sample}. Finally, over the same iterations, because $\degree(\polyf(\abs{\etree})) = k$, the assignment and product operations of line ~\ref{alg:mon-sam-product2} are called at most $k$ times.
|
%Then for $\numsamp = \ceil{\frac{2 \log{\frac{4}{\conf}}}{\error^2}}$, the $O(1)$ assignment, product, and addition operations occur. Over the same $\numsamp$ iterations, $\sampmon$ is called, with a runtime of $O(\log{k}\cdot k \cdot depth(\etree))$ by lemma ~\ref{lem:sample}. Finally, over the same iterations, because $\degree(\polyf(\abs{\etree})) = k$, the assignment and product operations of line ~\ref{alg:mon-sam-product2} are called at most $k$ times.
|
||||||
|
|
||||||
Thus we have $O(\treesize(\etree)) + O(\left(\frac{\log{\frac{1}{\conf}}}{\error^2}\right) \cdot \left(k + \log{k}\cdot k \cdot depth(\etree)\right) = O\left(\treesize(\etree) + \left(\left(\frac{\log{\frac{1}{\conf}}}{\error^2}\right) \cdot \left(k \cdot\log{k} \cdot depth(\etree)\right)\right)\right)$ overall running time.
|
%Thus we have $O(\treesize(\etree)) + O(\left(\frac{\log{\frac{1}{\conf}}}{\error^2}\right) \cdot \left(k + \log{k}\cdot k \cdot depth(\etree)\right) = O\left(\treesize(\etree) + \left(\left(\frac{\log{\frac{1}{\conf}}}{\error^2}\right) \cdot \left(k \cdot\log{k} \cdot depth(\etree)\right)\right)\right)$ overall running time.
|
||||||
|
The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes time $O(size(\etree))$) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot depth(\etree))$) and~\Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Adding up all the times discussed here gives us the desired overall runtime.
|
||||||
\end{proof}
|
\end{proof}
|
||||||
|
|
||||||
\qed
|
\qed
|
||||||
|
|
||||||
\AH{Why did we drop the $k \cdot \log{k} \cdot depth(\etree)$ factor in what follows below?}
|
|
||||||
|
|
||||||
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
|
|
||||||
%\begin{Corollary}\label{cor:adj-err}
|
|
||||||
Setting $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$ achieves $1 \pm \epsilon$ multiplicative error bounds, in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)(1 - \gamma)^2}\right)$.
|
|
||||||
%\end{Corollary}
|
|
||||||
|
|
||||||
Since it is the case that we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
|
|
||||||
|
|
||||||
The derivation over the number of samples is then
|
|
||||||
\begin{align*}
|
|
||||||
&\frac{2\log{\frac{4}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
|
|
||||||
= &\frac{2\log{\frac{4}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)^2},
|
|
||||||
\end{align*}
|
|
||||||
and the runtime then follows, thus upholding ~\cref{lem:approx-alg}.
|
|
||||||
\end{proof}
|
|
||||||
|
|
||||||
\qed
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{OnePass Algorithm}
|
\subsection{OnePass Algorithm}
|
||||||
|
|
Loading…
Reference in a new issue