Incorporated \gamma into outer approx alg analysis.
This commit is contained in:
parent
bbc47b2923
commit
8211a9bfa0
|
@ -144,10 +144,17 @@ Using the same polynomial from the above example, $poly(\abs{\etree}) = (x + 2y)
|
||||||
Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(\vct{v}) = poly(\etree)(\vct{v})$.
|
Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(\vct{v}) = poly(\etree)(\vct{v})$.
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
|
|
||||||
|
\begin{Definition}[Probability $\gamma$]
|
||||||
|
Define $\gamma$ to be the probability that a monomial with variables from the same block $\block$ is sampled.
|
||||||
|
\end{Definition}
|
||||||
|
|
||||||
|
When a monomial with cross terms from the same block $\block$ is sampled, our algorithm will drop the sample and produce a new sample.
|
||||||
|
|
||||||
|
|
||||||
In the subsequent subsections we lay the groundwork to prove the following theorem.
|
In the subsequent subsections we lay the groundwork to prove the following theorem.
|
||||||
|
|
||||||
\begin{Theorem}\label{lem:approx-alg}
|
\begin{Theorem}\label{lem:approx-alg}
|
||||||
For any query polynomial $\poly(\vct{X})$, an approximation of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$, with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
|
For any query polynomial $\poly(\vct{X})$, an approximation of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma)^2}\right)$, with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
|
||||||
\end{Theorem}
|
\end{Theorem}
|
||||||
|
|
||||||
\subsection{Approximating $\rpoly$}
|
\subsection{Approximating $\rpoly$}
|
||||||
|
@ -261,7 +268,7 @@ with bound $P\left(\left|\mathcal{X} - \expct\pbox{\mathcal{X}}\right|\geq \erro
|
||||||
|
|
||||||
|
|
||||||
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
|
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
|
||||||
First, define $\gamma$ to be the probability that a monomial with variables from the same block $\block$ is sampled. When such a monomial is sampled, the algorithm effectively drops the sample and samples another monomial due to the mutual exclusion property of $\bi$. This can be seen in~\cref{alg:mon-sam-check} and~\cref{alg:mon-sam-drop} of the code.
|
As previously noted, by lines~\ref{alg:mon-sam-check} and~\ref{alg:mon-sam-drop} the algorithm will resample when it encounters a sample with variables from the same block. The probability of sampling such a monomial is $\gamma$.
|
||||||
|
|
||||||
Now, consider $\expandtree{\etree}$ and let $(\monom, \coef)$ be an arbitrary tuple in $\expandtree{\etree}$. For convenience, over an alphabet $\Sigma$ of size $\numvar$, define
|
Now, consider $\expandtree{\etree}$ and let $(\monom, \coef)$ be an arbitrary tuple in $\expandtree{\etree}$. For convenience, over an alphabet $\Sigma$ of size $\numvar$, define
|
||||||
\begin{equation*}
|
\begin{equation*}
|
||||||
|
@ -271,11 +278,11 @@ a function that takes a monomial $\monom$ in $\left\{\monom^a ~|~ \monom \in \Si
|
||||||
|
|
||||||
Consider now a set of $\samplesize$ random variables $\vct{\randvar}$, where each $\randvar_i$ is distributed as described above. Then for random variable $\randvar_i$, it is the case that
|
Consider now a set of $\samplesize$ random variables $\vct{\randvar}$, where each $\randvar_i$ is distributed as described above. Then for random variable $\randvar_i$, it is the case that
|
||||||
|
|
||||||
$\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree}}\frac{\coef \cdot \evalmp(\monom, p)}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}|\coef|}\cdot \frac{1}{1 - \gamma} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)} \cdot \frac{1}{1 - \gamma}$. Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
|
$\expct\pbox{\randvar_i} = \sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(v),\\ \block_i \neq \block_k}}\frac{\coef \cdot \evalmp(\monom, p)}{\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree}\st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(v),\\ \block_i \neq \block_k}}|\coef|} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot \frac{1}{1 - \gamma}} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$. Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
|
||||||
|
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
&\expct\pbox{\empmean} = \expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i} = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}\nonumber\\
|
&\expct\pbox{\empmean} = \expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i} = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}\nonumber\\
|
||||||
&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}|\coef|}\cdot \frac{1}{1 - \gamma} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}\cdot \frac{1}{1 - \gamma}.
|
&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(v),\\ \block_i \neq \block_k}}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(v),\\ \block_i \neq \block_k}}|\coef|} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}.
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
Hoeffding's inequality can be used to compute an upper bound on the number of samples $\samplesize$ needed to establish the $(\error, \conf)$-bound. The inequality states that if we know that each $\randvar_i$ is strictly bounded by the intervals $[a_i, b_i]$, then it is true that
|
Hoeffding's inequality can be used to compute an upper bound on the number of samples $\samplesize$ needed to establish the $(\error, \conf)$-bound. The inequality states that if we know that each $\randvar_i$ is strictly bounded by the intervals $[a_i, b_i]$, then it is true that
|
||||||
|
@ -283,7 +290,7 @@ Hoeffding' inequality can be used to compute an upper bound on the number of sam
|
||||||
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
|
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
|
||||||
\end{equation*}
|
\end{equation*}
|
||||||
|
|
||||||
As implied above, Hoeffding's inequality assumes that the sum of random variables is divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot\frac{1}{1 - \gamma} = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize}$. This computation is performed in~\cref{alg:mon-sam-global3}.
|
As implied above, Hoeffding's inequality assumes that the sum of random variables is divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma) = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize}$. This computation is performed in~\cref{alg:mon-sam-global3}.
|
||||||
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
|
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
|
||||||
|
|
||||||
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}) such that each $p_i$ is in $[0, 1]$; the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the pseudocode) is then strictly bounded by $[-1, 1]$. Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upper bounding Hoeffding with $\conf$, it is the case that
|
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}) such that each $p_i$ is in $[0, 1]$; the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the pseudocode) is then strictly bounded by $[-1, 1]$. Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upper bounding Hoeffding with $\conf$, it is the case that
|
||||||
|
@ -321,15 +328,15 @@ Thus we have $O(\treesize(\etree)) + O(\left(\frac{\log{\frac{1}{\conf}}}{\error
|
||||||
|
|
||||||
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
|
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
|
||||||
%\begin{Corollary}\label{cor:adj-err}
|
%\begin{Corollary}\label{cor:adj-err}
|
||||||
Setting $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}$ achieves $1 \pm \epsilon$ multiplicative error bounds, in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)(1 - \gamma)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$.
|
Setting $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$ achieves $1 \pm \epsilon$ multiplicative error bounds, in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)(1 - \gamma)^2}\right)$.
|
||||||
%\end{Corollary}
|
%\end{Corollary}
|
||||||
|
|
||||||
Since we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
|
Since we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
|
||||||
|
|
||||||
The derivation over the number of samples is then
|
The derivation over the number of samples is then
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
&\frac{2\log{\frac{2}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}\right)^2}\\
|
&\frac{2\log{\frac{2}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
|
||||||
= &\frac{2\log{\frac{2}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot (1 - \gamma)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
|
= &\frac{2\log{\frac{2}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)^2},
|
||||||
\end{align*}
|
\end{align*}
|
||||||
and the runtime then follows, thus upholding~\cref{lem:approx-alg}.
|
and the runtime then follows, thus upholding~\cref{lem:approx-alg}.
|
||||||
\end{proof}
|
\end{proof}
|
||||||
|
|
Loading…
Reference in a new issue