Incorporated \gamma into outer approx alg analysis.

Aaron Huber 2020-12-11 10:15:35 -05:00
parent bbc47b2923
commit 8211a9bfa0


@ -144,10 +144,17 @@ Using the same polynomial from the above example, $poly(\abs{\etree}) = (x + 2y)
Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(\vct{v}) = poly(\etree)(\vct{v})$.
\end{Definition}
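As a quick illustration of this definition, below is a minimal Python sketch of bottom-up evaluation of an expression tree; the \texttt{Node} layout (internal nodes $+$ and $\times$, leaves holding a variable index or a constant) is an assumption made for this sketch rather than the paper's actual data structure.
\begin{verbatim}
from dataclasses import dataclass
from typing import Tuple

@dataclass
class Node:
    op: str                            # '+', '*', 'var', or 'const'
    children: Tuple['Node', ...] = ()  # subtrees for '+' and '*'
    index: int = 0                     # variable index i for 'var'
    value: float = 0.0                 # constant value for 'const'

def eval_tree(t, v):
    """Compute etree(v) = poly(etree)(v) by bottom-up evaluation."""
    if t.op == 'const':
        return t.value
    if t.op == 'var':
        return v[t.index]
    vals = [eval_tree(c, v) for c in t.children]
    return sum(vals) if t.op == '+' else vals[0] * vals[1]

# Example: the tree for x + 2y evaluated at v = (3, 4) yields 11.
tree = Node('+', (Node('var', index=0),
                  Node('*', (Node('const', value=2.0),
                             Node('var', index=1)))))
assert eval_tree(tree, [3, 4]) == 11
\end{verbatim}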
+\begin{Definition}[Probability $\gamma$]
+Define $\gamma$ to be the probability that a sampled monomial contains at least two distinct variables from the same block $\block$.
+\end{Definition}
+When a sampled monomial contains variables from the same block $\block$, our algorithm drops the sample and draws a new one.
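Below is a minimal Python sketch of this drop-and-resample step; the representation of a monomial as a collection of (block, index) pairs and the helper \texttt{sample\_monomial} (assumed to draw a signed monomial with probability proportional to the magnitude of its coefficient) are hypothetical names introduced only for illustration.
\begin{verbatim}
def has_same_block_pair(monomial_vars):
    """True when two distinct variables X_{b,j}, X_{b,l} share a block b."""
    blocks = [b for (b, _) in set(monomial_vars)]
    return len(blocks) != len(set(blocks))

def sample_valid_monomial(sample_monomial):
    """Draw until the sampled monomial has all variables in distinct blocks.

    sample_monomial() is a hypothetical helper returning a pair
    (monomial_vars, sign), drawn with probability proportional to the
    magnitude of the monomial's coefficient.
    """
    while True:
        monomial_vars, sign = sample_monomial()
        # With probability gamma the draw has two variables from the
        # same block; drop it and resample.
        if not has_same_block_pair(monomial_vars):
            return monomial_vars, sign
\end{verbatim}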
In the subsequent subsections we lay the groundwork to prove the following theorem.
\begin{Theorem}\label{lem:approx-alg}
-For any query polynomial $\poly(\vct{X})$, an approximation of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$, with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
+For any query polynomial $\poly(\vct{X})$, an approximation of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma)^2}\right)$, with multiplicative $(\error,\conf)$-bounds, where $k$ denotes the degree of $\poly$.
\end{Theorem}
\subsection{Approximating $\rpoly$}
@ -261,7 +268,7 @@ with bound $P\left(\left|\mathcal{X} - \expct\pbox{\mathcal{X}}\right|\geq \erro
\begin{proof}[Proof of Theorem \ref{lem:mon-samp}]
-First, define $\gamma$ to be the probability that a monomial with variables from the same block $\block$ is sampled. When such a monomial is sampled, the algorithm effectively drops the sample and samples another monomial due to the mutual exclusion property of $\bi$. This can be seen in ~\cref{alg:mon-sam-check} and ~\cref{alg:mon-sam-drop} of the code.
+As previously noted, the algorithm resamples whenever it encounters a monomial with variables from the same block (lines~\ref{alg:mon-sam-check} and~\ref{alg:mon-sam-drop}). The probability of sampling such a monomial is $\gamma$.
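Resampling comes at only a modest cost: assuming the draws are independent, the number of draws needed to produce one accepted sample is geometrically distributed with success probability $1 - \gamma$, so
\begin{equation*}
\expct\pbox{\text{draws per accepted sample}} = \sum_{t \geq 1} t \cdot (1 - \gamma)\gamma^{t - 1} = \frac{1}{1 - \gamma}.
\end{equation*}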
Now, consider $\expandtree{\etree}$ and let $(\monom, \coef)$ be an arbitrary tuple in $\expandtree{\etree}$. For convenience, over an alphabet $\Sigma$ of size $\numvar$, define
\begin{equation*}
@ -271,11 +278,11 @@ a function that takes a monomial $\monom$ in $\left\{\monom^a ~|~ \monom \in \Si
Consider now a set of $\samplesize$ random variables $\vct{\randvar}$, where each $\randvar_i$ is distributed as described above. Then for random variable $\randvar_i$, it is the case that
-$\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree}}\frac{\coef \cdot \evalmp(\monom, p)}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}|\coef|}\cdot \frac{1}{1 - \gamma} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)} \cdot \frac{1}{1 - \gamma}$. Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
+$\expct\pbox{\randvar_i} = \sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(\monom),\\ \block_i \neq \block_k}}\frac{\coef \cdot \evalmp(\monom, p)}{\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree}\st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(\monom),\\ \block_i \neq \block_k}}|\coef|} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot \frac{1}{1 - \gamma}} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$. Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
\begin{align*}
&\expct\pbox{\empmean} = \expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i} = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}\nonumber\\
-&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{(\monom, \coef) \in \expandtree{\etree}}|\coef|}\cdot \frac{1}{1 - \gamma} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}\cdot \frac{1}{1 - \gamma}.
+&= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(\monom),\\ \block_i \neq \block_k}}\frac{\coef \cdot \evalmp(\monom, \vct{p})}{\sum\limits_{\substack{(\monom, \coef) \in \expandtree{\etree} \st\\ \forall X_{b_i, j}, X_{b_k, \ell}\\ \in Vars(\monom),\\ \block_i \neq \block_k}}|\coef|} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}.
\end{align*}
Hoeffding's inequality can be used to compute an upper bound on the number of samples $\samplesize$ needed to establish the $(\error, \conf)$-bound. The inequality states that if each $\randvar_i$ is strictly bounded by the interval $[a_i, b_i]$, then it is true that
@ -283,7 +290,7 @@ Hoeffding' inequality can be used to compute an upper bound on the number of sam
P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
\end{equation*}
-As implied above, Hoeffding is assuming the sum of random variables be divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot\frac{1}{1 - \gamma} = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, then our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize}$. This computation is performed on ~\cref{alg:mon-sam-global3}.
+As implied above, Hoeffding's inequality assumes that the sum of the random variables is divided by the number of samples. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma) = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize}$. This computation is performed on~\cref{alg:mon-sam-global3}.
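For intuition, the following minimal Python sketch performs this final scaling, assuming \texttt{samples} already holds the $\samplesize$ signed products $\vari{sgn}_\vari{i} \cdot \prod_j p_j$ produced by the sampling loop; the names \texttt{samples} and \texttt{abs\_etree\_at\_ones} are hypothetical.
\begin{verbatim}
def estimate(samples, abs_etree_at_ones):
    """Scale the empirical mean of the signed sample values by
    |etree|(1,...,1), mirroring the computation the text attributes
    to line alg:mon-sam-global3."""
    acc = sum(samples)            # sum of sgn_i * prod_j p_j
    return (acc / len(samples)) * abs_etree_at_ones
\end{verbatim}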
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}); since each $p_i$ is in $[0, 1]$, the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the pseudocode) is strictly bounded by $[-1, 1]$. Upper bounding Hoeffding's probability by $\conf$ ensures confidence no less than $1 - \conf$; doing so, it is the case that
@ -321,15 +328,15 @@ Thus we have $O(\treesize(\etree)) + O(\left(\frac{\log{\frac{1}{\conf}}}{\error
\begin{proof}[Proof of Theorem \ref{lem:approx-alg}]
%\begin{Corollary}\label{cor:adj-err}
-Setting $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}$ achieves $1 \pm \epsilon$ multiplicative error bounds, in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)(1 - \gamma)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$.
+Setting $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$ achieves $1 \pm \error$ multiplicative error bounds in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)(1 - \gamma)^2}\right)$.
%\end{Corollary}
-Since it is the case that we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
+Since we have $\error \cdot \abs{\etree}(1,\ldots, 1)$ additive error, one can set $\error = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}$, yielding a multiplicative error proportional to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This only affects the runtime in the number of samples taken, changing the first factor of the second summand of the original runtime accordingly.
The derivation of the number of samples is then
\begin{align*}
-&\frac{2\log{\frac{2}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)}{\abs{\etree}(1,\ldots, 1)\cdot (1 - \gamma)}\right)^2}\\
-= &\frac{2\log{\frac{2}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot (1 - \gamma)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
+&\frac{2\log{\frac{2}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
+= &\frac{2\log{\frac{2}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)^2},
\end{align*}
and the runtime follows, thus upholding~\cref{lem:approx-alg}.
\end{proof}