Finished revising Algorithm Section with circuit representation.

2021-01-29 09:52:04 -05:00 · 2021-01-29 09:52:04 -05:00 · de418689ee
parent 1739852d8e
commit de418689ee
2 changed files with 25 additions and 25 deletions
--- a/approx_alg.tex
+++ b/approx_alg.tex
@ -35,8 +35,8 @@ Denote \revision{$\polyf(\circuit)$}~ to be the function from circuit \revision{

 \begin{equation*}
 	\polyf(\revision{\circuit}) = \begin{cases}
-					\polyf(\revision{\circuit_\lchild}) + \polyf(\revision{\circuit_\rchild})			&\text{ if \revision{\circuit}.\type } = \revision{OR-gate}\\
-					\polyf(\revision{\circuit_\lchild}) \cdot \polyf(\revision{\circuit_\rchild})		&\text{ if \revision{\circuit}.\type } = \revision{AND-gate}\\
+					\polyf(\revision{\circuit_\lchild}) + \polyf(\revision{\circuit_\rchild})			&\text{ if \revision{\circuit}.\type } = \revision{\circplus}\\
+					\polyf(\revision{\circuit_\lchild}) \cdot \polyf(\revision{\circuit_\rchild})		&\text{ if \revision{\circuit}.\type } = \revision{\circmult}\\
 					\revision{\circuit.\val}									&\text{ if \revision{\circuit}.\type } = \var \text{ OR } \tnum.
 				\end{cases}
 \end{equation*}
@ -125,7 +125,7 @@ Using the same factorization from ~\Cref{example:expr-tree-T}, $\polyf(\abs{\cir


 \begin{Definition}[Evaluation]\label{def:exp-poly-eval}
-Given an expression tree $\circuit$ and $\vct{v} \in \mathbb{R}^\numvar$, we define the evaluation of $\circuit$ on $\vct{v}$ as $\circuit(\vct{v}) = \polyf(\circuit)(\vct{v})$.
+Given an expression tree $\circuit$ and a valuation $\vct{a} \in \mathbb{R}^\numvar$, we define the evaluation of $\circuit$ on $\vct{a}$ as $\circuit(\vct{a}) = \polyf(\circuit)(\vct{a})$.
 \end{Definition}

 }
@ -136,11 +136,11 @@ Given an expression tree $\circuit$ and $\vct{v} \in \mathbb{R}^\numvar$, we def
 In the subsequent subsections we will prove the following theorem.

 \begin{Theorem}\label{lem:approx-alg}
-Let $\circuit$ be an expression tree for a UCQ over \bi and define $\poly(\vct{X})=\polyf(\circuit)$ and let $k=\degree(\poly)$.
+Let \revision{$\circuit$ be a circuit} for a UCQ over \bi and define $\poly(\vct{X})=\polyf(\circuit)$ and let $k=\degree(\poly)$.
 %Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a \bi.
 Then an estimate $\mathcal{E}$ %=\approxq(\circuit, P_1,\dots,p_\numvar), \conf, \error')$
 of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
-\[O\left(\treesize(\circuit) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\circuit}^2(1,\ldots, 1)\cdot  k\cdot \log{k} \cdot depth(\circuit))}{\inparen{\error'}^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)\]
+\[O\left(\revision{\treesize(\circuit)^2} + \frac{\log{\frac{1}{\conf}}\cdot \abs{\circuit}^2(1,\ldots, 1)\cdot  k\cdot \log{k} \cdot depth(\circuit)}{\inparen{\error'}^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)\]
 such that
 \begin{equation}
 \label{eq:approx-algo-bound}
@ -151,7 +151,7 @@ such that

 \noindent The proof of~\Cref{lem:approx-alg} can be found in~\Cref{sec:proofs-approx-alg}.

-To get linear runtime results from~\Cref{lem:approx-alg}, we will need to define another parameter modeling the (weighted) number of monomials in $\expansion{\circuit}$ to be `canceled' when it is modded with $\mathcal{B}$:
+To get linear runtime results from~\Cref{lem:approx-alg}, we will need to define another parameter modeling the (weighted) number of monomials in $\expansion{\circuit}$ to be `canceled' when it is modded with $\mathcal{B}$ (\cref{def:mod-set-polys}):
 \begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
 Given an expression tree $\circuit$, define
 \[\gamma(\circuit)=\frac{\sum_{(\monom, \coef)\in \expansion{\circuit}} \abs{\coef}\cdot \indicator{\monom\mod{\mathcal{B}}\equiv 0}}{\abs{\circuit}(1,\ldots, 1)}\]
@ -164,8 +164,8 @@ Given an expression tree $\circuit$, define
 \begin{Corollary}
 \label{cor:approx-algo-const-p}
 Let $\poly(\vct{X})$ be as in~\Cref{lem:approx-alg} and let $\gamma=\gamma(\circuit)$. Further let it be the case that $\prob_i\ge \prob_0$ for all $i\in[\numvar]$. Then an estimate $\mathcal{E}$  of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ satisfying~\Cref{eq:approx-algo-bound} can be computed in time
-\[O\left(\treesize(\circuit) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\circuit))}{\inparen{\error'}^2\cdot(1-\gamma)^2\cdot \prob_0^{2k}}\right)\]
-In particular, if $\prob_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\treesize(\circuit)\cdot \log{\frac{1}{\conf}}\right)$.
+\[O\left(\revision{\treesize(\circuit)^2} + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\circuit)}{\inparen{\error'}^2\cdot(1-\gamma)^2\cdot \prob_0^{2k}}\right)\]
+In particular, if $\prob_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\revision{\treesize(\circuit)^2} + \frac 1{\inparen{\error'}^2}\cdot\treesize(\circuit)\cdot \log{\frac{1}{\conf}}\right)$.
 \end{Corollary}

 The proof for~\Cref{cor:approx-algo-const-p} can be seen in~\Cref{sec:proofs-approx-alg}.
@ -180,7 +180,7 @@ Thus, we expect the corollary to hold in general.

 \subsection{Approximating $\rpoly$}

-The algorithm to prove~\Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for expression tree $\circuit$ over $\bi$, we can exactly represent $\rpoly(\vct{X})$ as follows:
+The algorithm to prove~\Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for \revision{circuit \circuit} over $\bi$, we can exactly represent $\rpoly(\vct{X})$ as follows:
 \begin{equation}
 \label{eq:tilde-Q-bi}
 \rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} \hspace*{-2mm} \indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \var\inparen{\monom}}\hspace*{-2mm} X_i
@ -240,7 +240,7 @@ The number of samples is computed by (see \Cref{app:subsec-th-mon-samp}):
 	\caption{$\approxq(\circuit, \vct{p}, \conf, \error)$}
 	\label{alg:mon-sam}
 	\begin{algorithmic}[1]
-		\Require \circuit: Binary Expression Tree
+		\Require \revision{\circuit: Circuit}
 		\Require $\vct{p} = (\prob_1,\ldots, \prob_\numvar)$ $\in [0, 1]^N$
 		\Require $\conf$ $\in [0, 1]$
 		\Require $\error$ $\in [0, 1]$
@ -250,7 +250,7 @@ The number of samples is computed by (see \Cref{app:subsec-th-mon-samp}):
 		%\State $\vari{sample}_\vari{next} \gets 0$
 		\State $\accum \gets 0$\label{alg:mon-sam-global1}
 		\State $\numsamp \gets \ceil{\frac{2 \log{\frac{2}{\conf}}}{\error^2}}$\label{alg:mon-sam-global2}
-		\State $(\vari{\circuit}_\vari{mod}, \vari{size}) \gets $ \onepass($\circuit$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\Cref{alg:one-pass}}
+		\State $(\circuit_\vari{mod}, \vari{size}) \gets $ \onepass($\circuit$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\Cref{alg:one-pass}}
 		%\newline
 		%\State $\vari{i} \gets 1$
 		\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
@ -331,9 +331,9 @@ we first state the lemmas that summarize the relevant properties of $\onepass$ a


 \begin{Lemma}\label{lem:one-pass}
-The $\onepass$ function completes in $O(size(\circuit))$ time.  $\onepass$ guarantees two post conditions:  First, for each subtree $\vari{S}$ of $\circuit$, we have that $\vari{S}.\vari{partial}$ is set to $\abs{\vari{S}}(1,\ldots, 1)$.  Second, when $\vari{S}.\type  = +$, each $\vari{child}$ of $\vari{S}$, $\vari{child}.\vari{weight}$ is set to $\frac{\abs{\vari{S}_{\vari{child}}}(1,\ldots, 1)}{\abs{\vari{S}}(1,\ldots, 1)}$. % is correctly computed for each child of $\vari{S}.$
+The $\onepass$ function completes in $O(size(\circuit))$ time.  $\onepass$ guarantees two post conditions:  First, for each subcircuit $\vari{S}$ of $\circuit$, we have that $\vari{S}.\vari{partial}$ is set to $\abs{\vari{S}}(1,\ldots, 1)$.  Second, when $\vari{S}.\type  = \circplus$, for each $\vari{child}$ of $\vari{S}$, $\vari{child}.\vari{weight}$ is set to $\frac{\abs{\vari{S}_{\vari{child}}}(1,\ldots, 1)}{\abs{\vari{S}}(1,\ldots, 1)}$. % is correctly computed for each child of $\vari{S}.$
 \end{Lemma}
-To prove correctness of~\Cref{alg:mon-sam}, we only use the following fact that follows from the above lemma: $\circuit_{\vari{mod}}.\vari{partial}=\abs{\circuit}(1,\dots,1)$.
+To prove correctness of~\Cref{alg:mon-sam}, we only use the following fact that follows from the above lemma: for the modified circuit $\circuit_{\vari{mod}}$, we have $\circuit_{\vari{mod}}.\vari{partial}=\abs{\circuit}(1,\dots,1)$.
 %\AH{I'm wondering if there is a better notation to use here.  I myself got confused by my own notation of $\circuit_{\vari{mod}}$.  \emph{But}, we need to to be referencing the modified $\circuit$ returned by $\onepass$ in the algorithm, so maybe this is the best we can do?}
 %\AR{yeah, I think this is fine.}
 %At the conclusion of $\onepass$, $\circuit.\vari{partial}$ will hold the sum of all coefficients in $\expansion{\abs{\circuit}}$, i.e., $\sum\limits_{(\monom, \coef) \in \expansion{\abs{\circuit}}}\coef$.  $\circuit.\vari{weight}$ will hold the weighted probability that $\circuit$ is sampled from from its parent $+$ node.
--- a/hardness-app.tex
+++ b/hardness-app.tex
@ -381,14 +381,14 @@ The number of triangles in $\graph{\ell}$ for $\ell \geq 2$ will always be $0$ f
 Before proving~\Cref{lem:mon-samp}, we use it to argue our main result,~\Cref{lem:approx-alg}:
 \subsection{Proof of Theorem \ref{lem:approx-alg}}

-Set $\mathcal{E}=\approxq(\etree, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
-\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\etree}(1,\ldots, 1)},\]
+Set $\mathcal{E}=\approxq(\revision{\circuit}, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
+\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{\revision{\circuit}}(1,\ldots, 1)},\]
 which achieves the claimed accuracy bound on $\mathcal{E}$.

 The claim on the runtime follows since
 \begin{align*}
-\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_N)}{\abs{\etree}(1,\ldots, 1)}\right)^2}\\
-= &\frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
+\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)}\right)^2}\\
+= &\frac{\log{\frac{1}{\conf}}\cdot \abs{\revision{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)},
 \end{align*}
 %and the runtime then follows, thus upholding ~\cref{lem:approx-alg}.
 which completes the proof.
@ -400,8 +400,8 @@ Consider now the random variables $\randvar_1,\dots,\randvar_\numvar$, where eac
 where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
 Then for random variable $\randvar_i$, it is the case that
 \begin{align*}
-\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expansion{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{\etree}(1,\dots,1)} \\
-&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},
+\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expansion{\revision{\circuit}} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_i\in \var\inparen{\monom}} \prob_i }{\abs{\revision{\circuit}}(1,\dots,1)} \\
+&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)},
 \end{align*}
 where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.

@ -409,14 +409,14 @@ Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$.  It

 \[\expct\pbox{\empmean}  %\expct\pbox{ \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i}
 = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}
-= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)}.\]
+= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\revision{\circuit}}(1,\ldots, 1)}.\]

 Hoeffding's inequality states that if we know that each $\randvar_i$ (which are all independent) always lie in the intervals $[a_i, b_i]$, then it is true that
 \begin{equation*}
 \probOf\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
 \end{equation*}

-Line ~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ $\prob_i\in [0, 1]$, the range for each $\randvar_i$ is $[-1, 1]$.
+Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ values $\prob_i\in [0, 1]$, which implies that the range for each $\randvar_i$ is $[-1, 1]$.
 Using Hoeffding's inequality, we then get:
 \begin{equation*}
 \probOf\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
@ -426,18 +426,18 @@ where the last inequality follows from our choice of $\samplesize$ in~\Cref{alg:
 This concludes the proof for the first claim of theorem ~\ref{lem:mon-samp}.

 \paragraph{Run-time Analysis}
-The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes time $O(size(\etree))$) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot depth(\etree))$) and~\Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Adding up all the times discussed here gives us the desired overall runtime.
+The runtime of the algorithm is dominated by~\Cref{alg:mon-sam-onepass} (which by~\Cref{lem:one-pass} takes time $O(size(\revision{\circuit}))$) and the $\samplesize$ iterations of the loop in~\Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to~\Cref{alg:mon-sam-sample} (which by~\Cref{lem:sample} takes $O(\log{k} \cdot k \cdot depth(\revision{\circuit}))$) and~\Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Adding up all the times discussed here gives us the desired overall runtime.

 \subsection{Proof of~\Cref{cor:approx-algo-const-p}}
 The result follows by first noting that by definition of $\gamma$, we have
 %\AH{Just wondering why you use $\geq$ as opposed to $=$?}
 %\AR{Ah, right-- fixed}
-\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{\etree}(1,\dots,1).\]
+\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1).\]
 Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
 \[ \rpoly(\prob_1,\ldots, \prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
-The above two inequalities implies $\rpoly(1,\dots,1) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{\etree}(1,\dots,1)$.
+The above two relations imply $\rpoly(\prob_1,\ldots, \prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{\revision{\circuit}}(1,\dots,1)$.
 %\AH{This looks really nice!}
-Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $depth(\etree)\le \treesize(\etree)$ and absorbing all factors that just depend on $k$.
+Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\revision{\treesize(\circuit)^2 +}\frac 1{\inparen{\error'}^2}\cdot\treesize(\revision{\circuit})\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $depth(\revision{\circuit})\le \treesize(\revision{\circuit})$ and absorbing all factors that just depend on $k$.