Merge branch 'master' of gitlab.odin.cse.buffalo.edu:ahuber/SketchingWorlds
This commit is contained in:
commit
fd23ea69f9
129
approx_alg.tex
129
approx_alg.tex
|
@ -2,7 +2,7 @@
|
|||
\section{$1 \pm \epsilon$ Approximation Algorithm}
|
||||
\label{sec:algo}
|
||||
|
||||
In~\cref{sec:hard}, we showed that computing the expected multiplicity of a compressed representation of a bag polynomial for TIDB (even just based on project-join queries) is unlikely to be possible in linear time (\cref{thm:mult-p-hard-result}), even if all tuples have the same probability of being present (\cref{cor:single-p-hard}). Given this, in this section we will design an approrixmation algorithm for our that runs in {\em linear time}. Unlike the results in~\cref{sec:hard} our approximation algorithm works for BIDB though our bounds are more meaningful for a non-trivial sublcass of BIDB that includes TIDB as well as PDB benchmarks (\cref{sec:experiments}).
|
||||
In~\cref{sec:hard}, we showed that computing the expected multiplicity of a compressed representation of a bag polynomial for TIDB (even just based on project-join queries) is unlikely to be possible in linear time (\cref{thm:mult-p-hard-result}), even if all tuples have the same probability of being present (\cref{cor:single-p-hard}). Given this, in this section we will design an approximation algorithm for this problem that runs in {\em linear time}. Unlike the results in~\cref{sec:hard}, our approximation algorithm works for BIDB, though our bounds are more meaningful for a non-trivial subclass of BIDB that includes TIDB as well as PDB benchmarks (\cref{sec:experiments}).
|
||||
%it is then desirable to have an algorithm to approximate the multiplicity in linear time, which is what we describe next.
|
||||
|
||||
\subsection{Preliminaries and some more notation}
|
||||
|
@ -138,7 +138,7 @@ Consider the factorized representation $(x + 2y)(2x - y)$ of the polynomial in~\
|
|||
|
||||
|
||||
\begin{Definition}[Positive T]\label{def:positive-tree}
|
||||
For any expression tree $\etree$, the correspondign
|
||||
For any expression tree $\etree$, the corresponding
|
||||
{\em positive tree}, denoted $\abs{\etree}$, is obtained from $\etree$ as follows. For each leaf node $\ell$ of $\etree$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$. %value $\coef$ of each coefficient leaf node in $\etree$ is set to %$\coef_i$ in $\etree$ is exchanged with its absolute value$|\coef|$.
|
||||
\end{Definition}
|
||||
|
||||
|
@ -154,19 +154,20 @@ Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(
|
|||
In the subsequent subsections we will prove the following theorem.
|
||||
|
||||
\begin{Theorem}\label{lem:approx-alg}
|
||||
Let $\etree$ be an expression tree for a UCQ over BIDB and define $\poly(\vct{X})=\polyf(\etree)$ and let $k=\deg(\poly)$
|
||||
Let $\etree$ be an expression tree for a UCQ over BIDB, define $\poly(\vct{X})=\polyf(\etree)$, and let $k=\degree(\poly)$.
|
||||
%Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB.
|
||||
An estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
|
||||
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot depth(\etree))}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right),\]
|
||||
An estimate $\mathcal{E}$ %=\approxq(\etree, (p_1,\dots,p_\numvar), \conf, \error')$
|
||||
of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
|
||||
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot depth(\etree)}{\inparen{\error'}^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right),\]
|
||||
such that
|
||||
\begin{equation}
|
||||
\label{eq:approx-algo-bound}
|
||||
P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.
|
||||
P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error' \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.
|
||||
\end{equation}
|
||||
%with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
|
||||
\end{Theorem}
|
||||
|
||||
It turns out that to get linear runtime resuls from~\cref{lem:approx-alg}, we will need to define another parameter (which roughly counts the (weighted) number of monomials in $\expandtree{\etree}$ that get `canceled' when modded with $\mathcal{B}$):
|
||||
It turns out that to get linear runtime results from~\cref{lem:approx-alg}, we will need to define another parameter (which roughly counts the (weighted) number of monomials in $\expandtree{\etree}$ that get `canceled' when modded with $\mathcal{B}$):
|
||||
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
|
||||
Given an expression tree $\etree$, define
|
||||
\[\gamma(\etree)=\frac{\sum_{(\monom, \coef)\in \expandtree{\etree}} \abs{\coef}\cdot \onesymbol\inparen{\monom\mod{\mathcal{B}}\equiv 0}}{\abs{\etree}(1,\ldots, 1)}\]
|
||||
|
@ -177,10 +178,10 @@ We next present couple of corollaries of~\Cref{lem:approx-alg}.
|
|||
\begin{Corollary}
|
||||
\label{cor:approx-algo-const-p}
|
||||
Let $\poly(\vct{X})$ be as in~\Cref{lem:approx-alg} and let $\gamma=\gamma(\etree)$. Further let it be the case that $p_i\ge p_0$ for all $i\in[\numvar]$. Then an estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ satisfying~\cref{eq:approx-algo-bound} can be computed in time
|
||||
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\etree))}{\error^2\cdot(1-\gamma)^2\cdot p_0^{2k}}\right)\]
|
||||
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\etree)}{\inparen{\error'}^2\cdot(1-\gamma)^2\cdot p_0^{2k}}\right)\]
|
||||
In particular, if $p_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\frac{1}{\inparen{\error'}^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$.
|
||||
\end{Corollary}
|
||||
We note that the restiction on $\gamma$ is satisfied by TIDB (where $\gamma=0$) and for some BIDB benchmarks (see~\Cref{sec:experiments} for more on this claim).
|
||||
We note that the restriction on $\gamma$ is satisfied by TIDB (where $\gamma=0$) and for some BIDB benchmarks (see~\Cref{sec:experiments} for more on this claim).
|
||||
\AR{{\bf Boris/Oliver:} Is there a way to claim that all probabilities in practice are actually constants: i.e. they do not increase with the number of tuples?}
|
||||
|
||||
\begin{proof}[Proof of~\Cref{cor:approx-algo-const-p}]
|
||||
|
@ -193,12 +194,21 @@ Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the firs
|
|||
\end{proof}
|
||||
|
||||
\subsection{Approximating $\rpoly$}
|
||||
We state the approximation algorithm in terms of a $\bi$.
|
||||
\subsubsection{Description}
|
||||
Algorithm ~\ref{alg:mon-sam} approximates $\rpoly$ using the following steps. First, a call to $\onepass$ on its input $\etree$ produces a non-biased weight distribution over the monomials of $\expandtree{\etree}$ and a correct count of $|\etree|(1,\ldots, 1)$, i.e., the number of monomials in $\expandtree{\etree}$. Next, ~\cref{alg:mon-sam} calls $\sampmon$ to sample one monomial and its sign from $\expandtree{\etree}$. The sampling is repeated $\ceil{\frac{2\log{\frac{2}{\delta}}}{\epsilon^2}}$ times, where each of the samples are evaluated with input $\vct{p}$, multiplied by $1 \times sign$, and summed. The final result is scaled accordingly returning an estimate of $\rpoly$ with the claimed $(\error, \conf)$-bound of ~\cref{lem:mon-samp}.
|
||||
|
||||
The algorithm to prove~\Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\etree)$ for expression tree $\etree$ over $\bi$, we note that we can exactly represent $\rpoly(\vct{X})$ as follows:
|
||||
\begin{equation}
|
||||
\label{eq:tilde-Q-bi}
|
||||
\rpoly\inparen{X_1,\dots,X_\numvar}=\sum_{(v,c)\in \expandtree{\etree}} \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} X_i.
|
||||
\end{equation}
|
||||
Given the above, the algorithm is a sampling-based algorithm for the above sum: we sample $(v,c)\in \expandtree{\etree}$ with probability proportional\footnote{We could have also uniformly sampled from $\expandtree{\etree}$ but this gives better parameters.} to $\abs{c}$ and compute $Y=\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{v}} p_i$. Taking enough samples and computing the average of $Y$ gives us our final estimate. Algorithm~\ref{alg:mon-sam} has the details.
|
||||
|
||||
%We state the approximation algorithm in terms of a $\bi$.
|
||||
%\subsubsection{Description}
|
||||
%Algorithm ~\ref{alg:mon-sam} approximates $\rpoly$ using the following steps. First, a call to $\onepass$ on its input $\etree$ produces a non-biased weight distribution over the monomials of $\expandtree{\etree}$ and a correct count of $|\etree|(1,\ldots, 1)$, i.e., the number of monomials in $\expandtree{\etree}$. Next, ~\cref{alg:mon-sam} calls $\sampmon$ to sample one monomial and its sign from $\expandtree{\etree}$. The sampling is repeated $\ceil{\frac{2\log{\frac{2}{\delta}}}{\epsilon^2}}$ times, where each of the samples are evaluated with input $\vct{p}$, multiplied by $1 \times sign$, and summed. The final result is scaled accordingly returning an estimate of $\rpoly$ with the claimed $(\error, \conf)$-bound of ~\cref{lem:mon-samp}.
|
||||
|
||||
\AR{Seems like the notation below belongs to the notation section (if we decide to state this explicitly at all)?}
|
||||
Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y$ inclusive. The notation $\{x, y\}$ denotes the set of values consisting of $x$ and $y$.
|
||||
\subsubsection{Psuedo Code}
|
||||
%\subsubsection{Psuedo Code}
|
||||
|
||||
%Original TIDB Algorithm
|
||||
%\begin{algorithm}[H]
|
||||
|
@ -232,32 +242,32 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
|||
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\caption{$\approxq_{\biabb}$($\etree$, $\vct{p}$, $\conf$, $\error$, $\abs{\block}$)}
|
||||
\caption{$\approxq(\etree, \vct{p}, \conf, \error)$}
|
||||
\label{alg:mon-sam}
|
||||
\begin{algorithmic}[1]
|
||||
\Require \etree: Binary Expression Tree
|
||||
\Require $\vct{p} = (\prob_1,\ldots, \prob_\numvar)$ $\in [0, 1]^\numvar$
|
||||
\Require $\conf$ $\in [0, 1]$
|
||||
\Require $\error$ $\in [0, 1]$
|
||||
\Require $\abs{\block} \in \mathbb{N}$%\bivec$ $\in [0, 1]^{\abs{\block}}$
|
||||
%\Require $\abs{\block} \in \mathbb{N}$%\bivec$ $\in [0, 1]^{\abs{\block}}$
|
||||
\Ensure \vari{acc} $\in \mathbb{R}$
|
||||
|
||||
\State $\vari{sample}_\vari{next} \gets 0$
|
||||
%\State $\vari{sample}_\vari{next} \gets 0$
|
||||
\State $\accum \gets 0$\label{alg:mon-sam-global1}
|
||||
\State $\numsamp \gets \ceil{\frac{2 \log{\frac{4}{\conf}}}{\error^2}}$\label{alg:mon-sam-global2}
|
||||
\State $(\vari{\etree}_\vari{mod}, \vari{size}) \gets $ \onepass($\etree$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\cref{alg:one-pass} \;and \sampmon \; is ~\cref{alg:sample}}
|
||||
\newline
|
||||
\State $\vari{i} \gets 1$
|
||||
\While{$\vari{i} \leq \numsamp$}\Comment{Perform the required number of samples}
|
||||
\State $\bivec \gets [0]^{\abs{\block}}$\Comment{$\bivec$ is an array whose size is the number of blocks, used to check for cross-terms}\newline
|
||||
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\etree_\vari{mod}$)\label{alg:mon-sam-sample}
|
||||
\For{$\vari{x}_\vari{\block,i}$ \text{ in } $\vari{M}$}
|
||||
\If{$\bivec[\block] = 1$}\label{alg:mon-sam-check}\Comment{If we have already had a variable from this block, $\rpoly$ drops the sample.}
|
||||
\newline
|
||||
\State $\vari{sample}_{\vari{next}} \gets 1$
|
||||
\State break
|
||||
\Else
|
||||
\State $\bivec[\block] = 1$
|
||||
\State $\numsamp \gets \ceil{\frac{2 \log{\frac{2}{\conf}}}{\error^2}}$\label{alg:mon-sam-global2}
|
||||
\State $(\vari{\etree}_\vari{mod}, \vari{size}) \gets $ \onepass($\etree$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is ~\cref{alg:one-pass}}
|
||||
%\newline
|
||||
%\State $\vari{i} \gets 1$
|
||||
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\Comment{Perform the required number of samples}
|
||||
%\State $\bivec \gets [0]^{\abs{\block}}$\Comment{$\bivec$ is an array whose size is the number of blocks, used to check for cross-terms}\newline
|
||||
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\etree_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon \; is ~\cref{alg:sample}}
|
||||
%\For{$\vari{x}_\vari{\block,i}$ \text{ in } $\vari{M}$}
|
||||
% \If{$\bivec[\block] = 1$}\label{alg:mon-sam-check}\Comment{If we have already had a variable from this block, $\rpoly$ drops the sample.}
|
||||
% \newline
|
||||
% \State $\vari{sample}_{\vari{next}} \gets 1$
|
||||
% \State break
|
||||
% \Else
|
||||
% \State $\bivec[\block] = 1$
|
||||
% \State $\vari{sum} = 0$
|
||||
% \For{$\ell \in [\abs{\block}]$}
|
||||
% \State $\vari{sum} = \vari{sum} + \bivec[\block][\ell]$
|
||||
|
@ -265,23 +275,24 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
|||
% \If{$\vari{sum} \geq 2$}
|
||||
% \State $\vari{sample}_{\vari{next}} \gets 1$
|
||||
% \State continue\Comment{Not sure for psuedo code the best way to state this, but this is analogous to C language continue statement.}
|
||||
\EndIf
|
||||
\EndFor
|
||||
\If{$\vari{sample}_{\vari{next}} = 1$}\label{alg:mon-sam-drop}
|
||||
\State $\vari{sample}_{\vari{next}} \gets 0$\label{alg:mon-sam-resamp}
|
||||
\Else
|
||||
\State $\vari{Y}_\vari{i} \gets 1$\label{alg:mon-sam-assign1}\newline
|
||||
\For{$\vari{x}_{\vari{j}}$ \text{ in } $\vari{M}$}%_{\vari{i}}$}
|
||||
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times \; \vari{\prob}_\vari{j}$\label{alg:mon-sam-product2} \Comment{$\vari{p}_\vari{j}$ is the assignment to $\vari{x}_\vari{j}$ from input $\vct{p}$}
|
||||
\EndFor
|
||||
% \EndIf
|
||||
% \EndFor
|
||||
% \If{$\vari{sample}_{\vari{next}} = 1$}\label{alg:mon-sam-drop}
|
||||
% \State $\vari{sample}_{\vari{next}} \gets 0$\label{alg:mon-sam-resamp}
|
||||
% \Else
|
||||
\If{$\vari{M}$ has at most one variable from each block}
|
||||
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}%\newline
|
||||
%\For{$\vari{x}_{\vari{j}}$ \text{ in } $\vari{M}$}%_{\vari{i}}$}
|
||||
% \State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times \; \vari{\prob}_\vari{j}$\label{alg:mon-sam-product2} \Comment{$\vari{p}_\vari{j}$ is the assignment to $\vari{x}_\vari{j}$ from input $\vct{p}$}
|
||||
%\EndFor
|
||||
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times\; \vari{sgn}_\vari{i}$\label{alg:mon-sam-product}
|
||||
\State $\accum \gets \accum + \vari{Y}_\vari{i}$\Comment{Store the sum over all samples}\label{alg:mon-sam-add}
|
||||
\State $\vari{i} \gets \vari{i} + 1$
|
||||
%\State $\vari{i} \gets \vari{i} + 1$
|
||||
\EndIf
|
||||
\EndWhile
|
||||
\EndFor
|
||||
|
||||
\State $\gamma \gets $ $\algname{Estimate}$ $\gamma(\etree, \numsamp, \abs{\block})$
|
||||
\State $\vari{acc} \gets \vari{acc} \times \frac{\vari{size}}{\numsamp \cdot (1 - \gamma)}$\label{alg:mon-sam-global3}
|
||||
%\State $\gamma \gets $ $\algname{Estimate}$ $\gamma(\etree, \numsamp, \abs{\block})$
|
||||
\State $\vari{acc} \gets \vari{acc} \times \frac{\vari{size}}{\numsamp}$\label{alg:mon-sam-global3}
|
||||
\State \Return \vari{acc}
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
@ -319,16 +330,19 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
|||
|
||||
|
||||
\subsubsection{Correctness}
|
||||
We state the lemmas for $\onepass$ and \newline$\sampmon$, the auxiliary algorithms on which ~\cref{alg:mon-sam} relies. Their proofs are subsequent.
|
||||
|
||||
In order to prove~\Cref{lem:approx-alg}, we will need to argue the correctness of~\cref{alg:mon-sam}. Before we formally do that,
|
||||
we first state the lemmas that summarize the relevant properties of $\onepass$ and \newline$\sampmon$, the auxiliary algorithms on which ~\cref{alg:mon-sam} relies. Their proofs are given in~\Cref{sec:onepass} and~\Cref{sec:samplemonomial} respectively.
|
||||
|
||||
|
||||
\begin{Lemma}\label{lem:one-pass}
|
||||
The $\onepass$ function completes in $O(size(\etree))$ time. After $\onepass$ returns the following post conditions hold. First, that $\abs{\vari{S}}(1,\ldots, 1)$ is correctly computed for each subtree $\vari{S}$ of $\etree$. Second, when $\vari{S}.\val = +$, the weighted distribution $\frac{\abs{\vari{S}_{\vari{child}}}(1,\ldots, 1)}{\abs{\vari{S}}(1,\ldots, 1)}$ is correctly computed for each child of $\vari{S}.$
|
||||
The $\onepass$ function completes in $O(size(\etree))$ time. After $\onepass$ returns, the following postconditions hold. First, for each subtree $\vari{S}$ of $\etree$, we have that $\vari{S}.\vari{partial}$ is set to $\abs{\vari{S}}(1,\ldots, 1)$. Second, when $\vari{S}.\val = +$, for each $\vari{child}$ of $\vari{S}$, $\vari{child}.\vari{weight}$ is set to $\frac{\abs{\vari{S}_{\vari{child}}}(1,\ldots, 1)}{\abs{\vari{S}}(1,\ldots, 1)}$. % is correctly computed for each child of $\vari{S}.$
|
||||
\end{Lemma}
|
||||
|
||||
At the conclusion of $\onepass$, $\etree.\vari{partial}$ will hold the sum of all coefficients in $\expandtree{\abs{\etree}}$, i.e., $\sum\limits_{(\monom, \coef) \in \expandtree{\abs{\etree}}}\coef$. $\etree.\vari{weight}$ will hold the weighted probability that $\etree$ is sampled from from its parent $+$ node.
|
||||
In proving correctness of~\Cref{alg:mon-sam}, we will only use the following fact (which follows from the above lemma), $\etree_{\vari{mod}}.\vari{partial}=\abs{\etree}(1,\dots,1)$.
|
||||
%At the conclusion of $\onepass$, $\etree.\vari{partial}$ will hold the sum of all coefficients in $\expandtree{\abs{\etree}}$, i.e., $\sum\limits_{(\monom, \coef) \in \expandtree{\abs{\etree}}}\coef$. $\etree.\vari{weight}$ will hold the weighted probability that $\etree$ is sampled from from its parent $+$ node.
|
||||
|
||||
\begin{Lemma}\label{lem:sample}
|
||||
The function $\sampmon$ completes in $O(\log{k} \cdot k \cdot depth(\etree))$ time, where $k = \degree(poly(\abs{\etree})$. Upon completion, with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$, $\sampmon$ returns the sampled term $\left(\monom, sign(\coef)\right)$ from $\expandtree{\abs{\etree}}$.
|
||||
The function $\sampmon$ completes in $O(\log{k} \cdot k \cdot depth(\etree))$ time, where $k = \degree(\polyf(\abs{\etree}))$. Upon completion, every $\left(\monom, sign(\coef)\right)\in \expandtree{\abs{\etree}}$ is returned with probability $\frac{|\coef|}{\abs{\etree}(1,\ldots, 1)}$. %, $\sampmon$ returns the sampled term $\left(\monom, sign(\coef)\right)$ from $\expandtree{\abs{\etree}}$.
|
||||
\end{Lemma}
|
||||
|
||||
\begin{Theorem}\label{lem:mon-samp}
|
||||
|
@ -367,7 +381,7 @@ P\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\e
|
|||
As implied above, Hoeffding is assuming the sum of random variables be divided by the number of variables. Since $\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma) = \expct\pbox{\empmean} \cdot \abs{\etree}(1,\ldots, 1)$, then our estimate is the sum of random samples multiplied by $\frac{\abs{\etree}(1,\ldots, 1)}{\samplesize \cdot (1 - \gamma)}$. This computation is performed on ~\cref{alg:mon-sam-global3}.
|
||||
%Also see that to properly estimate $\rpoly$, it is necessary to multiply by the number of monomials in $\rpoly$, i.e. $\abs{\etree}(1,\ldots, 1)$. Therefore it is the case that $\frac{acc}{N}$ gives the estimate of one monomial, and multiplying by $\abs{\etree}(1,\ldots, 1)$ yields the estimate of $\rpoly(\prob_1,\ldots, \prob_\numvar)$. This scaling is performed in line ~\ref{alg:mon-sam-global3}.
|
||||
|
||||
Line ~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is mulitplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}) such that each $p_i$ is in $[0, 1]$, the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the psuedo code) is then strictly bounded by $[-1, 1]$. Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upperbounding Hoeffding with $\frac{\conf}{2}$ (since we take an additional estimate of $\gamma$), it is the case that
|
||||
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with at most $\degree(\polyf(\abs{\etree}))$ factors from $\vct{p}$ (\cref{alg:mon-sam-product2}), where each $p_i$ is in $[0, 1]$; hence the range for each $\randvar_i$ ($\vari{Y}_\vari{i}$ in the pseudo code) is bounded by $[-1, 1]$. Bounding Hoeffding's results by $\conf$ ensures confidence no less than $1 - \conf$. Then by upper bounding Hoeffding with $\frac{\conf}{2}$ (since we take an additional estimate of $\gamma$), it is the case that
|
||||
\begin{equation*}
|
||||
P\pbox{~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error} \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} \leq \frac{\conf}{2}.
|
||||
\end{equation*}
|
||||
|
@ -382,7 +396,7 @@ Solving for the number of samples $\samplesize$ we get
|
|||
&\frac{2\log{\frac{4}{\conf}}}{\error^2} \leq \samplesize.\label{eq:hoeff-6}
|
||||
\end{align}
|
||||
|
||||
By Hoeffding we obtain the number of samples necessary to acheive the claimed additive error bounds.
|
||||
By Hoeffding we obtain the number of samples necessary to achieve the claimed additive error bounds.
|
||||
|
||||
This concludes the proof for the first claim of theorem ~\ref{lem:mon-samp}.
|
||||
|
||||
|
@ -419,6 +433,8 @@ and the runtime then follows, thus upholding ~\cref{lem:approx-alg}.
|
|||
|
||||
|
||||
\subsection{OnePass Algorithm}
|
||||
\label{sec:onepass}
|
||||
|
||||
\subsubsection{Description}
|
||||
Algorithm ~\ref{alg:one-pass} satisfies the requirements of lemma ~\ref{lem:one-pass}.
|
||||
|
||||
|
@ -604,13 +620,14 @@ Thus, the algorithm visits each node of $\etree$ one time, with a constant numbe
|
|||
|
||||
|
||||
\subsection{Sample Algorithm}
|
||||
\label{sec:samplemonomial}
|
||||
|
||||
Algorithm ~\ref{alg:sample} takes $\etree$ as input, samples an arbitrary $(\monom, \coef)$ from $\expandtree{\etree}$ with probabilities $\stree_\lchild.\wght$ and $\stree_\rchild.\wght$ for each subtree $\stree$ with $\stree.\type = +$, outputing the tuple $(\monom, \sign(\coef))$. While one cannot compute $\expandtree{\etree}$ in time better than $O(N^k)$, the algorithm, similar to \textsc{OnePass}, uses a technique on $\etree$ which produces a sample from $\expandtree{\etree}$ without ever materializing $\expandtree{\etree}$.
|
||||
Algorithm ~\ref{alg:sample} takes $\etree$ as input, samples an arbitrary $(\monom, \coef)$ from $\expandtree{\etree}$ with probabilities $\stree_\lchild.\wght$ and $\stree_\rchild.\wght$ for each subtree $\stree$ with $\stree.\type = +$, outputting the tuple $(\monom, \sign(\coef))$. While one cannot compute $\expandtree{\etree}$ in time better than $O(N^k)$, the algorithm, similar to \textsc{OnePass}, uses a technique on $\etree$ which produces a sample from $\expandtree{\etree}$ without ever materializing $\expandtree{\etree}$.
|
||||
|
||||
Algorithm ~\ref{alg:sample} selects a monomial from $\expandtree{\etree}$ by the following top-down traversal. For a parent $+$ node, a subtree is chosen over the previously computed weighted sampling distribution. When a parent $\times$ node is visited, both children are visited. All variable leaf nodes of the subgraph traversal are added to a set. Additionally, the product of signs over all coefficient leaf nodes of the subgraph traversal is computed. The algorithm returns a set of the distinct variables of which the monomial is composed and the monomial's sign.
|
||||
|
||||
\begin{Definition}[TreeSet]
|
||||
A TreeSet is a datastructure whose elements form a set, each of which are stored in a binary tree.
|
||||
A TreeSet is a data structure whose elements form a set, each of which is stored in a binary tree.
|
||||
\end{Definition}
|
||||
|
||||
Note that as stated, a TreeSet then facilitates logarithmic insertion.
|
||||
|
@ -659,9 +676,9 @@ First, we need to show that $\sampmon$ indeed returns a monomial $\monom$, such
|
|||
|
||||
For the base case, let the depth $d$ of $\etree$ be $0$. We have that the root node is either a constant $\coef$ for which by line ~\ref{alg:sample-num-return} we return $\{~\}$, or we have that $\etree.\type = \var$ and $\etree.\val = x$, and by line ~\ref{alg:sample-var-return} we return $\{x\}$. Both cases satisfy ~\cref{def:monomial}, and the base case is proven.
|
||||
|
||||
By inductive hyptothesis, assume that for $d \leq k$ for $k \geq 1$, that it is indeed the case that $\sampmon$ returns a monomial.
|
||||
As the inductive hypothesis, assume that for all $d \leq k$, where $k \geq 1$, it is indeed the case that $\sampmon$ returns a monomial.
|
||||
|
||||
For the inductive step, let us take a tree $\etree$ with $d = k + 1$. Note that each child has depth $d \leq k$, and by inductive hyptothesis both of them return a valid monomial. Then the root can be either a $+$ or $\times$ node. For the case of a $+$ root node, line ~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the children of the root. Since by inductive hypothesis it is the case that a monomial is being returned from either child, and only one of these monomials is selected, we have for the case of $+$ root node that a valid monomial is returned by $\sampmon$. When the root is a $\times$ node, lines ~\ref{alg:sample-times-union} and ~\ref{alg:sample-times-product} multiply the monomials returned by the two children of the root, and by definition ~\ref{def:monomial} the product of two monomials is also a monomial, which means that $\sampmon$ returns a vaild monomial for the $\times$ root node, thus concluding the fact that $\sampmon$ indeed returns a monomial.
|
||||
For the inductive step, let us take a tree $\etree$ with $d = k + 1$. Note that each child has depth $d \leq k$, and by inductive hypothesis both of them return a valid monomial. Then the root can be either a $+$ or $\times$ node. For the case of a $+$ root node, line ~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the children of the root. Since by inductive hypothesis it is the case that a monomial is being returned from either child, and only one of these monomials is selected, we have for the case of $+$ root node that a valid monomial is returned by $\sampmon$. When the root is a $\times$ node, lines ~\ref{alg:sample-times-union} and ~\ref{alg:sample-times-product} multiply the monomials returned by the two children of the root, and by definition ~\ref{def:monomial} the product of two monomials is also a monomial, which means that $\sampmon$ returns a valid monomial for the $\times$ root node, thus concluding the fact that $\sampmon$ indeed returns a monomial.
|
||||
|
||||
%Note that for any monomial sampled by algorithm ~\ref{alg:sample}, the nodes traversed form a subgraph of $\etree$ that is \textit{not} a subtree in the general case. We thus seek to prove that the subgraph traversed produces the correct probability corresponding to the monomial sampled.
|
||||
|
||||
|
@ -689,7 +706,7 @@ and we obtain the desired result.
|
|||
|
||||
|
||||
\paragraph{Run-time Analysis}
|
||||
We now bound the number of recursive calls in $\sampmon$ by $O\left(k\cdot depth(\etree)\right)$. Take an arbitrary sample subgraph of expression tree $\etree$ of degree $k$ and pick an arbitrary level $i$. Call the number of $\times$ nodes in this level $y_i$, and the total number of nodes $x_i$. Given that both children of a $\times$ node are traversed in $\sampmon$ while only one child is traversed for a $+$ parent node, note that the number of nodes on level $i + 1$ in the general case is at most $y_i + x_i$, and the increase in the number of nodes from level $i$ to level $i + 1$ is upperbounded by $x_{i + 1} - x_i \leq y_i$.
|
||||
We now bound the number of recursive calls in $\sampmon$ by $O\left(k\cdot depth(\etree)\right)$. Take an arbitrary sample subgraph of expression tree $\etree$ of degree $k$ and pick an arbitrary level $i$. Call the number of $\times$ nodes in this level $y_i$, and the total number of nodes $x_i$. Given that both children of a $\times$ node are traversed in $\sampmon$ while only one child is traversed for a $+$ parent node, note that the number of nodes on level $i + 1$ in the general case is at most $y_i + x_i$, and the increase in the number of nodes from level $i$ to level $i + 1$ is upper bounded by $x_{i + 1} - x_i \leq y_i$.
|
||||
|
||||
Now, we prove by induction on the depth $d$ of tree $\etree$ the following claim.
|
||||
\begin{Claim}\label{claim:num-nodes-level-i}
|
||||
|
@ -708,7 +725,7 @@ The inductive step is to show that for arbitrary $\etree$ with depth = $d + 1 \l
|
|||
|
||||
By ~\cref{def:degree}, a sampled monomial will have $O(k)$ $\times$ nodes, and this along with ~\cref{claim:num-nodes-level-i} implies $O(k)$ nodes at $\leq$ $depth(\etree)$ levels of the $\sampmon$ subgraph, bounding the number of recursive calls to $O(k \cdot depth(\etree))$.
|
||||
|
||||
Globally, lines ~\ref{alg:sample-global1} and ~\ref{alg:sample-global2} are $O(1)$ time. For the $+$ node, line ~\ref{alg:sample-plus-bsamp} has $O(1)$ time by the fact that $\etree$ is binary. Line ~\ref{alg:sample-plus-union} has $O(\log{k})$ time by nature of the TreeSet datastructure and the fact that by definition any monomial sampled from $\expandtree{\etree}$ has degree $\leq k$ and hence at most $k$ distinct variables, which in turn implies that the TreeSet has $\leq k$ elements in it at any time.
|
||||
Globally, lines ~\ref{alg:sample-global1} and ~\ref{alg:sample-global2} are $O(1)$ time. For the $+$ node, line ~\ref{alg:sample-plus-bsamp} has $O(1)$ time by the fact that $\etree$ is binary. Line ~\ref{alg:sample-plus-union} has $O(\log{k})$ time by nature of the TreeSet data structure and the fact that by definition any monomial sampled from $\expandtree{\etree}$ has degree $\leq k$ and hence at most $k$ distinct variables, which in turn implies that the TreeSet has $\leq k$ elements in it at any time.
|
||||
|
||||
Finally, line ~\ref{alg:sample-times-product} is in $O(1)$ for a product and an assignment operation. When a times node is visited, the same union, product, and assignment operations take place, and we again have $O(\log{k})$ runtime. When a variable leaf node is traversed, the same union operation occurs with $O(\log{k})$ runtime, and a constant leaf node has the above mentioned product and assignment operations. Thus for each node visited, we have $O(\log{k})$ runtime, and the final runtime for $\sampmon$ is $O(\log{k} \cdot k \cdot depth(\etree))$.
|
||||
|
||||
|
|
|
@ -88,7 +88,7 @@ We first argue that $\rpoly_{G}^\kElem(\prob,\ldots, \prob) = \sum\limits_{i = 0
|
|||
%\sum_{\substack{(i_1, j_1),\\\cdots,\\(i_\kElem, j_\kElem) \in E}}X_{i_1}X_{j_1}\cdots X_{i_\kElem}X_{j_\kElem}
|
||||
%\end{equation*}
|
||||
%Since each of $(i_1, j_1),\ldots, (i_\kElem, j_\kElem)$ are from $E$, it follows that the set of $\kElem!$ permutations of the $\kElem$ $X_iX_j$ pairs which form the monomial products are of degree $2\kElem$ with the number of distinct variables in an arbitrary monomial $\leq 2\kElem$.
|
||||
By definition, $\rpoly_{G}^{\kElem}(\vct{X})$ sets every exponent $e > 1$ to $e = 1$, which means that $\deg(\rpoly_{G}^\kElem)\le \deg\poly_G^\kElem=2k$. Thus, if we think of $\prob$ as a variable, then $\rpoly_{G}^{\kElem}(\prob,\dots,\prob)$ is a univariate polynomial of degree at most $\deg(\rpoly_{G}^\kElem)\le 2k$. Thus, we can write
|
||||
By definition, $\rpoly_{G}^{\kElem}(\vct{X})$ sets every exponent $e > 1$ to $e = 1$, which means that $\degree(\rpoly_{G}^\kElem)\le \degree(\poly_G^\kElem)=2\kElem$. Thus, if we think of $\prob$ as a variable, then $\rpoly_{G}^{\kElem}(\prob,\dots,\prob)$ is a univariate polynomial of degree at most $\degree(\rpoly_{G}^\kElem)\le 2\kElem$. Thus, we can write
|
||||
%thereby shrinking the degree a monomial product term in the SOP form of $\poly_{G}^{\kElem}(\vct{X})$ to the exact number of distinct variables the monomial contains. This implies that $\rpoly_{G}^\kElem$ is a polynomial of degree $2\kElem$ and hence $\rpoly_{G}^\kElem(\prob,\ldots, \prob)$ is a polynomial in $\prob$ of degree $2\kElem$. Then it is the case that
|
||||
\begin{equation*}
|
||||
\rpoly_{G}^{\kElem}(\prob,\ldots, \prob) = \sum_{i = 0}^{2\kElem} c_i \prob^i
|
||||
|
|
|
@ -4,43 +4,59 @@
|
|||
\subsection{Polynomial Formulation and Equivalences}
|
||||
|
||||
Since we have shown that computing the expected multiplicity of a result tuple is equivalent to computing the expectation of a polynomial (for that tuple) given a probability distribution over all possible assignments of variables in the polynomial to $\{0,1\}$, we from now on focus on this problem exclusively.
|
||||
Before proceeding, note that the following is assuming \bis (which subsume \tis as a special case). Thus, variables are independent of each other and each variable $X$ is associated with a probability $\vct{p}(X)$.
|
||||
|
||||
Let us use the expression $(x + y)^2$ for a running example in the following definitions.
|
||||
Before proceeding, note that the following is assuming \bis (which subsume \tis as a special case). Thus, variables are independent of each other and each variable $X$ is associated with a probability $\vct{p}(X) = \pd[X = 1]$.
|
||||
Let us use the expression $(x + y)^2$ as a running example in this section.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[Monomial]\label{def:monomial}
|
||||
A monomial is a product of a fixed set of variables, each raised to a non-negative integer power.
|
||||
A monomial is a product of a set of variables, each raised to a non-negative integer power.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
For the term $2xy$, by ~\cref{def:monomial} the monomial is $xy$.
|
||||
For instance, the term $2xy$ contains a single monomial $xy$. % \Cref{def:monomial} the monomial is $xy$.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
|
||||
A polynomial is in standard monomial basis when it is fully expanded out such that no product of sums exist and where each unique monomial appears exactly once.
|
||||
A polynomial is in standard monomial basis when it is of the form:
|
||||
\[
|
||||
\sum_{i=1}^n c_i \cdot m_i
|
||||
\]
|
||||
where each $c_i$ is a positive integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$.
|
||||
% fully expanded out such that no product of sums exist and where each unique monomial appears exactly once.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
The standard monomial basis for the running example is $x^2 +2xy + y^2$. While $x^2 + xy + xy + y^2$ is an expanded form of the expression, it is not the standard monomial basis since $xy$ appears more than once.
|
||||
|
||||
Throughout this paper, we also make the following \textit{assumption}.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Assumption}\label{assump:poly-smb}
|
||||
All polynomials considered are in standard monomial basis, i.e., $\poly(\vct{X}) = \sum\limits_{\vct{d} \in \mathbb{N}^\numvar}q_d \cdot \prod\limits_{i = 1, d_i \geq 1}^{\numvar}X_i^{d_i}$, where $q_d$ is the coefficient for the monomial encoded in $\vct{d}$ and $d_i$ is the $i^{th}$ element of $\vct{d}$.
|
||||
\end{Assumption}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
While the definition of polynomial $\poly(\vct{X})$ over a $\bi$ input doesn't change, we introduce an alternative notation which will come in handy. Given $\ell$ blocks, we write $\poly(\vct{X})$ = $\poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$.
|
||||
The number of tuples in the $\bi$ instance can be (trivially) computed as $\numvar = \sum\limits_{i = 1}^{\ell}\abs{\block_i}$.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[Degree]\label{def:degree}
|
||||
The degree of polynomial $\poly(\vct{X})$ is the maximum sum of the exponents of a monomial, over all monomials when $\poly(\vct{X})$ is in SOP form.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
The degree of the running example is $2$. In this paper we consider only finite degree polynomials.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
|
||||
Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
|
||||
$\rpoly(X_1,\ldots, X_\numvar) = $
|
||||
|
||||
\[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Example}\label{example:qtilde}
|
||||
Consider when $\poly(x, y) = (x + y)(x + y)$. Then the expanded derivation for $\rpoly(x, y)$ is
|
||||
\begin{align*}
|
||||
|
@ -49,12 +65,14 @@ Consider when $\poly(x, y) = (x + y)(x + y)$. Then the expanded derivation for
|
|||
= ~& x + 2xy + y
|
||||
\end{align*}
|
||||
\end{Example}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
Intuitively, $\rpoly(\textbf{X})$ is the SOP form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
|
||||
Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting SOP of $\poly(\vct{X})$ with an idempotent product operator.
|
||||
|
||||
When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[$\rpoly$ $\bi$ Redefinition]
|
||||
A polynomial $\poly(\vct{X})$ over a $\bi$ instance is reduced to $\rpoly(\vct{X})$ according to the following criteria. First, all exponents $e > 1$ are reduced to $e = 1$. Second, all monomials containing two or more distinct variables from the same $\block$ are dropped. Formally, this is expressed as
|
||||
|
||||
|
@ -63,36 +81,44 @@ A polynomial $\poly(\vct{X})$ over a $\bi$ instance is reduced to $\rpoly(\vct{X
|
|||
\end{equation*}
|
||||
for all $i$ in $[\numvar]$ and for all $s$ in $[\ell]$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
The usefulness of this reduction will be seen in ~\cref{lem:exp-poly-rpoly}.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Lemma}\label{lem:pre-poly-rpoly}
|
||||
When $\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$, we have then that $\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$.
|
||||
\end{Lemma}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{proof}
|
||||
Follows by the construction of $\rpoly$ in \cref{def:qtilde}.
|
||||
Follows by the construction of $\rpoly$ in \cref{def:qtilde}. \qed
|
||||
\end{proof}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\qed
|
||||
|
||||
Note the following fact:
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Proposition}\label{proposition:q-qtilde}
|
||||
\[\text{For all } (X_1,\ldots, X_\numvar) \in \{0, 1\}^\numvar, \poly(X_1,\ldots, X_\numvar) = \rpoly(X_1,\ldots, X_\numvar).\]
|
||||
\end{Proposition}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{proof}[Proof for Proposition ~\ref{proposition:q-qtilde}]
|
||||
Note that any $\poly$ in factorized form is equivalent to its sum of product expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
|
||||
Note that any $\poly$ in factorized form is equivalent to its sum of product expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \qed
|
||||
\end{proof}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
\qed
|
||||
|
||||
Define all variables $X_i$ in $\poly$ to be independent.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Lemma}\label{lem:exp-poly-rpoly}
|
||||
The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
|
||||
\begin{equation*}
|
||||
\expct_{\vct{w}}\pbox{\poly(\vct{w})} = \rpoly(\prob_1,\ldots, \prob_\numvar).
|
||||
\end{equation*}
|
||||
\end{Lemma}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
Note that in the preceding lemma, we have assigned $\vct{p}$ (introduced in ~\cref{subsec:def-data}) to the variables $\vct{X}$.
|
||||
|
||||
|
@ -127,11 +153,19 @@ Finally, observe \cref{p1-s5} by construction in \cref{lem:pre-poly-rpoly}, that
|
|||
|
||||
\qed
|
||||
\end{proof}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Corollary}\label{cor:expct-sop}
|
||||
If $\poly$ is given as a sum of monomials, the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $O(|\poly|)$ time, where $|\poly|$ denotes the total number of multiplication/addition operators.
|
||||
\end{Corollary}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{proof}[Proof For Corollary ~\ref{cor:expct-sop}]
|
||||
Note that \cref{lem:exp-poly-rpoly} shows that $\expct\pbox{\poly} =$ $\rpoly(\prob_1,\ldots, \prob_\numvar)$. Therefore, if $\poly$ is already in sum-of-products form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numvar)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numvar)$), which indeed requires $O(|\poly|)$ computations.\qed
|
||||
\end{proof}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: latex
|
||||
%%% TeX-master: "main"
|
||||
%%% End:
|
||||
|
|
Loading…
Reference in a new issue