|
|
|
@ -10,67 +10,33 @@ The folowing approximation algorithm applies to \bi, though our bounds are more
|
|
|
|
|
|
|
|
|
|
\subsection{Preliminaries and some more notation}
|
|
|
|
|
|
|
|
|
|
We now introduce useful definitions and notation related to polynomials. We use the following polynomial as an example:
|
|
|
|
|
\begin{equation}
|
|
|
|
|
\label{eq:poly-eg}
|
|
|
|
|
\poly(X, Y) = 2X^2 + 3XY - 2Y^2.
|
|
|
|
|
\end{equation}
|
|
|
|
|
|
|
|
|
|
We now introduce useful definitions and notation related to circuits and polynomials. Kindly note that all proofs and pseudocode can be found in \cref{sec:proofs-approx-alg}.
|
|
|
|
|
\begin{Definition}[Variables in a monomial]\label{def:vars}
|
|
|
|
|
Given a monomial $v$, we use $\var(v)$ to denote the set of variables in $v$.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
\noindent For example the monomial $XY$ has $\var(XY)=\inset{X,Y}$.
|
|
|
|
|
|
|
|
|
|
\revision{
|
|
|
|
|
\begin{Definition}[Pure Expansion]
|
|
|
|
|
The pure expansion of a polynomial $\poly$ is formed by computing all products of sums occurring in $\poly$, without combining like monomials. The pure expansion of $\poly$ generalizes \Cref{def:smb} by allowing monomials $m_i = m_j$ for $i \neq j$.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[Expanded \revision{\circuit}]\label{def:expand-circuit}
|
|
|
|
|
%\revision{$\expansion{\circuit}$} is the reduced pure expansion of $\revision{\circuit}$.
|
|
|
|
|
The logical view of \revision{$\expansion{\circuit}$} is a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef$ is in $\reals$.
|
|
|
|
|
\revision{$\expansion{\circuit}$} has the following recursive definition ($\circ$ is list concatenation).
|
|
|
|
|
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
|
|
|
|
|
The logical view of $\expansion{\circuit}$ is a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef$ is in $\reals$.
|
|
|
|
|
$\expansion{\circuit}$ has the following recursive definition ($\circ$ is list concatenation).
|
|
|
|
|
|
|
|
|
|
$\expansion{\circuit} =
|
|
|
|
|
\begin{cases}
|
|
|
|
|
\expansion{\circuit_\linput} \circ \expansion{\circuit_\rinput} &\textbf{ if }\revision{\circuit.\type = \circplus}\\
|
|
|
|
|
\left\{(\monom_\linput \cup \monom_\rinput, \coef_\linput \cdot \coef_\rinput) ~|~(\monom_\linput, \coef_\linput) \in \expansion{\circuit_\linput}, (\monom_\rinput, \coef_\rinput) \in \expansion{\circuit_\rinput}\right\} &\textbf{ if }\revision{\circuit.\type = \circmult}\\
|
|
|
|
|
\elist{(\emptyset, \revision{\circuit.\val})} &\textbf{ if }\revision{\circuit}.\type = \tnum\\
|
|
|
|
|
\elist{(\{\revision{\circuit}.\val\}, 1)} &\textbf{ if }\revision{\circuit}.\type = \var.\\
|
|
|
|
|
\expansion{\circuit_\linput} \circ \expansion{\circuit_\rinput} &\textbf{ if }\circuit.\type = \circplus\\
|
|
|
|
|
\left\{(\monom_\linput \cup \monom_\rinput, \coef_\linput \cdot \coef_\rinput) ~|~(\monom_\linput, \coef_\linput) \in \expansion{\circuit_\linput}, (\monom_\rinput, \coef_\rinput) \in \expansion{\circuit_\rinput}\right\} &\textbf{ if }\circuit.\type = \circmult\\
|
|
|
|
|
\elist{(\emptyset, \circuit.\val)} &\textbf{ if }\circuit.\type = \tnum\\
|
|
|
|
|
\elist{(\{\circuit.\val\}, 1)} &\textbf{ if }\circuit.\type = \var.\\
|
|
|
|
|
\end{cases}
|
|
|
|
|
$
|
|
|
|
|
|
|
|
|
|
\end{Definition}
|
|
|
|
|
\revision{
|
|
|
|
|
Note that similar in spirit to \Cref{def:reduced-bi-poly}, $\expansion{\circuit}$ reduces all variable exponents $e > 1$ to $e = 1$.
|
|
|
|
|
}
|
|
|
|
|
For further explanation, please refer to \cref{example:expr-tree-T}.
|
|
|
|
|
|
|
|
|
|
In the following, we abuse notation and write $\monom$ to denote the monomial obtained as the product of the variables in the set.
|
|
|
|
|
|
|
|
|
|
\begin{Example}\label{example:expr-tree-T}
|
|
|
|
|
Consider the factorized representation $(X+ 2Y)(2X - Y)$ of the polynomial in~\Cref{eq:poly-eg}.
|
|
|
|
|
Its circuit $\circuit$ is illustrated in \cref{fig:circuit}.
|
|
|
|
|
The pure expansion of the product is $2X^2 - XY + 4XY - 2Y^2$ and the $\expansion{\circuit}$ is $[(X, 2), (XY, -1), (XY, 4), (Y, -2)]$.
|
|
|
|
|
\end{Example}
|
|
|
|
|
$\expansion{\circuit}$ effectively\footnote{The minor difference here is that $\expansion{\circuit}$ encodes the \emph{reduced} form over the SOP expansion of the compressed representation, as opposed to the \abbrSMB representation} encodes the \emph{reduced} form of $\polyf\inparen{\circuit}$, decoupling each monomial into a set of variables $\monom$ and a real coefficient $\coef$.
|
|
|
|
|
However, unlike the constraint on the input to compute $\rpoly$, the input circuit $\circuit$ does not need to be in \abbrSMB/SOP form.
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[Positive \circuit]\label{def:positive-circuit}
|
|
|
|
|
\begin{Definition}[$\abs{\circuit}(\vct{X})$]\label{def:positive-circuit}
|
|
|
|
|
For any circuit $\circuit$, the corresponding
|
|
|
|
|
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Using the same factorization from \Cref{example:expr-tree-T}, $\polyf(\abs{\circuit}) = (X + 2Y)(2X + Y) = 2X^2 +XY +4XY + 2Y^2 = 2X^2 + 5XY + 2Y^2$. Note that this \textit{is not} the same as the polynomial from~\Cref{eq:poly-eg}.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[Evaluation]\label{def:exp-poly-eval}
|
|
|
|
|
Given an expression tree $\circuit$ and a valuation $\vct{a} \in \mathbb{R}^\numvar$, we define the evaluation of $\circuit$ on $\vct{a}$ as $\circuit(\vct{a}) = \polyf(\circuit)(\vct{a})$.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
Please see \cref{ex:def-pos-circ} for an illustration.
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[\size($\cdot$)]
|
|
|
|
|
The function \size~ takes a circuit $\circuit$ as input and outputs the number of gates (nodes) in \circuit.
|
|
|
|
@ -80,8 +46,7 @@ The function \size~ takes a circuit $\circuit$ as input and outputs the number o
|
|
|
|
|
The function \depth~ has circuit $\circuit$ as input and outputs the number of levels in \circuit.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[$\degree(\cdot)$]
|
|
|
|
|
\revision{
|
|
|
|
|
\begin{Definition}[$\degree(\cdot)$]\footnote{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\deg(\circuit)$ and the latter can be strictly larger (e.g.,\ consider the case when $\circuit$ multiplies two copies of the constant $1$---here we have $\deg(\circuit)=1$ but the degree of $\polyf(\abs{\circuit})$ is $0$).}
|
|
|
|
|
$\degree(\circuit)$ is defined recursively as follows:
|
|
|
|
|
\[\degree(\circuit)=
|
|
|
|
|
\begin{cases}
|
|
|
|
@ -90,21 +55,11 @@ $\degree(\circuit)$ is defined recursively as follows:
|
|
|
|
|
0 & \text{otherwise}.
|
|
|
|
|
\end{cases}
|
|
|
|
|
\]
|
|
|
|
|
}
|
|
|
|
|
%If $\circuit$ has no $+$ or $\times$ gate, then $\deg(\circuit)=0$. Otherwise if
|
|
|
|
|
%The function $\degree(\cdot)$ takes a circuit \circuit as input and outputs the degree of $\polyf(\abs{\circuit})$.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
\revision{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\deg(\circuit)$ and the latter can be strictly larger (e.g.,\ consider the case when $\circuit$ multiplies two copies of the constant $1$---here we have $\deg(\circuit)=1$ but the degree of $\polyf(\abs{\circuit})$ is $0$).}
|
|
|
|
|
|
|
|
|
|
\begin{Definition}[Subcircuit]
|
|
|
|
|
A subcircuit of a circuit $\circuit$ is a circuit \subcircuit such that \subcircuit is a DAG \textit{subgraph} of the DAG representing \circuit. The sink of \subcircuit has exactly one gate \gate.
|
|
|
|
|
\end{Definition}
|
|
|
|
|
|
|
|
|
|
Finally, we will need the following notation for the complexity of multiplying large integers:
|
|
|
|
|
\begin{Definition}[$\multc{\cdot}{\cdot}$]
|
|
|
|
|
\begin{Definition}[$\multc{\cdot}{\cdot}$]\footnote{We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.}
|
|
|
|
|
In a RAM model with a word size of $W$ bits, $\multc{M}{W}$ denotes the complexity of multiplying two integers represented with $M$ bits. (We will assume that for input of size $N$, $W=O(\log{N})$.)
|
|
|
|
|
\end{Definition}
|
|
|
|
|
We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.
|
|
|
|
|
|
|
|
|
|
\subsection{Our main result}
|
|
|
|
|
In the subsequent subsections we will prove the following theorem.
|
|
|
|
@ -122,9 +77,6 @@ such that
|
|
|
|
|
\end{equation}
|
|
|
|
|
\end{Theorem}
|
|
|
|
|
|
|
|
|
|
\noindent The proof of~\Cref{lem:approx-alg} (which relies on \Cref{lem:one-pass} and \Cref{lem:sample}) can be found in~\Cref{sec:proof-lem-approx-alg}. The proofs for the referenced lemmas are also found in \Cref{sec:proof-one-pass} and \Cref{sec:proof-sample-monom}.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
To get linear runtime results from~\Cref{lem:approx-alg}, we will need to define another parameter modeling the (weighted) number of monomials in $\expansion{\circuit}$ to be `canceled' when it is modded with $\mathcal{B}$ (\Cref{def:mod-set-polys}).
|
|
|
|
|
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
|
|
|
|
|
Given an expression tree $\circuit$, define
|
|
|
|
@ -139,12 +91,9 @@ Let $\poly(\vct{X})$ be as in~\Cref{lem:approx-alg} and let $\gamma=\gamma(\circ
|
|
|
|
|
In particular, if $\prob_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$.
|
|
|
|
|
\end{Corollary}
|
|
|
|
|
|
|
|
|
|
The proof for~\Cref{cor:approx-algo-const-p} can be seen in~\Cref{sec:proofs-approx-alg}.
|
|
|
|
|
The restriction on $\gamma$ is satisfied by any \ti (where $\gamma=0$) as well as for all three queries of the PDBench \bi benchmark (\Cref{app:subsec:experiment} shows experimentally that $\gamma$ is negligible in practice for these queries).
|
|
|
|
|
We also observe that (i) tuple presence is independent across blocks, so the corresponding probabilities (and hence $\prob_0$) are independent of the number of blocks, and (ii) \bis model uncertain attributes, so block size (and hence $\gamma$) is a function of the ``messiness'' of a dataset, rather than its size.
|
|
|
|
|
Thus, we expect the corollary to hold in general.
|
|
|
|
|
The restriction on $\gamma$ is satisfied by any \ti (where $\gamma=0$) as well as for all three queries of the PDBench \bi benchmark (please see \Cref{app:subsec:experiment} for experimental results).
|
|
|
|
|
|
|
|
|
|
Finally, we address the $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}$ term in the runtime. In Appendix\revision{Fill in ref later on}, we show the following:
|
|
|
|
|
Finally, we address the $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}$ term in the runtime. %In \cref{susec:proof-val-up}, we show the following:
|
|
|
|
|
\begin{Lemma}
|
|
|
|
|
\label{lem:val-ub}
|
|
|
|
|
For any circuit $\circuit$ with $\degree(\circuit)=k$, we have
|
|
|
|
@ -161,54 +110,13 @@ we have
|
|
|
|
|
Note that the above implies that, under the assumption from \Cref{cor:approx-algo-const-p} that $\prob_0>0$ and $\gamma<1$ are absolute constants, the runtime there simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)^2\cdot \log{\frac{1}{\conf}}\right)$ for general circuits $\circuit$ and to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$ for the case when $\circuit$ satisfies the special conditions in~\Cref{lem:val-ub}. In~\Cref{app:proof-lem-val-ub} we argue that these conditions are very general and encompass many interesting scenarios.
|
|
|
|
|
|
|
|
|
|
\subsection{Approximating $\rpoly$}
|
|
|
|
|
\approxq (\cref{alg:mon-sam}) modifies \circuit with a call to \onepass. It then samples from $\circuit_{\vari{mod}}$ a total of $\numsamp$ times and uses that information to approximate $\rpoly$.
|
|
|
|
|
|
|
|
|
|
The algorithm to prove~\Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circuit over $\bi$, we can exactly represent $\rpoly(\vct{X})$ as follows:
|
|
|
|
|
\begin{equation}
|
|
|
|
|
\label{eq:tilde-Q-bi}
|
|
|
|
|
\rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} \hspace*{-2mm} \indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \var\inparen{\monom}}\hspace*{-2mm} X_i
|
|
|
|
|
\end{equation}
|
|
|
|
|
Given the above, the algorithm is a sampling based algorithm for the above sum: we sample $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.} to $\abs{\coef}$ and compute $Y=\indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{\monom}} p_i$. Taking $\numsamp$ samples and computing the average of $Y$ gives us our final estimate.
|
|
|
|
|
The number of samples is computed by (see \Cref{app:subsec-th-mon-samp}):
|
|
|
|
|
\begin{equation*}
|
|
|
|
|
2\exp{\left(-\frac{\samplesize\error^2}{2}\right)}\leq \conf \implies\samplesize \geq \frac{2\log{\frac{2}{\conf}}}{\error^2}.
|
|
|
|
|
\end{equation*}
|
|
|
|
|
|
|
|
|
|
To summarize, \approxq modifies \circuit with a call to \onepass. It then samples from \circuit, $\numsamp$ times and uses that information to approximate $\rpoly$.
|
|
|
|
|
|
|
|
|
|
\begin{algorithm}[t]
|
|
|
|
|
\caption{$\approxq(\circuit, \vct{p}, \conf, \error)$}
|
|
|
|
|
\label{alg:mon-sam}
|
|
|
|
|
\begin{algorithmic}[1]
|
|
|
|
|
\Require \circuit: Circuit
|
|
|
|
|
\Require $\vct{p} = (\prob_1,\ldots, \prob_\numvar) \in [0, 1]^\numvar$
|
|
|
|
|
\Require $\conf$ $\in [0, 1]$
|
|
|
|
|
\Require $\error$ $\in [0, 1]$
|
|
|
|
|
\Ensure \vari{acc} $\in \mathbb{R}$
|
|
|
|
|
|
|
|
|
|
\State $\accum \gets 0$\label{alg:mon-sam-global1}
|
|
|
|
|
\State $\numsamp \gets \ceil{\frac{2 \log{\frac{2}{\conf}}}{\error^2}}$\label{alg:mon-sam-global2}
|
|
|
|
|
\State $(\circuit_\vari{mod}, \vari{size}) \gets $ \onepass($\circuit$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is \Cref{alg:one-pass-iter}}
|
|
|
|
|
|
|
|
|
|
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
|
|
|
|
|
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\circuit_\vari{mod}$)\label{alg:mon-sam-sample}
|
|
|
|
|
\State\Comment{\sampmon \; is \Cref{alg:sample}}
|
|
|
|
|
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
|
|
|
|
|
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}
|
|
|
|
|
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times\; \vari{sgn}_\vari{i}$\label{alg:mon-sam-product}
|
|
|
|
|
\State $\accum \gets \accum + \vari{Y}_\vari{i}$\Comment{Store the sum over all samples}\label{alg:mon-sam-add}
|
|
|
|
|
\EndIf
|
|
|
|
|
\EndFor
|
|
|
|
|
|
|
|
|
|
\State $\vari{acc} \gets \vari{acc} \times \frac{\vari{size}}{\numsamp}$\label{alg:mon-sam-global3}
|
|
|
|
|
\State \Return \vari{acc}
|
|
|
|
|
\end{algorithmic}
|
|
|
|
|
\end{algorithm}
|
|
|
|
|
|
|
|
|
|
\subsubsection{Correctness}
|
|
|
|
|
|
|
|
|
|
In order to prove~\Cref{lem:approx-alg}, we will need to argue the correctness of~\Cref{alg:mon-sam}. Before we formally do that,
|
|
|
|
|
we first state the lemmas that summarize the relevant properties of $\onepass$ and $\sampmon$, the auxiliary algorithms on which \Cref{alg:mon-sam} relies.
|
|
|
|
|
|
|
|
|
|
In order to prove~\Cref{lem:approx-alg}, we will need to argue the correctness of \approxq, which relies on the correctness of auxiliary algorithms \onepass and \sampmon.
|
|
|
|
|
|
|
|
|
|
\begin{Lemma}\label{lem:one-pass}
|
|
|
|
|
The $\onepass$ function completes in time:
|
|
|
|
@ -234,78 +142,18 @@ For any $\circuit$ with $\degree(poly(|\circuit|)) = k$, algorithm \ref{alg:mon-
|
|
|
|
|
\subsection{\onepass\ Algorithm}
|
|
|
|
|
\label{sec:onepass}
|
|
|
|
|
|
|
|
|
|
The evaluation of $\abs{\circuit}(1,\ldots, 1)$ can be defined recursively, as follows (where $\circuit_\linput$ and $\circuit_\rinput$ are the `left' and `right' inputs of $\circuit$ if they exist):
|
|
|
|
|
|
|
|
|
|
{\small
|
|
|
|
|
\begin{align}
|
|
|
|
|
\label{eq:T-all-ones}
|
|
|
|
|
\abs{\circuit}(1,\ldots, 1) = \begin{cases}
|
|
|
|
|
\abs{\circuit_\linput}(1,\ldots, 1) \cdot \abs{\circuit_\rinput}(1,\ldots, 1) &\textbf{if }\circuit.\type = \revision{\circmult}\\
|
|
|
|
|
\abs{\circuit_\linput}(1,\ldots, 1) + \abs{\circuit_\rinput}(1,\ldots, 1) &\textbf{if }\circuit.\type = \revision{\circplus} \\
|
|
|
|
|
|\circuit.\val| &\textbf{if }\circuit.\type = \tnum\\
|
|
|
|
|
1 &\textbf{if }\circuit.\type = \var.
|
|
|
|
|
\end{cases}
|
|
|
|
|
\end{align}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
It turns out that for the proof of~\Cref{lem:sample}, we need to argue that when $\circuit.\type = +$, we indeed have
|
|
|
|
|
\begin{align}
|
|
|
|
|
\label{eq:T-weights}
|
|
|
|
|
\circuit.\lwght &\gets \frac{\abs{\circuit_\linput}(1,\ldots, 1)}{\abs{\circuit_\linput}(1,\ldots, 1) + \abs{\circuit_\rinput}(1,\ldots, 1)};\\
|
|
|
|
|
\circuit.\rwght &\gets \frac{\abs{\circuit_\rinput}(1,\ldots, 1)}{\abs{\circuit_\linput}(1,\ldots, 1)+ \abs{\circuit_\rinput}(1,\ldots, 1)}
|
|
|
|
|
\end{align}
|
|
|
|
|
|
|
|
|
|
\noindent \onepass\ (Algorithm~\ref{alg:one-pass-iter} in \Cref{sec:proofs-approx-alg}) iteratively visits each gate one time according to the topological ordering of \circuit, annotating the \lwght, \rwght, and \prt variables of each node according to the definitions above. Lemma~\ref{lem:one-pass} is proved in~\Cref{sec:proofs-approx-alg}.
|
|
|
|
|
|
|
|
|
|
\subsection{\sampmon\ Algorithm}
|
|
|
|
|
\label{sec:samplemonomial}
|
|
|
|
|
|
|
|
|
|
A naive (slow) implementation of \sampmon\ would first compute $\expansion{\circuit}$ and then sample from it.
|
|
|
|
|
Instead, \Cref{alg:sample} selects a monomial from $\expansion{\circuit}$ by top-down traversal.
|
|
|
|
|
For a parent $+$ gate, the input to be visited is sampled from the weighted distribution precomputed by \onepass.
|
|
|
|
|
When a parent $\times$ node is visited, both inputs are visited.
|
|
|
|
|
The algorithm computes two properties: the set of all variable leaf nodes visited, and the product of signs of visited coefficient leaf nodes.
|
|
|
|
|
Instead, \Cref{alg:sample} selects a monomial from $\expansion{\circuit}$ by top-down traversal of the input \circuit. More details on the traversal can be found in \cref{subsec:sampmon-remarks}.
|
|
|
|
|
|
|
|
|
|
%
|
|
|
|
|
We will assume the TreeSet data structure to maintain sets with logarithmic time insertion and linear time traversal of its elements.
|
|
|
|
|
%
|
|
|
|
|
$\sampmon$ is given in \Cref{alg:sample}, and a proof of its correctness (via \Cref{lem:sample}) is provided in \Cref{sec:proofs-approx-alg}.
|
|
|
|
|
%$\sampmon$ is given in \Cref{alg:sample}, and a proof of its correctness (via \Cref{lem:sample}) is provided in \Cref{sec:proofs-approx-alg}.
|
|
|
|
|
|
|
|
|
|
\begin{algorithm}[t]
|
|
|
|
|
\caption{\sampmon(\circuit)}
|
|
|
|
|
\label{alg:sample}
|
|
|
|
|
\begin{algorithmic}[1]
|
|
|
|
|
\revision{\Require \circuit: Circuit}
|
|
|
|
|
\Ensure \vari{vars}: TreeSet
|
|
|
|
|
\Ensure \vari{sgn} $\in \{-1, 1\}$
|
|
|
|
|
\Comment{\Cref{alg:one-pass-iter} should have been run before this one} % algorithm ~\ref{alg:sample}}
|
|
|
|
|
\State $\vari{vars} \gets \emptyset$ \label{alg:sample-global1}
|
|
|
|
|
\If{$\circuit.\type = +$}\Comment{Sample at every $+$ node}
|
|
|
|
|
\State $\circuit_{\vari{samp}} \gets$ Sample from left input ($\circuit_{\linput}$) and right input ($\circuit_{\rinput}$) w.p. $\circuit.\vari{Lweight}$ and $\circuit.\vari{Rweight}$. \label{alg:sample-plus-bsamp} \Comment{Each call to \sampmon uses fresh randomness}
|
|
|
|
|
\State $(\vari{v}, \vari{s}) \gets \sampmon(\circuit_{\vari{samp}})$\label{alg:sample-plus-traversal}
|
|
|
|
|
\State $\Return ~(\vari{v}, \vari{s})$
|
|
|
|
|
\ElsIf{$\circuit.\type = \times$}\Comment{Multiply the sampled values of all inputs}
|
|
|
|
|
\State $\vari{sgn} \gets 1$\label{alg:sample-global2}
|
|
|
|
|
\For {$\vari{input}$ in $\circuit.\vari{input}$}\label{alg:sample-times-for-loop}
|
|
|
|
|
\State $(\vari{v}, \vari{s}) \gets \sampmon(\vari{input})$
|
|
|
|
|
\State $\vari{vars} \gets \vari{vars} \cup \vari{v}$\label{alg:sample-times-union}
|
|
|
|
|
\State $\vari{sgn} \gets \vari{sgn} \times \vari{s}$\label{alg:sample-times-product}
|
|
|
|
|
\EndFor
|
|
|
|
|
\State $\Return ~(\vari{vars}, \vari{sgn})$
|
|
|
|
|
\ElsIf{$\circuit.\type = \tnum$}\Comment{The leaf is a coefficient}
|
|
|
|
|
%\State $\vari{sgn} \gets \vari{sgn} \times sign(\circuit.\val)$
|
|
|
|
|
\State $\Return ~\left(\{\}, sign(\circuit.\val)\right)$\label{alg:sample-num-return}
|
|
|
|
|
\ElsIf{$\circuit.\type = \var$}
|
|
|
|
|
%\State $\vari{vars} \gets \vari{vars} \; \cup \; \{\;\circuit.\val\;\}\label{alg:sample-var-union}$\Comment{Add the variable to the set}
|
|
|
|
|
\State $\Return~\left(\{\circuit.\val\}, 1\right) $\label{alg:sample-var-return}
|
|
|
|
|
\EndIf
|
|
|
|
|
\end{algorithmic}
|
|
|
|
|
\end{algorithm}
|
|
|
|
|
|
|
|
|
|
% \subsection{Experimental results}
|
|
|
|
|
% \label{sec:experiments}
|
|
|
|
|
% We conducted an experiment running modified TPCH queries over uncertain data generated by pdbench~\cite{pdbench}, both of which (data and queries) represent what is typically encountered in practice. Queries were run two times, once filtering $\bi$ cancellations, and then second not filtering the cancellations. The purpose of this was to determine an indication for how many $\bi$ cancellations occur in practice. Details and results can be found in~.
|
|
|
|
|
|
|
|
|
|
%\AR{Experimental stuff about \bi should go in here}
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
|
|
|
|
|
%%% Local Variables:
|
|
|
|
|