%root: main.tex
%\input{app_approx-alg-pseudo-code}
\subsection{Proof of Theorem \ref{lem:approx-alg}}\label{sec:proof-lem-approx-alg}
In order to prove \Cref{lem:approx-alg}, we will need to argue the correctness of \approxq, which relies on the correctness of auxiliary algorithms \onepass and \sampmon.
\begin{Lemma}\label{lem:one-pass}
The $\onepass$ function completes in time:
$$O\left(\size(\circuit) \cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$$
$\onepass$ guarantees two post-conditions: First, for each subcircuit \subcircuit of $\circuit$, we have that $\subcircuit.\vari{partial}$ is set to $\abs{\subcircuit}(1,\ldots, 1)$. Second, when $\subcircuit.\type = \circplus$, \subcircuit.\lwght $= \frac{\abs{\subcircuit_\linput}(1,\ldots, 1)}{\abs{\subcircuit}(1,\ldots, 1)}$ and likewise for \subcircuit.\rwght.
\end{Lemma}
To prove correctness of \Cref{alg:mon-sam}, we only use the following fact that follows from the above lemma: for the modified circuit ($\circuit_{\vari{mod}}$), $\circuit_{\vari{mod}}.\vari{partial}=\abs{\circuit}(1,\dots,1)$.
\begin{Lemma}\label{lem:sample}
The function $\sampmon$ completes in time
$$O(\log{k} \cdot k \cdot \depth(\circuit)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}})$$
where $k = \degree(\circuit)$. The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
\end{Lemma}
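For intuition, the sampling scheme underlying $\sampmon$ can be illustrated by the following minimal Python sketch (this is our own toy encoding, not the paper's pseudocode; the class and function names are hypothetical): at a $\circplus$ gate we descend into one child with probability proportional to its $\abs{\cdot}(1,\ldots,1)$ value (the analogue of \lwght), and at a $\circmult$ gate we descend into both children and combine the results.

```python
import random

# Toy circuit nodes. Leaves hold either a constant coefficient or a variable;
# abs_val() computes |C|(1,...,1), i.e. the evaluation with all variables set
# to 1 and all coefficients replaced by their absolute values.

class Leaf:
    def __init__(self, coef=None, var=None):
        self.coef, self.var = coef, var  # exactly one of the two is set
    def abs_val(self):
        return abs(self.coef) if self.coef is not None else 1

class Plus:
    def __init__(self, l, r):
        self.l, self.r = l, r
    def abs_val(self):
        return self.l.abs_val() + self.r.abs_val()

class Times:
    def __init__(self, l, r):
        self.l, self.r = l, r
    def abs_val(self):
        return self.l.abs_val() * self.r.abs_val()

def sample_monomial(c):
    """Return (variables_of_monomial, sign_of_coef), sampled with
    probability |coef| / |C|(1,...,1)."""
    if isinstance(c, Leaf):
        if c.var is not None:
            return {c.var}, 1
        return set(), (1 if c.coef >= 0 else -1)
    if isinstance(c, Plus):
        # Choose a child with probability proportional to its |.|(1,...,1).
        lw = c.l.abs_val() / c.abs_val()
        return sample_monomial(c.l if random.random() < lw else c.r)
    # Times gate: sample from both children and combine.
    lv, ls = sample_monomial(c.l)
    rv, rs = sample_monomial(c.r)
    return lv | rv, ls * rs

# Example: C = (X + 2*Y) * Z expands to X*Z (coef 1) and Y*Z (coef 2),
# so |C|(1,...,1) = 3 and X*Z is sampled with probability 1/3.
c = Times(Plus(Leaf(var="X"), Times(Leaf(coef=2), Leaf(var="Y"))), Leaf(var="Z"))
print(c.abs_val())  # 3
```

The recursion visits one root-to-leaf path per $\circplus$ gate encountered and both children of every $\circmult$ gate on the way down, which is what bounds the runtime by $\degree(\circuit)$ and $\depth(\circuit)$.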
With the above two lemmas, we are ready to argue the following result: % (proof in \Cref{sec:proofs-approx-alg}):
\begin{Theorem}\label{lem:mon-samp}
For any circuit $\circuit$ with $\degree(\circuit) = k$, \Cref{alg:mon-sam} outputs an estimate $\vari{acc}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that
\[\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf,\]
in $O\left(\left(\size(\circuit)+\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot \depth(\circuit)\right)\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$ time.
\end{Theorem}
Before proving \Cref{lem:mon-samp}, we use it to argue our main result, \Cref{lem:approx-alg}.
%The algorithm to prove \Cref{lem:approx-alg} follows from the following observation. Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circuit over $\bi$, we can exactly represent $\rpoly(\vct{X})$ as follows:
%\begin{equation}
%\label{eq:tilde-Q-bi}
%\rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} \hspace*{-2mm} \indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \var\inparen{\monom}}\hspace*{-2mm} X_i
%\end{equation}
%Given the above, the algorithm is a sampling based algorithm for the above sum: we sample $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.} to $\abs{\coef}$ and compute $Y=\indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{\monom}} p_i$. Taking $\numsamp$ samples and computing the average of $Y$ gives us our final estimate.
%The number of samples is computed by (see \Cref{app:subsec-th-mon-samp}):
%\begin{equation*}
%2\exp{\left(-\frac{\samplesize\error^2}{2}\right)}\leq \conf \implies\samplesize \geq \frac{2\log{\frac{2}{\conf}}}{\error^2}.
%\end{equation*}
\begin{proof}
Set $\mathcal{E}=\approxq({\circuit}, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1 - \gamma)}{\abs{{\circuit}}(1,\ldots, 1)},\]
which achieves the claimed accuracy bound on $\mathcal{E}$ due to \Cref{lem:mon-samp}.
The claim on the runtime follows from \Cref{lem:mon-samp} since
\begin{align*}
\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)\cdot (1-\gamma)}{\abs{{\circuit}}(1,\ldots, 1)}\right)^2}\\
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot (1-\gamma)^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)},
\end{align*}
%and the runtime then follows, thus upholding \Cref{lem:approx-alg}.
which completes the proof.
\qed
\end{proof}
We now return to the proof of \Cref{lem:mon-samp}:
\subsection{Proof of Theorem \ref{lem:mon-samp}}\label{app:subsec-th-mon-samp}
\begin{proof}
Consider the random variables $\randvar_1,\dots,\randvar_\samplesize$, where each $\randvar_i$ is the value of $\vari{Y}_{\vari{i}}$ after \Cref{alg:mon-sam-product} is executed. In particular, note that we have
2021-04-06 11:43:34 -04:00
\[\randvar_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_j\in \var\inparen{\monom}} \prob_j,\]
where the indicator variable handles the check in \Cref{alg:check-duplicate-block}.
Then for random variable $\randvar_i$, it is the case that
\begin{align*}
\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expansion{{\circuit}} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_j\in \var\inparen{\monom}} \prob_j }{\abs{{\circuit}}(1,\dots,1)} \\
&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)},
\end{align*}
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from \Cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
\[\expct\pbox{\empmean}
= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_i}
= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)}.\]
Hoeffding's inequality states that if each $\randvar_i$ (where the $\randvar_i$ are all independent) always lies in the interval $[a_i, b_i]$, then it is true that
\begin{equation*}
\probOf\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
\end{equation*}
\Cref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ factors $\prob_i\in [0, 1]$, which implies that each $\randvar_i$ lies in the range $[-1, 1]$.
Using Hoeffding's inequality, we then get:
\begin{equation*}
\probOf\left(\left| \empmean - \expct\pbox{\empmean} \right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
\end{equation*}
where the last inequality follows from our choice of $\samplesize$ in \Cref{alg:mon-sam-global2}.
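As a quick sanity check on this choice, one can numerically compute the smallest $\samplesize$ satisfying $2\exp{\left(-\frac{\samplesize\error^2}{2}\right)}\leq \conf$; the following short Python snippet (the function name is ours) does so by rearranging the inequality:

```python
import math

def required_samples(eps, delta):
    """Smallest N with 2*exp(-N*eps^2/2) <= delta; rearranging gives
    N >= 2*ln(2/delta)/eps^2 (Hoeffding, each sample in [-1, 1])."""
    return math.ceil(2 * math.log(2 / delta) / eps ** 2)

n = required_samples(0.1, 0.05)
# Verify the Hoeffding tail bound indeed drops below delta at this N.
assert 2 * math.exp(-n * 0.1 ** 2 / 2) <= 0.05
```

Note the sample complexity is independent of the circuit: the circuit size only enters the runtime through the cost of each individual sample.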
For the claimed probability bound of $\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf$, note that in the algorithm, \vari{acc} is exactly $\empmean \cdot \abs{\circuit}(1,\ldots, 1)$. Multiplying both sides of the inequality above by the same factor $\abs{\circuit}(1,\ldots, 1)$ yields the claimed bound.
This concludes the proof of the first claim of \Cref{lem:mon-samp}. We prove the claim on the runtime next.
\paragraph*{Run-time Analysis}
The runtime of the algorithm is dominated by \Cref{alg:mon-sam-onepass} (which by \Cref{lem:one-pass} takes time $O\left({\size(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$) and the $\samplesize$ iterations of the loop in \Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to \Cref{alg:mon-sam-sample} (which by \Cref{lem:sample} takes $O\left(\log{k} \cdot k \cdot {\depth(\circuit)}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$
) and \Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Adding up all the times discussed here gives us the desired overall runtime.
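The duplicate-block check admits a direct implementation; the following Python sketch (the function name is ours) sorts the block IDs of the sampled variables and scans adjacent entries, matching the $O(k\log{k})$ bound:

```python
def has_duplicate_block(block_ids):
    """Return True iff some BIDB block ID appears at least twice among the
    O(k) sampled variables: sort in O(k log k), then scan adjacent pairs."""
    ids = sorted(block_ids)
    return any(a == b for a, b in zip(ids, ids[1:]))
```

The sampled monomial contributes (i.e., its indicator is $1$) exactly when this check returns \texttt{False}.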
\qed
\end{proof}
\subsection{Proof of \Cref{cor:approx-algo-const-p}}
\begin{proof}
The result follows by first noting that by definition of $\gamma$, we have
\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1).\]
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1)$.
Applying this bound to the runtime bound in \Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\error^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\cdot \multc{\log\left(\abs{\circuit}^2(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$ follows by noting that $\depth({\circuit})\le \size({\circuit})$ and absorbing all factors that depend only on $k$.
\qed
\end{proof}
\subsection{Proof of \Cref{lem:val-ub}}\label{susec:proof-val-up}
\label{app:proof-lem-val-ub}
%\paragraph{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to be size $O(N)$}
%For our runtime results to be relevant, it must be the case that the sum of the coefficients computed by \onepass is indeed size $O(N)$ since there are $O(\log{N})$ bits in the RAM model where $N$ is the size of the input. The size of the input here is \size(\circuit). We show that when \size$(\circuit_\linput) = N_\linput$, \size$(\circuit_\rinput) = N_\rinput$, where $N_\linput + N_\rinput \leq N$, this is indeed the case.
We will prove \Cref{lem:val-ub} by considering the three cases separately. We start by considering the case when $\circuit$ is a tree:
\begin{Lemma}
\label{lem:C-ub-tree}
Let $\circuit$ be a tree (i.e. the sub-circuits corresponding to two children of a node in $\circuit$ are completely disjoint). Then we have
\[\abs{\circuit}(1,\dots,1)\le \left(\size(\circuit)\right)^{\degree(\circuit)+1}.\]
\end{Lemma}
\begin{proof}%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
For notational simplicity define $N=\size(\circuit)$ and $k=\degree(\circuit)$.
To prove this result, we prove by induction on $\depth(\circuit)$ that $\abs{\circuit}(1,\ldots, 1) \leq N^{k+1 }$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient (or constant) of $1$. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{k+1} = 1^{1} = 1$.
Assume that for an arbitrary circuit \circuit with $\depth(\circuit) \leq \ell$ for some $\ell \ge 0$, it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{\degree(\circuit)+1 }$.% for $k \geq 1$ when \depth(C) $\geq 1$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or a $\circplus$ gate. First, consider the case when the sink node is $\circmult$. Let $k_\linput, k_\rinput$ denote $\degree(\circuit_\linput)$ and $\degree(\circuit_\rinput)$ respectively. %Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
%In this case we do not use the fact that $\circuit$ is a tree and just assume that $N_\linput,N_\rinput\le N-1$.
Then note that
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\cdot \abs{\circuit_\rinput}(1,\ldots, 1) \nonumber\\
&\leq (N-1)^{k_\linput+1} \cdot (N - 1)^{k_\rinput+1}\nonumber\\
&= (N-1)^{k+1}\label{eq:sumcoeff-times-upper}\\
&\leq N^{k + 1}.\nonumber
\end{align}
%We derive the upperbound of \Cref{eq:sumcoeff-times-upper} by noting that the maximum value of the LHS occurs when both the base and exponent are maximized.
In the above the first inequality follows from the inductive hypothesis (and the fact that the size of either subtree is at most $N-1$) and \Cref{eq:sumcoeff-times-upper} follows by noting that for a $\times$ gate we have $k=k_\linput+k_\rinput+1$.
For the case when the sink gate is a $\circplus$ gate, then for $N_\linput = \size(\circuit_\linput)$ and $N_\rinput = \size(\circuit_\rinput)$ we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \nonumber\\
&\leq
N_\linput^{k+1} + N_\rinput^{k+1}\nonumber\\
&\leq (N-1)^{k+1 } \label{eq:sumcoeff-plus-upper}\\
&\leq N^{k+1}.\nonumber
\end{align}
In the above, the first inequality follows from the inductive hypothesis (and the fact that $k_\linput,k_\rinput\le k$). Note that the RHS of this inequality is maximized when the base and exponent of one of the terms is maximized. The second inequality follows from this fact as well as the fact that since $\circuit$ is a tree we have $N_\linput+N_\rinput=N-1$ and, lastly, the fact that $k\ge 0$. This completes the proof.
\end{proof}
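As a quick numeric illustration of the bound just proved, consider the following Python sketch (our own toy encoding, not the paper's; all leaves are taken to evaluate to $1$, and the degree bookkeeping follows the proof, i.e. a $\circmult$ gate contributes $k_\linput+k_\rinput+1$ while a $\circplus$ gate takes the maximum of its children's degrees):

```python
def stats(node):
    """node is 1 (a leaf) or (op, left, right) with op in {'+', '*'}.
    Returns (|C|(1,...,1), size, degree) for the subtree rooted at node."""
    if node == 1:
        return 1, 1, 0
    op, left, right = node
    lv, ls, ld = stats(left)
    rv, rs, rd = stats(right)
    if op == '*':
        # A multiplication gate: values multiply, degrees add plus one.
        return lv * rv, ls + rs + 1, ld + rd + 1
    # An addition gate: values add, degree is the max of the children's.
    return lv + rv, ls + rs + 1, max(ld, rd)

# Example tree: (1 + 1) * ((1 * 1) + 1).
tree = ('*', ('+', 1, 1), ('+', ('*', 1, 1), 1))
value, size, degree = stats(tree)
assert value <= size ** (degree + 1)  # the tree bound holds
```

This is only a sanity check on small instances, of course, and not a substitute for the inductive argument above.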
%\revision{\textbf{THE PART BELOW NEEDS WORK. --Atri}}
The upper bound in \Cref{lem:val-ub} for the general case is a simple variant of the above proof (but we present a proof sketch of the bound below for completeness):
\begin{Lemma}
\label{lem:C-ub-gen}
Let $\circuit$ be a (general) circuit. % tree (i.e. the sub-circuits corresponding to two children of a node in $\circuit$ are completely disjoint).
Then we have
\[\abs{\circuit}(1,\dots,1)\le 2^{2^{\degree(\circuit)}\cdot \size(\circuit)}.\]
\end{Lemma}
\begin{proof}[Proof Sketch]
We use the same notation as in the proof of \Cref{lem:C-ub-tree}. We will prove by induction on $\depth(\circuit)$ that $\abs{\circuit}(1,\ldots, 1) \leq 2^{2^k\cdot N }$. The base case argument is similar to that in the proof of \Cref{lem:C-ub-tree}. In the inductive case we have that $N_\linput,N_\rinput\le N-1$.
For the case when the sink node is $\times$, we get that
\begin{align*}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \\
&\leq {2^{2^{k_\linput}\cdot N_\linput}} \circmult {2^{2^{k_\rinput}\cdot N_\rinput}}\\
&\leq 2^{2\cdot 2^{k-1}\cdot (N-1)}\\
&\leq 2^{2^k N}.
\end{align*}
In the above the first inequality follows from the inductive hypothesis while the second inequality follows from the fact that $k_\linput,k_\rinput\le k-1$ and $N_\linput, N_\rinput\le N-1$.
%$k_\linput+k_\rinput=k$ (and hence $\max(k_\linput,k_\rinput)\le k$) as well as the fact that $k\ge 0$.
For the case when the sink node is $+$, we get that
\begin{align*}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \\
&\leq 2^{2^{k_\linput}\cdot N_\linput} + 2^{2^{k_\rinput}\cdot N_\rinput}\\
&\leq 2\cdot {2^{2^k(N-1)} } \\
&\leq 2^{2^kN}.
\end{align*}
In the above the first inequality follows from the inductive hypothesis while the second inequality follows from the facts that $k_\linput,k_\rinput\le k$ and $N_\linput,N_\rinput\le N-1$. The final inequality follows from the fact that $k\ge 0$.
\qed
\end{proof}
Finally, we consider the case when $\circuit$ encodes the run of the algorithm from~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query. We cannot handle the full generality of an FAQ query, but we can handle an FAQ query that has a ``core'' join query on $k$ relations where a subset of the $k$ attributes is then ``summed'' out (e.g., the sum could be because of projecting out a subset of attributes from the join query). While the algorithm of~\cite{DBLP:conf/pods/KhamisNR16} essentially figures out when to `push in' the sums, in our case, since we only care about $\abs{\circuit}(1,\dots,1)$, we will consider the obvious circuit that computes the ``inner join'' using a worst-case optimal join (WCOJ) algorithm like~\cite{NPRR} and then adds in the addition gates. The basic idea is very simple: we will argue that there are at most $\size(\circuit)^k$ tuples in the join output (each having a value of $1$ in $\abs{\circuit}(1,\dots,1)$). Then the largest value we can see in $\abs{\circuit}(1,\dots,1)$ is obtained by summing up these at most $\size(\circuit)^k$ values of $1$. Note that this immediately implies the claimed bound in \Cref{lem:val-ub}.
We now sketch the argument for the claim about the join query above. First, we note that the computation of a WCOJ algorithm like~\cite{NPRR} can be expressed as a circuit with {\em multiple} sinks (one for each output tuple). Note that the annotation corresponding to an output tuple $\mathbf{t}$ in $\circuit$ is the polynomial $\prod_{e\in E} R(\pi_e(\mathbf{t}))$ (where $E$ indexes the set of relations). It is easy to see that in this case the value of $\mathbf{t}$ in $\abs{\circuit}(1,\dots,1)$ will be $1$ (obtained by multiplying $1$ with itself $k$ times). The claim on the number of output tuples follows from the trivial bound obtained by multiplying the input size bounds (each relation has at most $n\le \size(\circuit)$ tuples), which gives an overall bound of $n^k\le\size(\circuit)^k$. Note that we did not really use anything about the WCOJ algorithm except for the fact that the part of $\circuit$ corresponding to the join is built only of multiplication gates. In fact, we do not need the better WCOJ join size bounds either (since we used the trivial $n^k$ bound). As a final remark, we note that we can build the circuit for the join part by running, say, the algorithm from~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query that just has the join query, but where each tuple is annotated with the corresponding variable $X_i$ (i.e., the semiring for the FAQ query is $\mathbb{N}[\mathbf{X}]$).