paper-BagRelationalPDBsAreHard/app_one-pass-analysis.tex

%root: main.tex


\subsection{$\onepass$ Remarks}

We \emph{assume} that the original call to \onepass is made on an input circuit \circuit in which the members \prt, \lwght, and \rwght of every gate have been initialized to Null.

\input{app_one-pass-eval-notes}

%%%%%%%%%%%REDUCE ALGO%%%%%%%%%%%%%%%%%%%
%\begin{Definition}[Equivalence ($\equiv$)]
%A circuit \circuit is equivalent to a circuit \circuit' if and only if $\polyf(\circuit) = \polyf(\circuit')$.
%\end{Definition}


%For technical reasons, we require the invariant that every subcircuit \subcircuit corresponding to an internal gate of \circuit has $\degree\left(\subcircuit\right) \geq 1$.  \revision{\textbf{AARON:} This is now trivially satisfied by the new definition of $\deg(\circuit)$ so please update this part to remove the stuff on $\reduce$. --Atri} To ensure this, auxiliary algorithm~\ref{alg:reduce} (\reduce) is called to perform any rewrites to \circuit, where an equivalent circuit \circuit' is created and returned by iteratively combining non-variable leaf nodes bottom-up until a parent node is reached which has an input \subcircuit whose subcircuit contains at least one leaf of type \var.  It is trivial to see in such a case that $\subcircuit \equiv \subcircuit'$, and this implies $\circuit \equiv \circuit'$.
%
%\begin{Lemma}\label{lem:reduce}
%In $O(\size(\circuit))$, algorithm \reduce inspects input circuit \circuit and outputs an equivalent version \circuit' of \circuit such that all subcircuits \subcircuit of \circuit' have $\degree(\subcircuit) \geq 1$.
%\end{Lemma}
%
%\begin{proof}[Proof of \Cref{lem:reduce}]
%~\paragraph{\reduce correctness}
%Note that for a source gate \gate, only when $\gate.\type = \var$ is it the case that $\degree(\gate) = 1$, and otherwise $\degree(\gate) = 0$.  Lines~\ref{alg:reduce-add-deg} and~\ref{alg:reduce-no-deg} compute \gate.\degval.
%
%We prove an equivalent circuit \circuit' by induction over the iteration of \topord.  For the base case, consider when we have one node.  In such a case, no rewriting occurs, and \reduce returns \circuit.  It is trivial to note that $\circuit \equiv \circuit$.
%
%For the inductive hypothesis, we assume that for $k \geq 0$ nodes in \topord, the modified circuit $\circuit_k' \equiv \circuit_k$, where $\circuit_k'$ denotes the circuit at the end of iteration $k$.  Similarly, when discussing \Cref{alg:reduce} pseudocode, $\gate_{k}$ denotes the gate in position $k$ of \topord, and $\gate_{k_\linput}$ ($\gate_{k_\rinput}$) denotes the left (right) input of $\gate_{k}$.
%
%We now prove for $k + 1$ gates in \topord that $\circuit_{k + 1}' \equiv \circuit_{k + 1}$.  Note that if the gate $\gate_{k + 1}$ is a source node, then this is again the base case and we are finished.  If $\gate_{k + 1}$ is an internal node, then $\gate_{k + 1}.\type$ must either be $\circmult$ or $\circplus$.
%
%When $\gate_{k + 1}$ is $\circmult$, then it is the case that either $\degree(\gate_{{k + 1}_\linput}) \geq 1$ or $\gate_{{k + 1}_\linput}.\type$ is $\tnum$ and likewise for $\gate_{{k + 1}_\rinput}$.  There are then four possibilities, only one of which will prompt a rewrite, namely when we have that both inputs have $\degree(\gate_{{k + 1}_i}) = 0$.  In such a case, $\gate_{k + 1}.\val \gets \gate_{{k + 1}_\linput}.\val \times \gate_{{k + 1}_\rinput}.\val$, and the inputs are deleted.  Note that since $\gate_{{k + 1}_\linput}.\type = \gate_{{k + 1}_\rinput}.\type = \tnum$ that we have two constants being multiplied, and that for subcircuit $\subcircuit = (\times, \tnum_1, \tnum_2)$ and $\tnum' = \tnum_1 \times \tnum_2$, $\polyf(\subcircuit) = \polyf(\tnum')$ which implies that for the rewritten \subcircuit', $\subcircuit \equiv \subcircuit'$.
%
%A analogous argument applies when $\gate_{k + 1}.\type$ is $\circplus$.\qed
%
%\paragraph{\reduce Run-time Analysis}.
%$O(\size(\circuit))$ trivially follows by the single iterative pass over the \topord of \circuit, where, as can be seen in lines~\ref{alg:reduce-var},~\ref{alg:reduce-num},~\ref{alg:reduce-mult}, and~\ref{alg:reduce-plus} a constant number of operations are performed on each node.\qed
%\end{proof}

\subsection{$\onepass$ Example}
\begin{Example}\label{example:one-pass}
 Let $\etree$ encode the expression $(X_1 + X_2)(X_1 - X_2) + X_2^2$.  After one pass, \Cref{alg:one-pass-iter} would have computed the following weight distribution.  For the two inputs of the root $+$ node $\etree$, $\etree.\lwght = \frac{4}{5}$ and $\etree.\rwght = \frac{1}{5}$.  Similarly, letting $\stree$ denote the left subtree of $\etree_{\lchild}$, we have $\stree.\lwght = \stree.\rwght = \frac{1}{2}$.  This is depicted in \Cref{fig:expr-tree-T-wght}, where $X$ and $Y$ play the roles of $X_1$ and $X_2$ respectively.
\end{Example}

\begin{figure}[h!]
%	\begin{tikzpicture}[thick, every tree node/.style={default_node, thick, draw=black, black, circle, text width=0.3cm, font=\bfseries, minimum size=0.65cm}, every child/.style={black}, edge from parent/.style={draw, thick},
%level 1/.style={sibling distance=0.95cm},
%level 2/.style={sibling distance=0.7cm},
%%level 2+/.style={sibling distance=0.625cm}
%%level distance = 1.25cm,
%%sibling distance = 1cm,
%%every node/.append style = {anchor=center}
%]
%
%	\Tree [.\node(root){$\boldsymbol{+}$};
%			\edge [wght_color] node[midway, auto= right, font=\bfseries, gray] {$\bsym{\frac{4}{5}}$}; [.\node[highlight_color](tl){$\boldsymbol{\times}$};
%				[.\node(s){$\bsym{+}$};
%					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sl){$\bsym{x_1}$}; ]
%					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](sr){$\bsym{x_2}$}; ]
%					]
%				[.\node(sp){$\bsym{+}$};
%					\edge[wght_color] node[pos=0.35, left, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spl){$\bsym{x_1}$}; ]
%					\edge[wght_color] node[pos=0.35, right, font=\bfseries, gray]{$\bsym{\frac{1}{2}}$}; [.\node[highlight_color](spr){$\bsym{\times}$};
%						[.$\bsym{-1}$ ] [.$\bsym{x_2}$ ]
%						]
%					]
%				]
%			\edge [wght_color] node[midway, auto=left, font=\bfseries, gray] {$\bsym{\frac{1}{5}}$}; [.\node[highlight_color](tr){$\boldsymbol{\times}$};
%				[.$\bsym{x_2}$
%					\edge [draw=none]; [.\node[draw=none]{}; ]
%					\edge [draw=none]; [.\node[draw=none]{}; ]
%				]
%				[.$\bsym{x_2}$ ] ]
%	]
%%	labels for plus node children, with arrows
%	\node[left=2pt of sl, highlight_color, inner sep=0pt] (sl-label) {$\stree_\lchild$};
%	\draw[highlight_color] (sl) -- (sl-label);
%	\node[right=2pt of sr, highlight_color, inner sep=0pt] (sr-label) {$\stree_\rchild$};
%	\draw[highlight_color] (sr) -- (sr-label);
%	\node[below left=2pt of spl, inner sep=0pt, highlight_color](spl-label) {$\stree_\lchild'$};
%	\draw[highlight_color] (spl) -- (spl-label);
%	\node[right=2pt of spr, highlight_color, inner sep=0] (spr-label) {$\stree_\rchild'$};
%	\draw[highlight_color] (spr) -- (spr-label);
%	\node[above left=2pt of tl, inner sep=0pt, highlight_color] (tl-label) {$\etree_\lchild$};
%	\draw[highlight_color] (tl) -- (tl-label);
%	\node[above right=2pt of tr, highlight_color, inner sep=0pt] (tr-label) {$\etree_\rchild$};
%	\node[above = 2pt of root, highlight_color, inner sep=0pt, font=\bfseries] (root-label) {$\etree$};
%	\node[above = 2pt of s, highlight_color, inner sep=0pt, font=\bfseries] (s-label) {$\stree$};
%	\node[above = 2pt of sp, highlight_color, inner sep=0pt, font=\bfseries] (sp-label) {$\stree'$};
%	\draw[highlight_color] (tr) -- (tr-label);
%%	\draw[<-|, highlight_color] (s) -- (s-label);
%%	\draw[<-|, highlight_color] (sp) -- (sp-label);
%%	\draw[<-|, highlight_color]  (root) -- (root-label);
%%\node[above right=0.7cm of TR, highlight_color, inner sep=0pt, font=\bfseries] (tr-comment) {$\etree_\rchild$};
%%		\draw[<-|, highlight_color] (TR) -- (tr-comment);
%	\end{tikzpicture}

%%%%%%%%%%%%%%%%%%%
%New Figure
%%%%%%%%%%%%%%%%%%%


\centering
%\resizebox{0.5\linewidth}{\height}{
	\begin{tikzpicture}[thick]
		%First level
		\node[tree_node] (a1) at (1, 0) {$\boldsymbol{Y}$};
		\node[tree_node] (b1) at (3, 0) {$\boldsymbol{-1}$};
		%\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};
		%Second level
		\node[tree_node] (a2) at (-0.75, 0) {$\boldsymbol{X}$};
		\node[tree_node] (b2) at (1.6,1.25) {$\boldsymbol{\circmult}$};
		\node[tree_node] (c2) at (2.9, 1.25) {$\boldsymbol{\circmult}$};
		%Third level
		\node[tree_node] (a3) at (0.7, 2.5) {$\boldsymbol{\circplus}$};
		\node[tree_node] (b3) at (1.6, 2.5) {$\boldsymbol{\circplus}$};
		%Fourth level
		\node[tree_node] (a4) at (1.5, 3.75) {$\boldsymbol{\circmult}$};
		\node[tree_node] (b4) at (2.8, 3.75) {$\boldsymbol{\circplus}$};

		\draw[->] (a1) edge[right] node{$\frac{1}{2}$} (a3);
		\draw[->] (b1) -- (b2);
		\draw[->] (a1) -- (b2);
		\draw[->] (a1) edge[bend left=15] (c2);
		\draw[->] (a1) edge[bend right=15] (c2);
		\draw[->] (a2) edge[left] node{$\frac{1}{2}$} (a3);
		\draw[->] (a2) edge[below] node{$\frac{1}{2}$} (b3);
		\draw[->] (b2) edge[right] node{$\frac{1}{2}$} (b3);
		\draw[->] (c2) edge[right] node{$\frac{1}{5}$} (b4);
		\draw[->] (a3) -- (a4);
		\draw[->] (b3) -- (a4);
		\draw[->] (a4) edge[above] node{$\frac{4}{5}$} (b4);
%		\draw[->] (b3) -- (a4);
%		\draw[->] (a4) -- (2.25, 2.75);
	\end{tikzpicture}
	%}


		\caption{Weights computed by $\onepass$ in  \Cref{example:one-pass}.}

		\label{fig:expr-tree-T-wght}
\end{figure}


%\subsection{\onepass}

\begin{algorithm}[h!]
	\caption{\onepass$(\circuit)$}
	\label{alg:one-pass-iter}
	\begin{algorithmic}[1]
		\Require \circuit: Circuit
		\Ensure \circuit: Annotated Circuit
		\Ensure \vari{sum} $\in \domN$
		\For{\gate in \topord(\circuit)}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
			\If{\gate.\type $=$ \var}
				\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
			\ElsIf{\gate.\type $=$ \tnum}
				\State \gate.\prt $\gets \abs{\gate.\val}$\label{alg:one-pass-num}
			\ElsIf{\gate.\type $= \circmult$}
				\State \gate.\prt $\gets \gate_\linput.\prt \times \gate_\rinput.\prt$\label{alg:one-pass-mult}
			\Else
				\State \gate.\prt $\gets \gate_\linput.\prt + \gate_\rinput.\prt$\label{alg:one-pass-plus}
				\State \gate.\lwght $\gets \frac{\gate_\linput.\prt}{\gate.\prt}$\label{alg:one-pass-lwght}
				\State \gate.\rwght $\gets \frac{\gate_\rinput.\prt}{\gate.\prt}$\label{alg:one-pass-rwght}
			\EndIf
			\State \vari{sum} $\gets \gate.\prt$
		\EndFor
		\State \Return (\vari{sum}, $\circuit$)
	\end{algorithmic}
\end{algorithm}

\subsection{Proof of \onepass (\Cref{lem:one-pass})}\label{sec:proof-one-pass}
\begin{proof}%[Proof of \Cref{lem:one-pass}]
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in  the topological order \topord (line~\ref{alg:one-pass-loop}) of the input circuit \circuit.  Note that \topord follows the standard definition of a topological ordering over the DAG structure of \circuit.

For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum.  In this case, as per \cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and $\abs{\circuit.\val}$, respectively.

For the inductive hypothesis, assume that \onepass correctly computes \gate.\prt, \gate.\lwght, and \gate.\rwght for every gate \gate processed during the first $k \geq 0$ iterations over \topord.
\AH{Notes above:  Algo uses Reduce, but we don't use that anymore.  The figure needs to change to a circuit.}
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit for $i \in [k + 1]$.
Note that $\gate_{k + 1}$ must be last in the ordering of all gates $\gate_\vari{i}$.  Note that for $\size(\circuit) > 1$, if $\gate_{k+1}$ is a leaf node, we are back to the base case.  Otherwise $\gate_{k + 1}$ is an internal node with $\gate_{k + 1}.\type = \circplus$ or $\gate_{k + 1}.\type = \circmult$, both of which require binary input.

When $\gate_{k+1}.\type = \circplus$, then by line~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \cref{eq:T-all-ones}.  Further, lines~\ref{alg:one-pass-lwght} and~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$.  Note that all values needed for each computation have been correctly computed by the inductive hypothesis.

When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild}.\prt \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed by \cref{eq:T-all-ones} is correct.

\paragraph*{Runtime Analysis}
It is known that $\topord(\circuit)$ is computable in linear time.  Next, each of the $\size(\circuit)$ iterations of the loop in \Cref{alg:one-pass-loop} takes $O\left( \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$ time.  Indeed, it is easy to see that every number that the algorithm computes is at most $\abs{\circuit}(1,\ldots, 1)$.  Hence, by definition each such operation takes $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}$ time, which proves the claimed runtime.
\qed
\end{proof}
%In general it is known that an arithmetic computation which requires $M$ bits takes $O(\frac{\log{M}}{\log{N}})$ time for an input size $N$.  Since each of the arithmetic operations at a given gate has a bit size of $O(\log{\abs{\circuit}(1,\ldots, 1)})$,  thus, we obtain the general runtime of $O\left(\size(\circuit)\cdot \frac{\log{\abs{\circuit}(1,\ldots, 1)}}{\log{\size(\circuit)}}\right)$.


%%%Moved the stuff below to earlier in the appendix
\iffalse
\paragraph*{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to be size $O(N)$}
For our runtime results to be relevant, it must be the case that the sum of the coefficients computed by \onepass is indeed size $O(N)$ since there are $O(\log{N})$ bits in the RAM model where $N$ is the size of the input.  The size of the input here is \size(\circuit).  We show that when \size$(\circuit_\linput) = N_\linput$, \size$(\circuit_\rinput) = N_\rinput$, where $N_\linput + N_\rinput \leq N$, this is indeed the case.

\begin{proof}%[Proof of $\abs{\circuit}(1,\ldots, 1)$ is size $O(N)$]
To prove this result, we start by proving that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$ for \degree(\circuit) $= k$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient (or constant) of $1$.  In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{2^k} = 1^{2^0} = 1$.

Assume for $\ell > 0$ an arbitrary circuit \circuit of $\depth(\circuit) \leq \ell$ that it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$.% for $k \geq 1$ when \depth(C) $\geq 1$.

For the inductive step we consider a circuit \circuit such that $\depth(\circuit) \leq \ell + 1$.  The sink can only be either a $\circmult$ or $\circplus$ gate.  Consider when sink node is $\circmult$.  Let $k_\linput, k_\rinput$ denote \degree($\circuit_\linput$) and \degree($\circuit_\rinput$) respectively.  Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \leq (N-1)^{2^{k_\linput}} \circmult (N - 1)^{2^{k_\rinput}}\nonumber\\
 &\leq (N-1)^{2^{k}-1}\label{eq:sumcoeff-times-upper}\\
 &\leq N^{2^k}.\nonumber
\end{align}
We derive the upperbound of \Cref{eq:sumcoeff-times-upper} by noting that the maximum value of the LHS occurs when both the base and exponent are maximized.

For the case when the sink node is a $\circplus$ node, then we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \leq
N_\linput^{2^{k_\linput}} + N_\rinput^{2^{k_\rinput}}\nonumber\\
&\leq N_\linput^{2^k } + N_\rinput\label{eq:sumcoeff-plus-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
Similar to the $\circmult$ case, \Cref{eq:sumcoeff-plus-upper} upperbounds its LHS by the fact that the maximum base and exponent combination is always greater than or equal to the sum of lower base/exponent combinations.  The final inequality is true given the constraint over the inputs.

Since $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for all circuits such that all $\circplus$ gates share at most one gate with their sibling (across their respective subcircuits), then $\log{N^{2^k}} = 2^k \cdot \log{N}$ which for fixed $k$ yields the desired $O(\log{N})$ bits for $O(1)$ arithmetic operations.% for the given query class.
\end{proof}
\fi