paper-BagRelationalPDBsAreHard/app_onepass-analysis.tex

%root: main.tex


\subsection{$\onepass$ Remarks}

Note that we \emph{assume} that the original call to \onepass is made on an input circuit \circuit in which the members \prt, \lwght, and \rwght of every gate have been initialized to Null.

\input{app_onepass_eval-notes}

\subsection{$\onepass$ Example}
\begin{Example}\label{example:one-pass}
 Let $\circuit$ encode the expression $(X + Y)(X - Y) + Y^2$.  After one pass, \Cref{alg:one-pass-iter} would have computed the following weight distribution.  For the two inputs of the sink gate $\circuit$, $\circuit.\lwght = \frac{4}{5}$ and $\circuit.\rwght = \frac{1}{5}$.  Similarly, for each $\circplus$ gate $\stree$ that is an input of the left input $\circuit_{\lchild}$ of \circuit (note that $\circuit_{\lchild}$ itself is a $\circmult$ gate and hence carries no weights), $\stree.\lwght = \stree.\rwght = \frac{1}{2}$.  This is depicted in \Cref{fig:expr-tree-T-wght}.
\end{Example}

\begin{figure}[h!]
\centering
	\begin{tikzpicture}[thick]
		%First level
		\node[tree_node] (a1) at (1, 0) {$\boldsymbol{Y}$};
		\node[tree_node] (b1) at (3, 0) {$\boldsymbol{-1}$};
		%Second level
		\node[tree_node] (a2) at (-0.75, 0) {$\boldsymbol{X}$};
		\node[tree_node] (b2) at (1.6,1.25) {$\boldsymbol{\circmult}$};
		\node[tree_node] (c2) at (2.9, 1.25) {$\boldsymbol{\circmult}$};
		%Third level
		\node[tree_node] (a3) at (0.7, 2.5) {$\boldsymbol{\circplus}$};
		\node[tree_node] (b3) at (1.6, 2.5) {$\boldsymbol{\circplus}$};
		%Fourth level
		\node[tree_node] (a4) at (1.5, 3.75) {$\boldsymbol{\circmult}$};
		\node[tree_node] (b4) at (2.8, 4) {$\boldsymbol{\circplus}$};
		\node[above right=0.15cm of b4, inner sep=0pt, font=\bfseries](labelC) {$\circuit$};

		\draw[->] (a1) edge[right] node{$\frac{1}{2}$} (a3);
		\draw[->] (b1) -- (b2);
		\draw[->] (a1) -- (b2);
		\draw[->] (a1) edge[bend left=15] (c2);
		\draw[->] (a1) edge[bend right=15] (c2);
		\draw[->] (a2) edge[left] node{$\frac{1}{2}$} (a3);
		\draw[->] (a2) edge[below] node{$\frac{1}{2}$} (b3);
		\draw[->] (b2) edge[right] node{$\frac{1}{2}$} (b3);
		\draw[->] (c2) edge[right] node{$\frac{1}{5}$} (b4);
		\draw[->] (a3) -- (a4);
		\draw[->] (b3) -- (a4);
		\draw[->] (a4) edge[above] node{$\frac{4}{5}$} (b4);
		\draw[black] (b4) -- (labelC);
	\end{tikzpicture}
		\caption{Weights computed by $\onepass$ in  \Cref{example:one-pass}.}

		\label{fig:expr-tree-T-wght}
\end{figure}

\begin{algorithm}[h!]
	\caption{\onepass$(\circuit)$}
	\label{alg:one-pass-iter}
	\begin{algorithmic}[1]
		\Require \circuit: Circuit
		\Ensure \circuit: Annotated Circuit
		\Ensure \vari{sum} $\in \domN$
		\For{\gate in \topord(\circuit)}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
			\If{\gate.\type $=$ \var}
				\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
			\ElsIf{\gate.\type $=$ \tnum}
				\State \gate.\prt $\gets \abs{\gate.\val}$\label{alg:one-pass-num}
			\ElsIf{\gate.\type $= \circmult$}
				\State \gate.\prt $\gets \gate_\linput.\prt \times \gate_\rinput.\prt$\label{alg:one-pass-mult}
			\Else
				\State \gate.\prt $\gets \gate_\linput.\prt + \gate_\rinput.\prt$\label{alg:one-pass-plus}
				\State \gate.\lwght $\gets \frac{\gate_\linput.\prt}{\gate.\prt}$\label{alg:one-pass-lwght}
				\State \gate.\rwght $\gets \frac{\gate_\rinput.\prt}{\gate.\prt}$\label{alg:one-pass-rwght}
			\EndIf
			\State \vari{sum} $\gets \gate.\prt$
		\EndFor
		\State \Return (\vari{sum}, $\circuit$)
	\end{algorithmic}
\end{algorithm}

\subsection{Proof of \onepass (\Cref{lem:one-pass})}\label{sec:proof-one-pass}
\begin{proof}
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in  the topological order \topord (line~\ref{alg:one-pass-loop}) of the input circuit \circuit.  \topord follows the standard definition of a topological ordering over the DAG structure of \circuit.

For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum.  In this case, as per \cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and $\abs{\circuit.\val}$, respectively.

For the inductive hypothesis, assume that \onepass correctly computes \gate.\prt, \gate.\lwght, and \gate.\rwght for all gates \gate in \circuit visited within the first $k \geq 0$ iterations over \topord.
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit for $i \in [k + 1]$.
The gate $\gate_{k + 1}$ is the last gate in the ordering among all gates $\gate_{i}$, $i \in [k + 1]$.  When $\size(\circuit) > 1$, if $\gate_{k+1}$ is a source gate, we are back to the base case.  Otherwise $\gate_{k + 1}$ is an internal gate,
which has exactly two inputs.

When $\gate_{k+1}.\type = \circplus$, then by line~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \cref{eq:T-all-ones}.  Further, lines~\ref{alg:one-pass-lwght} and~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$.  All values needed for each computation have been correctly computed by the inductive hypothesis.

When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild}.\prt \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed by \cref{eq:T-all-ones} is correct.  This concludes the proof of correctness.

\paragraph*{Runtime Analysis}
It is known that $\topord(\circuit)$ is computable in linear time.  There are $\size(\circuit)$ iterations.  Each iteration takes $O\left( \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\inparen{\size(\circuit)}}\right)$ time.  This can be seen since every number that the algorithm computes is at most $\abs{\circuit}(1,\ldots,1)$. Hence, by definition each such operation takes $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\inparen{\size(\circuit)}}$ time, which proves the claimed runtime.
\qed
\end{proof}

\iffalse
\paragraph*{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to be size $O(N)$}
For our runtime results to be relevant, it must be the case that the sum of the coefficients computed by \onepass is indeed size $O(N)$ since there are $O(\log{N})$ bits in the RAM model where $N$ is the size of the input.  The size of the input here is \size(\circuit).  We show that when \size$(\circuit_\linput) = N_\linput$, \size$(\circuit_\rinput) = N_\rinput$, where $N_\linput + N_\rinput \leq N$, this is indeed the case.

\begin{proof}
To prove this result, we start by proving that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$ for \degree(\circuit) $= k$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient (or constant) of $1$.  In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{2^k} = 1^{2^0} = 1$.

Assume for $\ell > 0$ an arbitrary circuit \circuit of $\depth(\circuit) \leq \ell$ that it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$.

For the inductive step we consider a circuit \circuit such that $\depth(\circuit) \leq \ell + 1$.  The sink can only be either a $\circmult$ or $\circplus$ gate.  Consider when sink node is $\circmult$.  Let $k_\linput, k_\rinput$ denote \degree($\circuit_\linput$) and \degree($\circuit_\rinput$) respectively.  Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \leq (N-1)^{2^{k_\linput}} \circmult (N - 1)^{2^{k_\rinput}}\nonumber\\
 &\leq (N-1)^{2^{k}-1}\label{eq:sumcoeff-times-upper}\\
 &\leq N^{2^k}.\nonumber
\end{align}
We derive the upperbound of \Cref{eq:sumcoeff-times-upper} by noting that the maximum value of the LHS occurs when both the base and exponent are maximized.

For the case when the sink node is a $\circplus$ node, then we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \leq
N_\linput^{2^{k_\linput}} + N_\rinput^{2^{k_\rinput}}\nonumber\\
&\leq N_\linput^{2^k } + N_\rinput\label{eq:sumcoeff-plus-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
Similar to the $\circmult$ case, \Cref{eq:sumcoeff-plus-upper} upperbounds its LHS by the fact that the maximum base and exponent combination is always greater than or equal to the sum of lower base/exponent combinations.  The final inequality is true given the constraint over the inputs.

Since $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for all circuits such that all $\circplus$ gates share at most one gate with their sibling (across their respective subcircuits), then $\log{N^{2^k}} = 2^k \cdot \log{N}$ which for fixed $k$ yields the desired $O(\log{N})$ bits for $O(1)$ arithmetic operations.
\end{proof}
\fi