paper-BagRelationalPDBsAreHard/prob-def.tex

%root: main.tex
%!TEX root=./main.tex

\subsection{Formalizing \Cref{prob:intro-stmt}}\label{sec:expression-trees}
We focus on the problem of computing $\expct_{\worldvec\sim\pdassign}\pbox{\poly\inparen{\vct{\randWorld}}}$ from now on. %, assume implicit $\query, \tupset, \tup$, and  drop them from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial).
%
\Cref{prob:intro-stmt} asks if there exists a linear time approximation algorithm in the size of a given circuit \circuit that encodes $\poly\inparen{\vct{X}}$.  Recall that in this work we
 represent lineage polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity), which we use for polynomials over $\mathbb N$ in the obvious way.  Since we are specifically using circuits to model lineage polynomials, we can refer to these circuits as lineage circuits.  However, when the meaning is clear, we will drop the term lineage and only refer to them as circuits.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) with source gates (in degree of $0$) drawn from either $\domN$ or $\vct{X} = \inparen{X_\tup}_{\tup\in\tupset}$ and one sink gate for each result tuple.  Internal gates have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
%
Each gate has the following members: \type, \vari{input}, %\val,
\vpartial, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the value type $\{\circplus, \circmult, \var, \tnum\}$ and \vari{input} the list of inputs. Source gates have an extra member \val for the value.  $\circuit_\linput$ ($\circuit_\rinput$) denotes the left (right) input of \circuit.
\end{Definition}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\colorlet{figray}{black!65}
\colorlet{fillred}{red!45}
\colorlet{fillblue}{blue!45}
\colorlet{fillbrown}{green!45}
\begin{wrapfigure}{r}{0.2\textwidth}
	%\begin{figure}[t!]
		\centering
		%see https://tex.stackexchange.com/questions/26846/how-to-scale-a-tikzpicture-including-texts#26852
		%for a nice explanation of scaling a tikzpicture
		\begin{tikzpicture}[thick, scale=0.67, every node/.style={transform shape}]
			\node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$};
			\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
			\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
			\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};

			\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node, fill = fillblue] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};

			\node[tree_node, fill = fillred] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
			\node[tree_node, fill = fillbrown] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};

			\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
			%%%%%%%%%%%%%%
			%|C| label nodes
			\node[draw=none, figray%, left=of a1
			](a1l) at (0, -0.75) {$1$};
			\node[draw=none, figray] (b1l) at (1.5, -0.75) {$2$};
			\node[draw=none, figray] (c1l) at (3, -0.75) {$1$};
			\node[draw=none, figray] (d1l) at (4.5, -0.75) {$1$};

			\node[draw=none, figray] (a2l) at (0.75, 0) {$2$};
			\node[draw=none, figray] (b2l) at (2.25, 0) {$2$};
			\node[draw=none, figray] (c2l) at (3.75, 0) {$1$};
			\node[draw=none, figray] (a3l) at (-0.2, 1.5) {$3$};
			\node[draw=none, figray] (b3l) at (4.5, 1.5) {$3$};
			\node[draw=none, figray] (a4l) at (2.8, 2.7) {$9$};
			% |C| label lines
			\draw[figray] (a1l) -- (a1);
			\draw[figray] (b1) -- (b1l);
			\draw[figray] (c1) -- (c1l);
			\draw[figray] (d1) -- (d1l);
			\draw[figray] (a2) -- (a2l);
			\draw[figray] (b2) -- (b2l);
			\draw[figray] (c2) -- (c2l);
			\draw[figray] (a3) -- (a3l);
			\draw[figray] (b3) -- (b3l);
			\draw[figray] (a4) -- (a4l);
			%%%%%%%%%%%%%
			\draw[->] (a1) -- (a2);
			\draw[->] (a1) -- (a3);
			\draw[->] (b1) -- (a2);
			\draw[->] (b1) -- (b2);
			\draw[->] (c1) -- (c2);
			\draw[->] (c1) -- (b2);
			\draw[->] (d1) -- (c2);
			\draw[->] (a2) -- (b3);
			\draw[->] (b2) -- (a3);
			\draw[->] (c2) -- (b3);
			\draw[->] (a3) -- (a4);
			\draw[->] (b3) -- (a4);
			\draw[->] (a4) -- (2.25, 2.75);
		\end{tikzpicture}
		%\setlength{\abovecaptionskip}{-0.025cm}
		\savecaptionspace{
		\caption{Circuit encoding of $(X + 2Y)(2X - Y)$.}
		\label{fig:circuit}
		}{-0.025cm}{-0.58cm}
		%\vspace{-0.58cm}
	\end{wrapfigure}

We refer to the structure when the underlying DAG is a tree (with edges pointing towards the root) as an expression tree \etree.  The circuits $\inparen{1}$ and $\inparen{2}$ in column $\poly$ of \Cref{fig:two-step} are both expression trees.  %encode their respective polynomials in column $\poly$.
Members not described in~\Cref{def:circuit} are defined and used in the appendix.  %In such a case, the root of \etree is analogous to the sink of \circuit.  The fields \vari{partial}, \degval, \vari{Lweight}, and \vari{Rweight} are used in the proofs of \Cref{sec:proofs-approx-alg}.

%Note that the ciricuit \circuit representing $AX$ and the circuit \circuit' representing $B\inparen{Y+Z}$ each encode a tree, with edges pointing towards the root.

The function $\polyf\inparen{\cdot}$ (\Cref{def:poly-func}) maps a circuit to its corresponding polynomial.  We next define its inverse:
%of the function $\polyf(\cdot)$.% (\Cref{def:poly-func}).%, which maps a circuit to the polynomial it encodes.

\begin{Definition}[Circuit Set]\label{def:circuit-set}
$\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $\polyf(\circuit) = \polyX$.
\end{Definition}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\Cref{fig:circuit} depicts a circuit \circuit in $\circuitset{2X^2+3XY-2Y^2}$.   Light-text annotations and the colors
%denote the computation of $\abs{\circuit}\inparen{1, \ldots, 1}$ which we introduce
can be ignored until~\Cref{sec:algo}. %One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\medskip
 We are now ready to formally state the final version of \Cref{prob:intro-stmt}.%our \textbf{main problem}.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\pdb$ be an arbitrary \abbrCTIDB and $\vct{X}$ be the set of variables annotating tuples in $\tupset$.  Fix an $\raPlus$ query $\query$ and a result tuple $\tup$.
  The \expectProblem is defined as follows:%\\[-7mm]
\begin{flalign*}
&\textbf{Input}: \circuit \in \circuitset{\poly\pbox{\query, \tupset, \tup}\inparen{\vct{X}}}\\% \text{ for }\poly\inparen{\vct{X}} = \textcolor{red}{CHANGE}\poly\pbox{\query,\tupset,\tup}&\\
&\textbf{Output}: \expct_{\vct{W} \sim \bpd}\pbox{\poly\pbox{\query, \tupset, \tup}\inparen{\vct{W}}}.&
\end{flalign*}
\end{Definition}

\input{circuits-model-runtime}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: