% paper-BagRelationalPDBsAreHard/prob-def.tex

%root: main.tex
%!TEX root=./main.tex

\subsection{Formalizing \Cref{prob:intro-stmt}}\label{sec:expression-trees}
We focus on the problem of computing $\expct_{\worldvec\sim\pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}$ from now on, assume implicit $\query, \tupset, \tup$, and  drop them from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial).

\Cref{prob:intro-stmt} asks if there exists a linear time approximation algorithm in the size of a given circuit \circuit which encodes $\poly\inparen{\vct{X}}$.  Recall that in this work we
 represent lineage polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way.  Since we are specifically using circuits to model lineage polynomials, we can refer to these circuits as lineage circuits.  However, when the meaning is clear, we will drop the term lineage and only refer to them as circuits.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source gates (i.e., gates with in-degree $0$) consist of elements in either $\domN$ or $\vct{X} = \inparen{X_1,\ldots,X_\numvar}$.  For each result tuple there exists one sink gate.  The internal gates have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
%
Each gate has the following members: \type, \vari{input}, \vpartial, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the value type $\{\circplus, \circmult, \var, \tnum\}$ and \vari{input} the list of inputs. Source gates have an extra member \val storing the value.  $\circuit_\linput$ ($\circuit_\rinput$) denotes the left (right) input of \circuit.
\end{Definition}
When the underlying DAG is a tree (with edges pointing towards the root), the structure is an expression tree \etree.  In such a case, the root of \etree is analogous to the sink of \circuit.  The members \vpartial, \degval, \vari{Lweight}, and \vari{Rweight} are used in the proofs of \Cref{sec:proofs-approx-alg}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The circuits in \Cref{fig:two-step} encode their respective polynomials in column $\poly$.
Note that the circuit \circuit representing $AX$ and the circuit $\circuit'$ representing $B\inparen{Y+Z}$ each encode a tree, with edges pointing towards the root.


	\begin{figure}[t!]
		\centering
		\begin{tikzpicture}[thick]
			\node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$};
			\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
			\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
			\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};

			\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};

			\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
			\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};

			\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};

			\draw[->] (a1) -- (a2);
			\draw[->] (a1) -- (a3);
			\draw[->] (b1) -- (a2);
			\draw[->] (b1) -- (b2);
			\draw[->] (c1) -- (c2);
			\draw[->] (c1) -- (b2);
			\draw[->] (d1) -- (c2);
			\draw[->] (a2) -- (b3);
			\draw[->] (b2) -- (a3);
			\draw[->] (c2) -- (b3);
			\draw[->] (a3) -- (a4);
			\draw[->] (b3) -- (a4);
			\draw[->] (a4) -- (2.25, 2.75);
		\end{tikzpicture}
		\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
		\label{fig:circuit}
		\vspace{-0.53cm}
	\end{figure}
We next formally define the relationship of circuits with polynomials.  While the definition assumes one sink for notational convenience, it easily generalizes to the multiple sinks case.
\begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func}
$\polyf(\circuit)$ maps the sink of circuit $\circuit$ to its corresponding polynomial (in \abbrSMB).  $\polyf(\cdot)$ is recursively defined on $\circuit$ as follows, with addition and multiplication following the standard interpretation for polynomials:
\begin{equation*}
	\polyf(\circuit) = \begin{cases}
					\polyf(\circuit_\linput) + \polyf(\circuit_\rinput)			&\text{ if \circuit.\type } = \circplus\\
					\polyf(\circuit_\linput) \cdot \polyf(\circuit_\rinput)		&\text{ if \circuit.\type } = \circmult\\
					\circuit.\val									&\text{ if \circuit.\type } = \var \text{ OR } \tnum.
				\end{cases}
\end{equation*}
\end{Definition}

$\circuit$ need not encode $\poly\inparen{\vct{X}}$ in the same, default \abbrSMB representation.  For instance, $\circuit$ could encode the factorized representation $(X + 2Y)(2X - Y)$ of $\poly\inparen{\vct{X}} = 2X^2+3XY-2Y^2$, as shown in \Cref{fig:circuit}, while $\polyf(\circuit) = \poly\inparen{\vct{X}}$ is always the equivalent \abbrSMB representation.

\begin{Definition}[Circuit Set]\label{def:circuit-set}
$\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $\polyf(\circuit) = \polyX$.
\end{Definition}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$.  One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\medskip
\noindent We are now ready to formally state the final version of \Cref{prob:intro-stmt}.%our \textbf{main problem}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\pdb'$ be an arbitrary \abbrCTIDB and $\vct{X}$ be the set of variables annotating tuples in $\tupset'$.  Fix an $\raPlus$ query $\query$ and a result tuple $\tup$.
  The \expectProblem is defined as follows:%\\[-7mm]
\begin{flalign*}
&\textbf{Input}: \circuit \in \circuitset{\polyX} \text{ for }\poly'\inparen{\vct{X}} = \poly'\pbox{\query,\tupset',\tup}&\\
&\textbf{Output}: \expct_{\vct{W} \sim \bpd}\pbox{\poly'\pbox{\query, \tupset', \tup}\inparen{\vct{W}}}.&
\end{flalign*}
\end{Definition}

\input{circuits-model-runtime}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: