% paper-BagRelationalPDBsAreHard/prob-def.tex

%root: main.tex
%!TEX root=./main.tex

\subsection{Formalizing \Cref{prob:intro-stmt}}\label{sec:expression-trees}
From now on, we focus on the problem of computing $\expct_{\worldvec\sim\pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}$; we treat $\query, \tupset, \tup$ as implicit and drop them from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial).

\Cref{prob:intro-stmt} asks whether there exists an approximation algorithm whose runtime is linear in the size of a given circuit \circuit that encodes $\poly\inparen{\vct{X}}$.  Recall that in this work we
 represent lineage polynomials via \emph{arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way.  Since we are specifically using circuits to model lineage polynomials, we can refer to these circuits as lineage circuits.  However, when the meaning is clear, we will drop the term lineage and only refer to them as circuits.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) with source gates (in-degree $0$) drawn from either $\domN$ or $\vct{X} = \inparen{X_1,\ldots,X_\numvar}$ and one sink gate for each result tuple.  Internal gates have exactly two inputs and are either sum ($\circplus$) or product ($\circmult$) gates.
%
Each gate has the following members: \type, \vari{input}, %\val,
\vpartial, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the type of the gate, one of $\{\circplus, \circmult, \var, \tnum\}$, and \vari{input} is the list of its inputs. Source gates have an extra member \val for the value.  $\circuit_\linput$ ($\circuit_\rinput$) denotes the left (right) input of \circuit.
\end{Definition}
We refer to the structure when the underlying DAG is a tree (with edges pointing towards the root) as an expression tree \etree.  Members not described in the definition are defined and used in the appendix proofs.  %In such a case, the root of \etree is analogous to the sink of \circuit.  The fields \vari{partial}, \degval, \vari{Lweight}, and \vari{Rweight} are used in the proofs of \Cref{sec:proofs-approx-alg}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The circuits $\inparen{1}$ and $\inparen{2}$ in column $\poly$ of \Cref{fig:two-step} are both expression trees.%encode their respective polynomials in column $\poly$.
%Note that the ciricuit \circuit representing $AX$ and the circuit \circuit' representing $B\inparen{Y+Z}$ each encode a tree, with edges pointing towards the root.

The function $\polyf\inparen{\cdot}$ (\Cref{def:poly-func}) maps a circuit to its corresponding polynomial.  We next define its inverse, which maps a polynomial to the set of all circuits that encode it:
%of the function $\polyf(\cdot)$.% (\Cref{def:poly-func}).%, which maps a circuit to the polynomial it encodes.

\begin{Definition}[Circuit Set]\label{def:circuit-set}
$\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $\polyf\inparen{\circuit} = \polyX$.
\end{Definition}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$.  %One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\medskip
\noindent We are now ready to formally state the final version of \Cref{prob:intro-stmt}.%our \textbf{main problem}.

\begin{wrapfigure}{r}{0.3\textwidth}
	%\begin{figure}[t!]
		\centering
		%see https://tex.stackexchange.com/questions/26846/how-to-scale-a-tikzpicture-including-texts#26852
		%for a nice explanation of scaling a tikzpicture
		\begin{tikzpicture}[thick, scale=0.67, every node/.style={transform shape}]
			\node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$};
			\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
			\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
			\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};

			\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
			\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};

			\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
			\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};

			\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};

			\draw[->] (a1) -- (a2);
			\draw[->] (a1) -- (a3);
			\draw[->] (b1) -- (a2);
			\draw[->] (b1) -- (b2);
			\draw[->] (c1) -- (c2);
			\draw[->] (c1) -- (b2);
			\draw[->] (d1) -- (c2);
			\draw[->] (a2) -- (b3);
			\draw[->] (b2) -- (a3);
			\draw[->] (c2) -- (b3);
			\draw[->] (a3) -- (a4);
			\draw[->] (b3) -- (a4);
			\draw[->] (a4) -- (2.25, 2.75);
		\end{tikzpicture}
		%\setlength{\abovecaptionskip}{-0.025cm}
		\savecaptionspace{
		\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
		\label{fig:circuit}
		}{-0.025cm}{-0.58cm}
		%\vspace{-0.58cm}
	\end{wrapfigure}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\pdb$ be an arbitrary \abbrCTIDB and $\vct{X}$ be the set of variables annotating tuples in $\tupset$.  Fix an $\raPlus$ query $\query$ and a result tuple $\tup$.
  The \expectProblem is defined as follows:%\\[-7mm]
\begin{flalign*}
&\textbf{Input}: \circuit \in \circuitset{\polyX} \text{ for }\poly\inparen{\vct{X}} = \poly\pbox{\query,\tupset,\tup}&\\
&\textbf{Output}: \expct_{\vct{W} \sim \bpd}\pbox{\poly\pbox{\query, \tupset, \tup}\inparen{\vct{W}}}.&
\end{flalign*}
\end{Definition}

\input{circuits-model-runtime}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: