% paper-BagRelationalPDBsAreHard/prob-def.tex
%
% 136 lines
% 7.1 KiB
% TeX
% Raw Normal View History

%root: main.tex
\subsection{Problem Definition}\label{sec:expression-trees}
We first formally define circuits, an encoding of polynomials that we use throughout the paper. Since we exclusively use \emph{lineage} circuits, we drop the term lineage and simply refer to them as circuits.
For illustrative purposes consider the polynomial $\poly(\vct{X}) = 2X^2 + 3XY - 2Y^2$ over $\vct{X} = [X, Y]$.
We represent query polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way.
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source nodes (in-degree of $0$) consist of elements in either $\reals$ or $\vct{X}$. The internal nodes and sink node of $\circuit$ have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
$\circuit$ additionally has the following members: \type, \val, \vari{partial}, \vari{input}, \degval and \vari{Lweight}, \vari{Rweight}, where \type is the type of value stored in the node $\circuit$ (i.e., one of $\{\circplus, \circmult, \var, \tnum\}$), \val is the value stored (a constant or variable), and \vari{input} is the list of \circuit 's inputs where $\circuit_\linput$ is the left input and $\circuit_\rinput$ the right input. The member \degval holds the degree of \circuit. When the underlying DAG is a tree (with edges pointing towards the root), we will refer to the structure as an expression tree \etree. Note that in such a case, the root of \etree is analogous to the sink of \circuit.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As stated in~\Cref{def:circuit}, every internal node has at most two in-edges, is labeled as an addition or a multiplication node, and has no limit on its outdegree.
Note that if we limit the outdegree to one, then we get expression trees.
\begin{Example}
The circuit \circuit in~\Cref{fig:circuit-express-tree} encodes the polynomial $XY + WZ$. Note that such an encoding lends itself naturally to having all gates with an outdegree of $1$. Note further that \circuit is indeed a tree with edges pointing towards the root.
\end{Example}
\begin{figure}[t]
\begin{subfigure}[b]{0.45\linewidth}
\centering
\begin{tikzpicture}[thick]
\node[tree_node] (a1) at (0, 0){$\boldsymbol{X}$};
\node[tree_node] (b1) at (1, 0){$\boldsymbol{Y}$};
\node[tree_node] (c1) at (2, 0){$\boldsymbol{W}$};
\node[tree_node] (d1) at (3, 0){$\boldsymbol{Z}$};
\node[tree_node] (a2) at (0.5, 1){$\boldsymbol{\circmult}$};
\node[tree_node] (b2) at (2.5, 1){$\boldsymbol{\circmult}$};
\node[tree_node] (a3) at (1.5, 2){$\boldsymbol{\circplus}$};
\draw[->] (a1) -- (a2);
\draw[->] (b1) -- (a2);
\draw[->] (c1) -- (b2);
\draw[->] (d1) -- (b2);
\draw[->] (a2) -- (a3);
\draw[->] (b2) -- (a3);
\draw[->] (a3) -- (1.5, 2.5);
\end{tikzpicture}
\caption{Circuit encoding $XY + WZ$, a special case of an expression tree}
\label{fig:circuit-express-tree}
\end{subfigure}
\hspace{5mm}
\begin{subfigure}[b]{0.45\linewidth}
\centering
\begin{tikzpicture}[thick]
\node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$};
\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};
\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};
\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
\draw[->] (a1) -- (a2);
\draw[->, thick] (a1) -- (a3);
\draw[->] (b1) -- (a2);
\draw[->] (b1) -- (b2);
\draw[->] (c1) -- (c2);
\draw[->] (c1) -- (b2);
\draw[->] (d1) -- (c2);
\draw[->] (a2) -- (b3);
\draw[->] (b2) -- (a3);
\draw[->] (c2) -- (b3);
\draw[->] (a3) -- (a4);
\draw[->] (b3) -- (a4);
\draw[->] (a4) -- (2.25, 2.75);
\end{tikzpicture}
\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
\label{fig:circuit}
\end{subfigure}
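\caption{Example circuits: an expression tree (left) and a circuit whose source nodes feed multiple gates (right).}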
\end{figure}
We ignore the remaining fields (\vari{partial}, \vari{Lweight}, and \vari{Rweight}) until \Cref{sec:algo}.
The semantics of circuits follows the obvious interpretation. We next define their relationship with polynomials formally:
\begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func}
Denote $\polyf(\circuit)$ to be the function from circuit $\circuit$ to its corresponding polynomial. $\polyf(\cdot)$ is recursively defined on $\circuit$ as follows, with addition and multiplication following the standard interpretation for polynomials:
\begin{equation*}
\polyf(\circuit) = \begin{cases}
\polyf(\circuit_\linput) + \polyf(\circuit_\rinput) &\text{ if \circuit.\type } = \circplus\\
\polyf(\circuit_\linput) \cdot \polyf(\circuit_\rinput) &\text{ if \circuit.\type } = \circmult\\
\circuit.\val &\text{ if \circuit.\type } = \var \text{ OR } \tnum.
\end{cases}
\end{equation*}
\end{Definition}
Note that $\circuit$ need not encode an expression in the standard monomial basis (SMB), whereas, as stated previously, a polynomial is considered to be in SMB; the output of $\polyf(\cdot)$ is therefore in SMB. For instance, $\circuit$ could represent a compressed form of the running example, such as $(X + 2Y)(2X - Y)$, as shown in \Cref{fig:circuit}.
\begin{Definition}[Circuit Set]\label{def:circuit-set}
$\circuitset{\smb}$ is the set of all possible circuits $\circuit$ such that $\polyf(\circuit) = \poly(\vct{X})$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The circuit of \Cref{fig:circuit} is an element of $\circuitset{\smb}$. One can think of $\circuitset{\smb}$ as the infinite set of circuits, each of which models an encoding (factorization) equivalent to $\poly(\vct{X})$.
%\supset \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.
Note that~\Cref{def:circuit-set} implies that $\circuit \in \circuitset{\polyf(\circuit)}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\medskip
\noindent We are now ready to formally state our \textbf{main problem}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\vct{X} = (X_1, \ldots, X_n)$, and $\pdb$ be an $\semNX$-PDB over $\vct{X}$ with probability distribution $\pd$ over assignments $\vct{X} \to [0,1]$, $\query$ an $n$-ary query, and $t$ an $n$-ary tuple.
The \expectProblem is defined as follows:
\hspace*{5mm}\textbf{Input}: A circuit $\circuit \in \circuitset{\smb}$ for $\poly(\vct{X}) = \query(\pxdb)(t)$\hspace*{5mm}\textbf{Output}: $\expct_{\vct{W} \sim \pd}[\poly(\vct{W})]$
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: