%root: main.tex %!TEX root=./main.tex \subsection{Problem Definition}\label{sec:expression-trees} We first formally define circuits, an encoding of polynomials that we use throughout the paper. Since we specifically use \emph{lineage} circuits, we drop the term \emph{lineage} and refer to them simply as circuits. % For illustrative purposes consider the polynomial $\poly(\vct{X}) = 2X^2 + 3XY - 2Y^2$ over $\vct{X} = [X, Y]$. We represent query polynomials via \emph{arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb{N}$ in the obvious way. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Circuit]\label{def:circuit} A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source gates (in-degree of $0$) consist of elements in either $\domN$ or $\vct{X}$. The internal gates have binary input and are either sum ($\circplus$) or product ($\circmult$) gates. % Each gate in a circuit $\circuit$ has the following members: \type, \vpartial, \vari{input}, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the type of value stored in the gate (one of $\{\circplus, \circmult, \var, \tnum\}$) and \vari{input} is the list of the gate's inputs. The source gates have an additional member \val, which holds the value stored (constant or variable). We use $\circuit_\linput$ to denote the left input and $\circuit_\rinput$ the right input of a sink of circuit $\circuit$. %The member \degval holds the degree of \circuit. When the underlying DAG is a tree (with edges pointing towards the root), we will refer to the structure as an expression tree \etree. Note that in such a case, the root of \etree is analogous to the sink of \circuit. 
\end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% As stated in \Cref{def:circuit}, every internal node has at most two in-edges, is labeled as an addition or a multiplication node, and has no limit on its outdegree. Note that if we limit the outdegree to one, then we get expression trees. We ignore the fields \vpartial, \degval, \vari{Lweight}, and \vari{Rweight} until \Cref{sec:algo}.\AH{We omit degree here too, which \emph{I think} is used only in appendix proofs.} \begin{Example} The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $XY + WZ$. Note that circuit \circuit encodes a tree, with edges pointing towards the root. \end{Example} \begin{figure}[t] \begin{subfigure}[b]{0.45\linewidth} \centering \begin{tikzpicture}[thick] \node[tree_node] (a1) at (0, 0){$\boldsymbol{X}$}; \node[tree_node] (b1) at (1, 0){$\boldsymbol{Y}$}; \node[tree_node] (c1) at (2, 0){$\boldsymbol{W}$}; \node[tree_node] (d1) at (3, 0){$\boldsymbol{Z}$}; \node[tree_node] (a2) at (0.5, 1){$\boldsymbol{\circmult}$}; \node[tree_node] (b2) at (2.5, 1){$\boldsymbol{\circmult}$}; \node[tree_node] (a3) at (1.5, 2){$\boldsymbol{\circplus}$}; \draw[->] (a1) -- (a2); \draw[->] (b1) -- (a2); \draw[->] (c1) -- (b2); \draw[->] (d1) -- (b2); \draw[->] (a2) -- (a3); \draw[->] (b2) -- (a3); \draw[->] (a3) -- (1.5, 2.5); \end{tikzpicture} \caption{Circuit encoding $XY + WZ$, a special case of an expression tree} \label{fig:circuit-express-tree} \end{subfigure} \hspace{5mm} \begin{subfigure}[b]{0.45\linewidth} \centering \begin{tikzpicture}[thick] \node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$}; \node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$}; \node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$}; \node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$}; \node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$}; \node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$}; \node[tree_node] (c2) at (3.75, 0.75) 
{$\boldsymbol{\circmult}$}; \node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$}; \node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$}; \node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$}; \draw[->] (a1) -- (a2); \draw[->] (a1) -- (a3); \draw[->] (b1) -- (a2); \draw[->] (b1) -- (b2); \draw[->] (c1) -- (c2); \draw[->] (c1) -- (b2); \draw[->] (d1) -- (c2); \draw[->] (a2) -- (b3); \draw[->] (b2) -- (a3); \draw[->] (c2) -- (b3); \draw[->] (a3) -- (a4); \draw[->] (b3) -- (a4); \draw[->] (a4) -- (2.25, 2.75); \end{tikzpicture} \caption{Circuit encoding of $(X + 2Y)(2X - Y)$} \label{fig:circuit} \end{subfigure} \caption{Example circuit encodings} \end{figure} The semantics of circuits follows the obvious interpretation. We next define its relationship with polynomials formally: \begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func} Denote by $\polyf(\cdot)$ the function that maps a circuit $\circuit$ to its corresponding polynomial (in \abbrSMB).\footnote{Recall our assumption that unless otherwise mentioned, all polynomials are considered in $\abbrSMB$.} $\polyf(\cdot)$ is recursively defined on $\circuit$ as follows, with addition and multiplication following the standard interpretation for polynomials: \begin{equation*} \polyf(\circuit) = \begin{cases} \polyf(\circuit_\linput) + \polyf(\circuit_\rinput) &\text{ if \circuit.\type } = \circplus\\ \polyf(\circuit_\linput) \cdot \polyf(\circuit_\rinput) &\text{ if \circuit.\type } = \circmult\\ \circuit.\val &\text{ if \circuit.\type } = \var \text{ OR } \tnum. \end{cases} \end{equation*} \end{Definition} Note that $\circuit$ need not encode an expression in \abbrSMB. For instance, $\circuit$ could represent a compressed form of the running example, such as $(X + 2Y)(2X - Y)$, as shown in \Cref{fig:circuit}, while $\polyf(\circuit) = 2X^2+3XY-2Y^2$. 
\begin{Definition}[Circuit Set]\label{def:circuit-set} $\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $\polyf(\circuit) = \polyX$.\footnote{Again, the representation of $\polyX$ is $\abbrSMB$.} \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$. One can think of $\circuitset{\polyX}$ as the infinite set of circuits each of which equals $\polyX$ when represented in $\abbrSMB$. Note that \Cref{def:circuit-set} implies that $\circuit \in \circuitset{\polyf(\circuit)}$. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \medskip \noindent We are now ready to formally state our \textbf{main problem}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl} Let $\vct{X} = (X_1, \ldots, X_n)$, and $\pxdb$ be an $\semNX$-PDB over $\vct{X}$ with probability distribution $\pd$ over assignments $\vct{X} \to \{0,1\}$, $\query$ an $n$-ary query, and $t$ an $n$-ary tuple. The \expectProblem is defined as follows:\\[-7mm] \begin{center} \textbf{Input}: A circuit $\circuit \in \circuitset{\polyX}$ for $\polyX = \query(\pxdb)(t)$ \hspace*{5mm}\textbf{Output}: $\expct_{\vct{W} \sim \pd}[\poly(\vct{W})]$ \end{center} \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: