shorten
parent
ab6c53c52e
commit
0f704e7377
|
@ -34,7 +34,7 @@ For further explanation, please refer to \cref{example:expr-tree-T}.
|
|||
|
||||
\begin{Definition}[$\abs{\circuit}(\vct{X})$]\label{def:positive-circuit}
|
||||
For any circuit $\circuit$, the corresponding
|
||||
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
|
||||
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
|
||||
\end{Definition}
|
||||
Please see \cref{ex:def-pos-circ} for an illustration.
|
||||
|
||||
|
@ -97,14 +97,13 @@ Finally, we address the $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\l
|
|||
\begin{Lemma}
|
||||
\label{lem:val-ub}
|
||||
For any circuit $\circuit$ with $\degree(\circuit)=k$, we have
|
||||
\[\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \size(\circuit)}.\]
|
||||
$\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \size(\circuit)}.$
|
||||
Further, under either of the following conditions:
|
||||
\begin{enumerate}
|
||||
\item $\circuit$ is a tree,
|
||||
\item $\circuit$ encodes the run of the algorithm in~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query,
|
||||
\end{enumerate}
|
||||
we have
|
||||
\[\abs{\circuit}(1,\ldots, 1)\le \size(\circuit)^{O(k)}.\]
|
||||
we have $\abs{\circuit}(1,\ldots, 1)\le \size(\circuit)^{O(k)}.$
|
||||
\end{Lemma}
|
||||
|
||||
Note that the above implies that, under the assumption from \Cref{cor:approx-algo-const-p} that $\prob_0>0$ and $\gamma<1$ are absolute constants, the runtime there simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)^2\cdot \log{\frac{1}{\conf}}\right)$ for general circuits $\circuit$ and to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$ for the case when $\circuit$ satisfies the special conditions in~\Cref{lem:val-ub}. In~\Cref{app:proof-lem-val-ub} we argue that these conditions are very general and encompass many interesting scenarios.
|
||||
|
@ -128,7 +127,7 @@ To prove correctness of~\Cref{alg:mon-sam}, we only use the following fact that
|
|||
\begin{Lemma}\label{lem:sample}
|
||||
The function $\sampmon$ completes in time
|
||||
$$O(\log{k} \cdot k \cdot \depth(\circuit)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}})$$
|
||||
where $k = \degree(\circuit)$. The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
|
||||
where $k = \degree(\circuit)$. The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
|
||||
\end{Lemma}
|
||||
|
||||
With the above two lemmas, we are ready to argue the following result (proof in~\Cref{sec:proofs-approx-alg}):
|
||||
|
|
|
@ -30,10 +30,12 @@ We adopt a minimalistic compute-bound model of query evaluation drawn from the w
|
|||
%
|
||||
\noindent\resizebox{1\linewidth}{!}{
|
||||
\begin{minipage}{1.0\linewidth}
|
||||
\begin{align*}
|
||||
\qruntime{R,D} & = |R| &
|
||||
\qruntime{\sigma Q, D} & = \qruntime{Q,D} &
|
||||
\qruntime{\pi Q, D} & = \qruntime{Q,D} + \abs{Q(D)}
|
||||
\end{align*}\\[-15mm]
|
||||
\begin{align*}
|
||||
\qruntime{R,D} & = |R| \\
|
||||
\qruntime{\sigma Q, D} & = \qruntime{Q,D} \\
|
||||
\qruntime{\pi Q, D} & = \qruntime{Q,D} + \abs{Q(D)} \\
|
||||
\qruntime{Q \cup Q', D} & = \qruntime{Q, D} + \qruntime{Q', D} +\abs{Q(D)}+\abs{Q'(D)} \\
|
||||
\qruntime{Q_1 \bowtie \ldots \bowtie Q_n, D} & = \qruntime{Q_1, D} + \ldots + \qruntime{Q_n,D} + \abs{Q_1(D) \bowtie \ldots \bowtie Q_n(D)}
|
||||
\end{align*}
|
||||
|
|
|
@ -3,18 +3,25 @@
|
|||
%\onecolumn
|
||||
\subsection{Reduced Polynomials and Equivalences}
|
||||
|
||||
We now introduce some terminology for polynomials and develop a reduced form for polynomials --- a closed form of the polynomial's expectation over probability distributions derived from a \bi or \ti.
|
||||
We now introduce some terminology % for polynomials
|
||||
and develop a reduced form for polynomials --- a closed form of the polynomial's expectation over probability distributions derived from a \bi or \ti.
|
||||
%We will use $(X + Y)^2$ as a running example.
|
||||
Recall that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ is formally defined as:
|
||||
\[Q(X_1,\dots,X_n)=\sum_{\vct{i}=(i_1,\dots,i_n)\in \semN^n} c_{\vct{i}}\cdot \prod_{j=1}^n X_j^{i_j}.\]
|
||||
\begin{equation}
|
||||
\label{eq:sop-form}
|
||||
Q(X_1,\dots,X_n)=\sum_{\vct{i}=(i_1,\dots,i_n)\in \semN^n} c_{\vct{i}}\cdot \prod_{j=1}^n X_j^{i_j}.
|
||||
\end{equation}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
|
||||
%A monomial is a product of variable terms, each raised to a non-negative integer power.
|
||||
% A polynomial in \termSMB (\abbrSMB) has the form: $\sum_{i=1}^n c_i \cdot m_i$ for each of its $n$ terms, where each $c_i \neq 0$ is an integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$. We use $\smbOf{\poly}$ to denote the \abbrSMB of $\poly$.
|
||||
The term $\prod_{j=1}^n X_j^{i_j}$ is a {\em monomial}. A polynomial $Q(\vct{X})$ is in standard monomial basis (or SMB) if in the above sum only terms with $c_{\vct{i}}\ne 0$ appear.
|
||||
The term $\prod_{j=1}^n X_j^{i_j}$ is a {\em monomial}. A polynomial $Q(\vct{X})$ is in standard monomial basis (SMB) if % in the above sum
|
||||
terms with $c_{\vct{i}} = 0$ are removed from \Cref{eq:sop-form}.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
In this paper we consider the default representation of a polynomial to be in \abbrSMB. Sometimes when we want to stress that we want to use the SMB representation of a polynomial $\poly$ we will explicitly state $\smbOf{\poly}$.
|
||||
We consider \abbrSMB as the default representation of a polynomial. % When we want to stress the use of the SMB representation,
|
||||
We use $\smbOf{\poly}$ to denote the SMB form of a polynomial $\poly$.
|
||||
|
||||
%The \abbrSMB for the running example is $X^2 +2XY + Y^2$. Note that the example's SOP expansion $X^2 + XY + XY + Y^2$ is not $\smbOf{(X+Y)^2}$ since $XY$ appears twice.
|
||||
|
||||
|
@ -25,7 +32,7 @@ The degree of polynomial $\poly(\vct{X})$ is the largest $\sum_{j=1}^n i_j$ such
|
|||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
The degree of the polynomial $X^2+2XY+Y^2$ is $2$.
|
||||
The degree of the polynomial $X^2+2XY+Y^2$ is $2$.
|
||||
Product terms in lineage arise only as a consequence of join operations, so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins in any clause of the UCQ query that created it.
|
||||
In this paper we consider only finite degree polynomials.
|
||||
%
|
||||
|
@ -38,13 +45,13 @@ In this paper we consider only finite degree polynomials.
|
|||
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%
|
||||
We call a polynomial $\query(\vct{X})$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if
|
||||
%\AH{Why is it required for the tuple to be n-ary? I think this slightly confuses me since we have n tuples.}
|
||||
%\AH{Why is it required for the tuple to be n-ary? I think this slightly confuses me since we have n tuples.}
|
||||
% OK: agreed w/ AH, this can be treated as implicit
|
||||
there exists a $\raPlus$ query $\query$, \bi $\pxdb$ (\ti $\pxdb$, or $\semNX$-PDB $\pxdb$), and tuple $\tup$ such that $\query(\vct{X}) = \query(\pxdb)(\tup)$. % Before proceeding, note that the following is assume that polynomials are \bis (which subsume \tis as a special case).
|
||||
As a special case of \bis, the following applies to \tis as well.
|
||||
In a \bi $\pxdb$, tuples are partitioned into $\ell$ blocks $\block_1, \ldots, \block_\ell$ where tuple $t_{i,j} \in \block_i$ is associated with a probability $\prob_{\tup_{i,j}} = \pd[X_{i,j} = 1]$, and is annotated with a unique variable $X_{i,j}$.\footnote{
|
||||
Although only a single independent, $[\abs{\block_i}+1]$-valued variable is customarily used per block, we decompose it into $\abs{\block_i}$ correlated $\{0,1\}$-valued variables per block that can be used directly in polynomials (without an indicator function). For $t_j \in b_i$, the event $(X_{i,j} = 1)$ corresponds to the event $(X_i = j)$ in the customary annotation scheme.
|
||||
}
|
||||
}
|
||||
Because blocks are independent and tuples from the same block are disjoint, the probabilities $\prob_{\tup_{i,j}}$ and the blocks induce the probability distribution $\pd$ of $\pxdb$.
|
||||
We will write a \bi-lineage polynomial $\poly(\vct{X})$ for a \bi with $\ell$ blocks as
|
||||
$\poly(\vct{X})$ = $\poly(X_{1, 1},\ldots, X_{1, \abs{\block_1}},$ $\ldots, X_{\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$.\footnote{Later on in the paper, especially in~\Cref{sec:algo}, we will overload notation and rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{b_i}$.}
|
||||
|
@ -93,16 +100,16 @@ Given the set of BIDB variables $\inset{X_{i,j}}$, define
|
|||
%
|
||||
\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
|
||||
Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
|
||||
The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is:
|
||||
\begin{equation*}
|
||||
\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
|
||||
\end{equation*}
|
||||
The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is: $\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}$
|
||||
% \begin{equation*}
|
||||
% \rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
|
||||
% \end{equation*}
|
||||
%for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$.
|
||||
\end{Definition}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%
|
||||
|
||||
All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$. Performing the modulus of $\rpoly(\vct{X})$ with $\mathcal{B}$ ensures the disjoint condition of \bi, removing monomials with lineage variables from the same block.%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
|
||||
All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$. Performing the modulus of $\rpoly(\vct{X})$ with $\mathcal{B}$ ensures the disjoint condition of \bi, removing monomials with lineage variables from the same block.%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
|
||||
%
|
||||
For the special case of \tis, the second step is not necessary since every block contains a single tuple.
|
||||
%Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
|
||||
|
@ -138,7 +145,7 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
|
|||
%
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Definition}[Valid Worlds]
|
||||
For probability distribution $\probDist$, % and its corresponding probability mass function $\probOf$,
|
||||
For probability distribution $\probDist$, % and its corresponding probability mass function $\probOf$,
|
||||
the set of valid worlds $\eta$ consists of all the worlds with probability value greater than $0$; i.e., for variable vector $\vct{W}$
|
||||
\[
|
||||
\eta = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}
|
||||
|
@ -168,8 +175,8 @@ Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and w
|
|||
\end{Lemma}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
Note that in the preceding lemma, we have assigned $\vct{p}$
|
||||
%(introduced in \Cref{subsec:def-data})
|
||||
Note that in the preceding lemma, we have assigned $\vct{p}$
|
||||
%(introduced in \Cref{subsec:def-data})
|
||||
to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.
|
||||
|
||||
|
||||
|
|
30
prob-def.tex
30
prob-def.tex
|
@ -10,10 +10,10 @@ For illustrative purposes consider the polynomial $\poly(\vct{X}) = 2X^2 + 3XY -
|
|||
We represent query polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way.
|
||||
|
||||
\begin{Definition}[Circuit]\label{def:circuit}
|
||||
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source nodes (in degree of $0$) consist of elements in either $\reals$ or $\vct{X}$. The internal nodes and (the single) sink node of $\circuit$ (corresponding to the result tuple $t$) have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
|
||||
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source nodes (in degree of $0$) consist of elements in either $\reals$ or $\vct{X}$. The internal nodes and (the single) sink node of $\circuit$ (corresponding to the result tuple $t$) have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
|
||||
|
||||
$\circuit$ additionally has the following members: \type, \vari{val}, \vari{partial}, \vari{input}, \degval and \vari{Lweight}, \vari{Rweight}, where \type is the type of value stored in the node $\circuit$ (i.e., one of $\{\circplus, \circmult, \var, \tnum\}$), \val is the value stored (a constant or variable), and \vari{input} is the list of \circuit 's inputs where $\circuit_\linput$ is the left input and $\circuit_\rinput$ the right input.
|
||||
%The member \degval holds the degree of \circuit.
|
||||
$\circuit$ additionally has the following members: \type, \vari{val}, \vari{partial}, \vari{input}, \degval and \vari{Lweight}, \vari{Rweight}, where \type is the type of value stored in the node $\circuit$ (i.e., one of $\{\circplus, \circmult, \var, \tnum\}$), \val is the value stored (a constant or variable), and \vari{input} is the list of \circuit 's inputs where $\circuit_\linput$ is the left input and $\circuit_\rinput$ the right input.
|
||||
%The member \degval holds the degree of \circuit.
|
||||
When the underlying DAG is a tree (with edges pointing towards the root), we will refer to the structure as an expression tree \etree. Note that in such a case, the root of \etree is analogous to the sink of \circuit.
|
||||
\end{Definition}
|
||||
|
||||
|
@ -21,7 +21,7 @@ When the underlying DAG is a tree (with edges pointing towards the root), we wil
|
|||
|
||||
|
||||
As stated in \Cref{def:circuit}, every internal node has at most two in-edges, is labeled as an addition or a multiplication node, and has no limit on its outdegree.
|
||||
Note that if we limit the outdegree to one, then we get expression trees.
|
||||
Note that if we limit the outdegree to one, then we get expression trees.
|
||||
We ignore the fields \vari{partial}, \vari{Lweight}, and \vari{Rweight} until \Cref{sec:algo}.
|
||||
|
||||
|
||||
|
@ -37,12 +37,12 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
|
|||
\node[tree_node] (b1) at (1, 0){$\boldsymbol{Y}$};
|
||||
\node[tree_node] (c1) at (2, 0){$\boldsymbol{W}$};
|
||||
\node[tree_node] (d1) at (3, 0){$\boldsymbol{Z}$};
|
||||
|
||||
|
||||
\node[tree_node] (a2) at (0.5, 1){$\boldsymbol{\circmult}$};
|
||||
\node[tree_node] (b2) at (2.5, 1){$\boldsymbol{\circmult}$};
|
||||
|
||||
|
||||
\node[tree_node] (a3) at (1.5, 2){$\boldsymbol{\circplus}$};
|
||||
|
||||
|
||||
\draw[->] (a1) -- (a2);
|
||||
\draw[->] (b1) -- (a2);
|
||||
\draw[->] (c1) -- (b2);
|
||||
|
@ -62,16 +62,16 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
|
|||
\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
|
||||
\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
|
||||
\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};
|
||||
|
||||
|
||||
\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
|
||||
\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
|
||||
\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};
|
||||
|
||||
|
||||
\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
|
||||
\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};
|
||||
|
||||
\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
|
||||
|
||||
|
||||
\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
|
||||
|
||||
\draw[->] (a1) -- (a2);
|
||||
\draw[->, thick] (a1) -- (a3);
|
||||
\draw[->] (b1) -- (a2);
|
||||
|
@ -89,7 +89,7 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
|
|||
\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
|
||||
\label{fig:circuit}
|
||||
\end{subfigure}
|
||||
\caption{ }
|
||||
\caption{Example circuit encodings}
|
||||
\end{figure}
|
||||
|
||||
|
||||
|
@ -113,8 +113,8 @@ $\circuitset{\smb}$ is the set of all possible circuits $\circuit$ such that $\p
|
|||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$. One can think of $\circuitset{\smb}$ as the infinite set of circuits, each of which models an encoding (factorization) equal to $\polyf(\circuit)$.
|
||||
%\supset \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.
|
||||
The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$. One can think of $\circuitset{\smb}$ as the infinite set of circuits, each of which models an encoding (factorization) equal to $\polyf(\circuit)$.
|
||||
%\supset \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.
|
||||
Note that \Cref{def:circuit-set} implies that $\circuit \in \circuitset{\polyf(\circuit)}$.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
Loading…
Reference in New Issue