shorten

2021-04-08 21:30:03 -05:00 · 2021-04-08 21:30:03 -05:00 · 0f704e7377
parent ab6c53c52e
commit 0f704e7377
4 changed files with 46 additions and 38 deletions
--- a/approx_alg.tex
+++ b/approx_alg.tex
@ -34,7 +34,7 @@ For further explanation, please refer to \cref{example:expr-tree-T}.

 \begin{Definition}[$\abs{\circuit}(\vct{X})$]\label{def:positive-circuit}
 For any circuit $\circuit$, the corresponding
-{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$. 
+{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
 \end{Definition}
 Please see \cref{ex:def-pos-circ} for an illustration.

@ -97,14 +97,13 @@ Finally, we address the $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\l
 \begin{Lemma}
 \label{lem:val-ub}
 For any circuit $\circuit$ with $\degree(\circuit)=k$, we have
-\[\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \size(\circuit)}.\]
+$\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \size(\circuit)}.$
 Further, under either of the following conditions:
 \begin{enumerate}
 \item $\circuit$ is a tree,
 \item $\circuit$ encodes the run of the algorithm in~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query,
 \end{enumerate}
-we have
-\[\abs{\circuit}(1,\ldots, 1)\le  \size(\circuit)^{O(k)}.\]
+we have $\abs{\circuit}(1,\ldots, 1)\le  \size(\circuit)^{O(k)}.$
 \end{Lemma}

 Note that the above implies that, under the assumption from \Cref{cor:approx-algo-const-p} that $\prob_0>0$ and $\gamma<1$ are absolute constants, the runtime there simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)^2\cdot \log{\frac{1}{\conf}}\right)$ for general circuits $\circuit$ and to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$ for the case when $\circuit$ satisfies the special conditions in~\Cref{lem:val-ub}. In~\Cref{app:proof-lem-val-ub} we argue that these conditions are very general and encompass many interesting scenarios.
@ -128,7 +127,7 @@ To prove correctness of~\Cref{alg:mon-sam}, we only use the following fact that
 \begin{Lemma}\label{lem:sample}
 The function $\sampmon$ completes in time
 $$O(\log{k} \cdot k \cdot \depth(\circuit)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}})$$
- where $k = \degree(\circuit)$.  The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$. 
+ where $k = \degree(\circuit)$.  The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
 \end{Lemma}

 With the above two lemmas, we are ready to argue the following result (proof in~\Cref{sec:proofs-approx-alg}):
--- a/circuits-model-runtime.tex
+++ b/circuits-model-runtime.tex
@ -30,10 +30,12 @@ We adopt a minimalistic compute-bound model of query evaluation drawn from the w
 %
 \noindent\resizebox{1\linewidth}{!}{
 \begin{minipage}{1.0\linewidth}
+  \begin{align*}
+\qruntime{R,D}                               & = |R|                                                        &
+                                                                                                              \qruntime{\sigma Q, D}                       & = \qruntime{Q,D}                                             &
+                                                                                                                                                                                                                            \qruntime{\pi Q, D}                          & = \qruntime{Q,D} + \abs{Q(D)}
+  \end{align*}\\[-15mm]
 \begin{align*}
-\qruntime{R,D}                               & = |R|                                                        \\
-\qruntime{\sigma Q, D}                       & = \qruntime{Q,D}                                             \\
-\qruntime{\pi Q, D}                          & = \qruntime{Q,D} + \abs{Q(D)}                                \\
 \qruntime{Q \cup Q', D}                      & = \qruntime{Q, D} + \qruntime{Q', D} +\abs{Q(D)}+\abs{Q'(D)} \\
 \qruntime{Q_1 \bowtie \ldots \bowtie Q_n, D} & = \qruntime{Q_1, D} + \ldots + \qruntime{Q_n,D} + \abs{Q_1(D) \bowtie \ldots \bowtie Q_n(D)}
 \end{align*}
--- a/poly-form.tex
+++ b/poly-form.tex
@ -3,18 +3,25 @@
 %\onecolumn
 \subsection{Reduced Polynomials and Equivalences}

-We now introduce some terminology for polynomials and develop a reduced form for polynomials --- a closed form of the polynomial's expectation over probability distributions derived from a \bi or \ti.
+We now introduce some terminology % for polynomials
+and develop a reduced form for polynomials --- a closed form of the polynomial's expectation over probability distributions derived from a \bi or \ti.
 %We will use $(X + Y)^2$ as a running example.
 Recall that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ is formally defined as:
-\[Q(X_1,\dots,X_n)=\sum_{\vct{i}=(i_1,\dots,i_n)\in \semN^n} c_{\vct{i}}\cdot \prod_{j=1}^n X_j^{i_j}.\]
+\begin{equation}
+  \label{eq:sop-form}
+Q(X_1,\dots,X_n)=\sum_{\vct{i}=(i_1,\dots,i_n)\in \semN^n} c_{\vct{i}}\cdot \prod_{j=1}^n X_j^{i_j}.
+\end{equation}

+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Definition}[Standard Monomial Basis]\label{def:smb}
 %A monomial is a product of variable terms, each raised to a non-negative integer power.
 %  A polynomial in \termSMB (\abbrSMB) has the form: $\sum_{i=1}^n c_i \cdot m_i$ for each of its $n$ terms, where each $c_i \neq 0$ is an integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$. We use $\smbOf{\poly}$ to denote the \abbrSMB of $\poly$.
-The term $\prod_{j=1}^n X_j^{i_j}$ is a {\em monomial}. A polynomial $Q(\vct{X})$ is in standard monomial basis (or SMB) if in the above sum only terms with $c_{\vct{i}}\ne 0$ appear.
+The term $\prod_{j=1}^n X_j^{i_j}$ is a {\em monomial}. A polynomial $Q(\vct{X})$ is in standard monomial basis (SMB) if % in the above sum
+ terms with $c_{\vct{i}} = 0$ are removed from \Cref{eq:sop-form}.
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-In this paper we consider the default representation of a polynomial to be in \abbrSMB. Sometimes when we want to stress that we want to use the SMB representation of a polynomial $\poly$ we will explicitly state $\smbOf{\poly}$.
+We consider \abbrSMB as the default representation of a polynomial. % When we want to stress the use of the SMB representation,
+We use $\smbOf{\poly}$ to denote the SMB form of a polynomial $\poly$.

 %The \abbrSMB for the running example is $X^2 +2XY + Y^2$.  Note that the example's SOP expansion $X^2 + XY + XY + Y^2$ is not $\smbOf{(X+Y)^2}$ since $XY$ appears twice.

@ -25,7 +32,7 @@ The degree of polynomial $\poly(\vct{X})$ is the largest $\sum_{j=1}^n i_j$ such
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-The degree of the polynomial $X^2+2XY+Y^2$ is $2$. 
+The degree of the polynomial $X^2+2XY+Y^2$ is $2$.
 Product terms in lineage arise only as a consequence of join operations, so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins in any clause of the UCQ query that created it.
 In this paper we consider only finite degree polynomials.
 %
@ -38,13 +45,13 @@ In this paper we consider only finite degree polynomials.
 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %
 We call a polynomial $\query(\vct{X})$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if
-%\AH{Why is it required for the tuple to be n-ary?  I think this slightly confuses me since we have n tuples.} 
+%\AH{Why is it required for the tuple to be n-ary?  I think this slightly confuses me since we have n tuples.}
 % OK: agreed w/ AH, this can be treated as implicit
 there exists a $\raPlus$ query $\query$, \bi $\pxdb$ (\ti $\pxdb$, or $\semNX$-PDB $\pxdb$), and tuple $\tup$ such that $\query(\vct{X}) = \query(\pxdb)(\tup)$. % Before proceeding, note that the following is assume that polynomials are  \bis (which subsume \tis as a special case).
 As a special case of \bis, the following applies to \tis as well.
 In a \bi $\pxdb$, tuples are partitioned into $\ell$ blocks $\block_1, \ldots, \block_\ell$ where tuple $t_{i,j} \in \block_i$ is associated with a probability $\prob_{\tup_{i,j}} = \pd[X_{i,j} = 1]$, and is annotated with a unique variable $X_{i,j}$.\footnote{
  Although only a single independent, $[\abs{\block_i}+1]$-valued variable is customarily used per block, we decompose it into $\abs{\block_i}$ correlated $\{0,1\}$-valued variables per block that can be used directly in polynomials (without an indicator function).  For $t_{i,j} \in \block_i$, the event $(X_{i,j} = 1)$ corresponds to the event $(X_i = j)$ in the customary annotation scheme.
-} 
+}
 Because blocks are independent and tuples from the same block are disjoint, the probabilities $\prob_{\tup_{i,j}}$ and the blocks induce the probability distribution $\pd$ of $\pxdb$.
 We will write a \bi-lineage polynomial $\poly(\vct{X})$ for a \bi with $\ell$ blocks as
 $\poly(\vct{X}) = \poly(X_{1, 1},\ldots, X_{1, \abs{\block_1}},$ $\ldots, X_{\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$.\footnote{Later on in the paper, especially in~\Cref{sec:algo}, we will overload notation and rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{\block_i}$.}
@ -93,16 +100,16 @@ Given the set of BIDB variables $\inset{X_{i,j}}$, define
 %
 \begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
  Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
-  The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is:
-\begin{equation*}
-\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
-\end{equation*}
+  The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is: $\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}$.
+% \begin{equation*}
+% \rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
+% \end{equation*}
 %for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$.
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %

-All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$.  Performing the modulus of $\rpoly(\vct{X})$ with $\mathcal{B}$ ensures the disjoint condition of \bi, removing monomials with lineage variables from the same block.%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored). 
+All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$.  Performing the modulus of $\rpoly(\vct{X})$ with $\mathcal{B}$ ensures the disjoint condition of \bi, removing monomials with lineage variables from the same block.%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
 %
 For the special case of \tis, the second step is not necessary since every block contains a single tuple.
 %Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
@ -138,7 +145,7 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
 %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Definition}[Valid Worlds]
-For probability distribution $\probDist$, % and its corresponding probability mass function $\probOf$, 
+For probability distribution $\probDist$, % and its corresponding probability mass function $\probOf$,
 the set of valid worlds $\eta$ consists of all the worlds with probability value greater than $0$; i.e., for variable vector $\vct{W}$
 \[
 \eta = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}
@ -168,8 +175,8 @@ Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and w
 \end{Lemma}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-Note that in the preceding lemma, we have assigned $\vct{p}$ 
-%(introduced in \Cref{subsec:def-data}) 
+Note that in the preceding lemma, we have assigned $\vct{p}$
+%(introduced in \Cref{subsec:def-data})
 to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.


--- a/prob-def.tex
+++ b/prob-def.tex
@ -10,10 +10,10 @@ For illustrative purposes consider the polynomial $\poly(\vct{X}) = 2X^2 + 3XY -
 We represent query polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way.

 \begin{Definition}[Circuit]\label{def:circuit}
-A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source nodes (in degree of $0$) consist of elements in either $\reals$ or $\vct{X}$.  The internal nodes and (the single) sink node of $\circuit$ (corresponding to the result tuple $t$) have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.  
+A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source nodes (in degree of $0$) consist of elements in either $\reals$ or $\vct{X}$.  The internal nodes and (the single) sink node of $\circuit$ (corresponding to the result tuple $t$) have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.

-$\circuit$ additionally has the following members: \type, \vari{val}, \vari{partial}, \vari{input}, \degval and \vari{Lweight}, \vari{Rweight}, where \type is the type of value stored in the node $\circuit$ (i.e. one of $\{\circplus, \circmult, \var, \tnum\}$, \val is the value stored (a constant or variable), and \vari{input} is the list of \circuit 's inputs where $\circuit_\linput$ is the left input and $\circuit_\rinput$ the right input.  
-%The member \degval holds the degree of \circuit.  
+$\circuit$ additionally has the following members: \type, \vari{val}, \vari{partial}, \vari{input}, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the type of value stored in the node $\circuit$ (i.e., one of $\{\circplus, \circmult, \var, \tnum\}$), \val is the value stored (a constant or variable), and \vari{input} is the list of \circuit 's inputs where $\circuit_\linput$ is the left input and $\circuit_\rinput$ the right input.
+%The member \degval holds the degree of \circuit.
 When the underlying DAG is a tree (with edges pointing towards the root), we will refer to the structure as an expression tree \etree.  Note that in such a case, the root of \etree is analogous to the sink of \circuit.
 \end{Definition}

@ -21,7 +21,7 @@ When the underlying DAG is a tree (with edges pointing towards the root), we wil


 As stated in \Cref{def:circuit}, every internal node has at most two in-edges, is labeled as an addition or a multiplication node, and has no limit on its outdegree.
-Note that if we limit the outdegree to one, then we get expression trees. 
+Note that if we limit the outdegree to one, then we get expression trees.
 We ignore the fields \vari{partial}, \vari{Lweight}, and \vari{Rweight} until \Cref{sec:algo}.


@ -37,12 +37,12 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
 			\node[tree_node] (b1) at (1, 0){$\boldsymbol{Y}$};
 			\node[tree_node] (c1) at (2, 0){$\boldsymbol{W}$};
 			\node[tree_node] (d1) at (3, 0){$\boldsymbol{Z}$};
-	
+
 			\node[tree_node] (a2) at (0.5, 1){$\boldsymbol{\circmult}$};
 			\node[tree_node] (b2) at (2.5, 1){$\boldsymbol{\circmult}$};
-	
+
 			\node[tree_node] (a3) at (1.5, 2){$\boldsymbol{\circplus}$};
-	
+
 			\draw[->] (a1) -- (a2);
 			\draw[->] (b1) -- (a2);
 			\draw[->] (c1) -- (b2);
@ -62,16 +62,16 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
 			\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
 			\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
 			\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};
-	
+
 			\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
 			\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
 			\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};
-	
+
 			\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
 			\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};
-	
-			\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};		
-	
+
+			\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
+
 			\draw[->] (a1) -- (a2);
 			\draw[->, thick] (a1) -- (a3);
 			\draw[->] (b1) -- (a2);
@ -89,7 +89,7 @@ The circuit \circuit in \Cref{fig:circuit-express-tree} encodes the polynomial $
 		\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
 		\label{fig:circuit}
 	\end{subfigure}
-	\caption{ }
+	\caption{Example circuit encodings}
 \end{figure}


@ -113,8 +113,8 @@ $\circuitset{\smb}$ is the set of all possible circuits $\circuit$ such that $\p

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$.  One can think of $\circuitset{\smb}$ as the infinite set of circuits each of which model an encoding (factorization) equal to $\polyf(\circuit)$.   
-%\supset \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.  
+The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$.  One can think of $\circuitset{\smb}$ as the infinite set of circuits each of which models an encoding (factorization) equal to $\polyf(\circuit)$.
+%\supset \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.
 Note that \Cref{def:circuit-set} implies that $\circuit \in \circuitset{\polyf(\circuit)}$.

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%