appendix D

This commit is contained in:
Boris Glavic 2020-12-20 17:50:56 -06:00
parent 5526ba5334
commit cc7c5fdb8a

View file

@ -1,7 +1,8 @@
%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Supplementary Material for~\Cref{prop:expection-of-polynom}}\label{subsec:supp-mat-background}
To justify the use of $\semNX$-databases, we need to show that we can encode any $\semN$-PDB in this way and that the query semantics over this representation coincides with query semantics over $\semN$-PDB. For that it will be opportune to define representation systems for $\semN$-PDBs.\BG{cite}
@ -608,7 +609,7 @@ It is easy to check that except for~\Cref{alg:sample-times-union}, all other lin
More specifically consider $\onepass$. The algorithm (as well as its analysis) basically uses the fact that one can compute the corresponding polynomial at all $1$s input with a simple recursive formula (\cref{eq:T-all-ones}), and that we can compute a probability distribution based on these weights (as in~\cref{eq:T-weights}). It can be verified that all the arguments go through if we replace $\etree_\lchild$ and $\etree_\rchild$ for expression tree $\etree$ with the two incoming nodes of the sink for the given lineage circuit. Another way to look at this is we could `unroll' the recursion in $\onepass$ and think of the algorithm as doing the evaluation at each node bottom up from leaves to the root in the expression tree. For lineage circuits, we start from the source nodes and do the computation in the topological order till we reach the sink(s).
The argument for $\sampmon$ is similar. Since we argued that $\onepass$ works as intended for lineage circuits since~\Cref{alg:one-pass} only recurses on children of the current node in the expression tree and we can generalize it to lineage circuits by recursing to the two children of the current node in the lineage circuit. Alternatively, as we have already used in the proof of~\Cref{lem:sample}, we can think of the sampling algorithm sampling a sub-graph of the expression tree. For lineage circuits, we can think of $\sampmon$ as sampling the same sub-graph. Alternatively, one can implicitly expand the circuit lineage into a (larger but) equivalent expression tree. Since $\sampmon$ only explores one sub-graph during its run we can think of its run on a lineage circuit as being done on the implicit equivalent expression tree\footnote{
The argument for $\sampmon$ is similar. Since we argued that $\onepass$ works as intended for lineage circuits since~\Cref{alg:one-pass} only recurses on children of the current node in the expression tree and we can generalize it to lineage circuits by recursing to the two children of the current node in the lineage circuit. Alternatively, as we have already used in the proof of~\Cref{lem:sample}, we can think of the sampling algorithm as sampling a sub-graph of the expression tree. For lineage circuits, we can think of $\sampmon$ as sampling the same sub-graph. Alternatively, one can implicitly expand the circuit lineage into a (larger but) equivalent expression tree. Since $\sampmon$ only explores one sub-graph during its run we can think of its run on a lineage circuit as being done on the implicit equivalent expression tree\footnote{
Recall that $\sampmon$ scales only in the depth of the expression and its polynomial degree ($k$). There exist polynomials that can be encoded in size $\Omega(\log k)$, but we follow convention in assuming that the circuit size is asymptotically larger than $k$ and thus treat the degree (i.e., join width) as a constant.
}. Hence, all of the results on $\sampmon$ on expression trees carry over to lineage circuits.
@ -616,122 +617,130 @@ Thus, we have argued that~\Cref{lem:approx-alg} also holds if we use a lineage c
\subsection{Representing Polynomials with Lineage Circuits}\label{app:subsec-rep-poly-lin-circ}
\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
Each vertex $v \in V_Q$ in the arithmetic circuit for $\tuple{V_Q, E_Q, \phi_Q, \ell_Q}$ encodes a polynomial, realized as
Each vertex $v \in V_{Q,\pxdb}$ in the arithmetic circuit for
$$\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell(v) = +\\
\prod_{v' : (v',v) \in E_Q} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
\[\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}\]
encodes a polynomial, realized as
\[\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = +\\
\prod_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
\ell(v) & \textbf{otherwise}
\end{cases}$$
\end{cases}\]
We define the circuit for a select-union-project-join $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i}, E_{Q_i}, \phi_{Q_i}, \ell_{Q_i}}$ denote the circuit for subquery $Q_i$.
We define the circuit for a select-union-project-join $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$.
\caseheading{Base Relation}
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_Q = \comprehension{v_t}{t\in R}$, let $\phi_Q(t) = v_t$, let $\ell_Q(v_t) = R(t)$, and let $E_Q = \emptyset$.
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
This circuit has $|R|$ vertices.
\caseheading{Selection}
Let $Q = \sigma_\theta \inparen{Q_1}$.
We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.
Formally, let $V_Q = V_{Q_1}$, let $\ell_Q(v_0) = 0$, and let $\ell_Q(v) = \ell_{Q_1}(v)$ for any $v \in V_{Q_1}$. Let $E_Q = E_{Q_1}$, and define
$$\phi_Q(t) =
\phi_{Q_1}(t) \text{ for } t \text{ s.t.}\; \theta(t).$$
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb}$, let $\ell_{Q,\pxdb}(v_0) = 0$, and let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$. Let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$, and define
$$\phi_{Q,\pxdb}(t) =
\phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t).$$
Dead sinks are iteratively removed, and so
%\AH{While not explicit, I assume a reviewer would know that the notation above discards tuples/vertices not satisfying the selection predicate.}
%v_0 & \textbf{otherwise}
%\end{cases}$$
this circuit has at most $|V_{Q_1}|$ vertices.
this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
\caseheading{Projection}
Let $Q = \pi_{\vct A} {Q_1}$.
We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
Naively, let $V_Q = V_{Q_1} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_Q(t) = v_t$, and let $\ell_Q(v_t) = +$. Finally let
$$E_Q = E_{Q_1} \cup \comprehension{(\phi_{Q_1}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
$$E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
% \AH{Is the rightmost operator \emph{supposed} to be a $-$? In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
The corrected circuit thus has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\caseheading{Union}
Let $Q = {Q_1} \cup {Q_2}$.
We merge graphs and produce a sum vertex for all tuples in both sides of the union.
Formally, let $V_Q = V_{Q_1} \cup V_{Q_2} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_Q(v_t) = +$, and let
$$E_Q = E_{Q_1} \cup E_{Q_2} \cup \comprehension{(\phi_{Q_1}(t), v_t), (\phi_{Q_2}(t), v_t)}{t \in {Q_1} \cap {Q_2}}$$
$$\phi_Q(t) = \begin{cases}
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
\[
\phi_{Q,\pxdb}(t) = \begin{cases}
v_t & \textbf{if } t \in {Q_1} \cap {Q_1}\\
\phi_{Q_1}(t) & \textbf{if } t \not \in {Q_2}\\
\phi_{Q_2}(t) & \textbf{if } t \not \in {Q_1}\\
\end{cases}$$
This circuit has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
\phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
\phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
\end{cases}\]
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\caseheading{$k$-ary Join}
Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
We merge graphs and produce a multiplication vertex for all tuples resulting from the join
Naively, let $V_Q = V_{Q_1} \cup \ldots \cup V_{Q_k} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
{\small
\begin{multline*}
E_Q = E_{Q_1} \cup \ldots \cup E_{Q_k} \cup
E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
\left\{\;
(\phi_{Q_1}(\pi_{\sch({Q_1})}t), v_t), \right.\\
\ldots, (\phi_{Q_k}(\pi_{\sch({Q_k})}t), v_t)
(\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
\ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
\end{multline*}
}
Let $\ell_Q(v_t) = \times$, and let $\phi_Q(t) = v_t$
Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof for~\Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
Proof by induction. The base case is a base relation: $Q = R$ and is trivially true since $|V_R| = |R|$.
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i}| \leq (k_i-1)\qruntime{Q_i}$ where $k_i$ is the degree of $Q_i$.
Proof by induction. The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |R|$.
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i,\pxdb}| \leq (k_i-1)\qruntime{Q_i,\pxdb}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
In the circuit for $Q$, $|V_Q| = |V_{Q_1}|$ vertices, so from the inductive assumption and $\qruntime{Q} = \qruntime{Q_1}$ by definition, we have $|V_Q| \leq (k-1) \qruntime{Q} $.
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{Q,\pxdb} = \qruntime{Q_1,\pxdb}$ by definition, we have $|V_{Q,\pxdb}| \leq (k-1) \qruntime{Q,\pxdb} $.
% \AH{Technically, $\kElem$ is the degree of $\poly_1$, but I guess this is a moot point since one can argue that $\kElem$ is also the degree of $\poly$.}
% OK: Correct
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
The circuit for $Q$ has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
% \AH{The combination of terms above doesn't follow the details for projection above.}
\begin{align*}
|V_{Q}| & \leq |V_{Q_1}| + |Q_1|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
%& \leq |V_{Q_1}| + 2 \qruntime{Q_1}\\
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}| + 2 \qruntime{Q_1,\pxdb}\\
\intertext{(From the inductive assumption)}
& \leq (k-1)\qruntime{Q_1} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q}$)}
& \le (k-1)\qruntime{Q}.
& \leq (k-1)\qruntime{Q_1,\pxdb} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \le (k-1)\qruntime{Q,\pxdb}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\begin{align*}
|V_{Q}| & \leq |V_{Q_1}|+|V_{Q_2}|+|{Q_1}|+|{Q_2}|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1} \geq |Q_1|$}
%& \leq |V_{Q_1}|+|V_{Q_2}|+\qruntime{Q_1}+\qruntime{Q_2}|\\
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+\qruntime{Q_1,\pxdb}+\qruntime{Q_2,\pxdb}|\\
\intertext{(From the inductive assumption)}
& \leq (k-1)(\qruntime{Q_1} + \qruntime{Q_2}) + (b_1 + b_2)
\intertext{(By definition of $\qruntime{Q}$)}
& \leq (k-1)(\qruntime{Q}).
& \leq (k-1)(\qruntime{Q_1,\pxdb} + \qruntime{Q_2,\pxdb}) + (b_1 + b_2)
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \leq (k-1)(\qruntime{Q,\pxdb}).
\end{align*}
\caseheading{$k$-ary Join}
Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
The circuit for $Q$ has $|V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
\begin{align*}
|V_{Q}| & = |V_{Q_1}|+\ldots+|V_{Q_k}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k-1$}
& \leq (k-1)\qruntime{Q_1}+\ldots+(k-1)\qruntime{Q_k}+\\
& \leq (k-1)\qruntime{Q_1,\pxdb}+\ldots+(k-1)\qruntime{Q_k,\pxdb}+\\
&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
& \leq (k-1)(\qruntime{Q_1}+\ldots+\qruntime{Q_k}+\\
& \leq (k-1)(\qruntime{Q_1,\pxdb}+\ldots+\qruntime{Q_k,\pxdb}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
\intertext{(By definition of $\qruntime{Q}$)}
& = (k-1)\qruntime{Q}.
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& = (k-1)\qruntime{Q,\pxdb}.
\end{align*}
The property holds for all recursive queries, and the proof holds.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: