An algorithm for LC
This commit is contained in:
parent
f6ba1e8a2a
commit
bd13ad5569
153
appendix.tex
153
appendix.tex
|
@ -62,74 +62,140 @@ encodes a polynomial, realized as
|
|||
\end{cases}\]
|
||||
|
||||
|
||||
We define the circuit for a select-union-project-join $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$.
|
||||
We define the circuit for a select-union-project-join query $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$. We implicitly include in all circuits a global zero node $v_0$ such that $\ell_{Q, \pxdb}(v_0) = 0$ for any $Q,\pxdb$.
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{\abbrStepOne$(\query, \dbbase)$}
|
||||
\label{alg:lc}
|
||||
\begin{algorithmic}[1]
|
||||
\Require $\query$: query
|
||||
\Require $\dbbase$: a \dbbaseName
|
||||
\Ensure $\circuit = \tuple{V, E, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\dbbase)$
|
||||
|
||||
\If{$\query$ is $R$}
|
||||
\State $V = \comprehension{v_t}{t \in \dbbase.R}$
|
||||
\State $E = \emptyset$
|
||||
\For{$t \in \dbbase.R$}
|
||||
\State $\phi(t) = v_t$ \Comment{$v_t$ as defined above}
|
||||
\State $\ell(v_t) = R(t)$
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\sigma_\theta(\query')$}
|
||||
\State $\tuple{V, E, \phi', \ell} = \abbrStepOne(\query', \dbbase)$
|
||||
\For{$t \in \query'$}
|
||||
\If{$\theta(t)$}
|
||||
\State $\phi(t) = \phi'(t)$
|
||||
\Else
|
||||
\State $\phi(t) = v_0$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\pi_{\vec{A}}(\query')$}
|
||||
\State $\tuple{V', E', \phi', \ell'} = \abbrStepOne(\query', \dbbase)$
|
||||
\State $V = V' \cup \comprehension{v_t}{t \in \pi_{\vec{A}}(\query')}$
|
||||
\State $E = E' \cup \comprehension{(\phi'(t'), v_t)}{t = \pi_{\vec{A}}t', t' \in \query', t \in \pi_{\vec{A}}(\query')}$
|
||||
\Comment{Nodes with in-degrees above 2 are corrected (with logarithmic overhead) with an equivalent fan-in tree.}
|
||||
\For{$t \in \pi_{\vec{A}}(\query')$}
|
||||
\State $\phi(t) = v_t$ \Comment{$v_t$ as defined above}
|
||||
\State $\ell(v_t) = +$
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\query_1 \cup \query_2$}
|
||||
\State $\tuple{V_1, E_1, \phi_1, \ell_1} = \abbrStepOne(\query_1, \dbbase)$
|
||||
\State $\tuple{V_2, E_2, \phi_2, \ell_2} = \abbrStepOne(\query_2, \dbbase)$
|
||||
\State $V = V_1 \cup V_2 \cup \comprehension{v_t}{t \in \query_1 \cap \query_2}$
|
||||
\State $E = E_1 \cup E_2 \cup \comprehension{(\phi_1(t), v_t), (\phi_2(t), v_t)}{t \in \query_1 \cap \query_2}$
|
||||
\State $\phi = \phi_1 \cup \phi_2$
|
||||
\State $\ell = \ell_1 \cup \ell_2$
|
||||
\For{$t \in \query_1 \cap \query_2$}
|
||||
\State $\phi(t) = v_t$ \Comment{$v_t$ as defined above}
|
||||
\State $\ell(v_t) = +$
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_k$}
|
||||
\For{$i \in [1, k]$}
|
||||
\State $\tuple{V_i, E_i, \phi_i, \ell_i} = \abbrStepOne(\query_i, \dbbase)$
|
||||
\EndFor
|
||||
\State $V = V_1 \cup \ldots \cup V_k \cup \comprehension{v_t}{t \in \query_1 \bowtie \ldots \bowtie \query_k}$
|
||||
\State $E = E_1 \cup \ldots \cup E_k \cup \bigcup_{i \in [1,k]}
|
||||
\comprehension{(\phi_i(\pi_{\sch(\query_i)}(t)), v_t)}{t \in \query_1 \bowtie \ldots \bowtie \query_k}$\Comment{Nodes with in-degrees above 2 are corrected (with $\log_2(k)$ overhead) with an equivalent fan-in tree.}
|
||||
\State $\phi = \phi_1 \cup \ldots \cup \phi_k$
|
||||
\State $\ell = \ell_1 \cup \ldots \cup \ell_k$
|
||||
\For{$t \in \query_1 \bowtie \ldots \bowtie \query_k$}
|
||||
\State $\phi(t) = v_t$
|
||||
\State $\ell(v_t) = \times$
|
||||
\EndFor
|
||||
|
||||
\EndIf
|
||||
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\Cref{alg:lc} defines how the circuit for a query result is constructed. We quickly review the number of vertices emitted in each case.
|
||||
|
||||
\caseheading{Base Relation}
|
||||
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
|
||||
% Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
|
||||
This circuit has $|D_\Omega.R|$ vertices.
|
||||
|
||||
\caseheading{Selection}
|
||||
Let $Q = \sigma_\theta \inparen{Q_1}$.
|
||||
We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.
|
||||
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb}$, let $\ell_{Q,\pxdb}(v_0) = 0$, and let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$. Let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$, and define
|
||||
$$\phi_{Q,\pxdb}(t) =
|
||||
\phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t).$$
|
||||
Dead sinks are iteratively removed, and so
|
||||
%\AH{While not explicit, I assume a reviewer would know that the notation above discards tuples/vertices not satisfying the selection predicate.}
|
||||
%v_0 & \textbf{otherwise}
|
||||
%\end{cases}$$
|
||||
% Let $Q = \sigma_\theta \inparen{Q_1}$.
|
||||
% We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.
|
||||
% Let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \{v_0\}$, and let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$. Let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$, and define
|
||||
% $$\phi_{Q,\pxdb}(t) =
|
||||
% \phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t) \text{ and } \phi_{Q,\pxdb}(t) = v_0 \text{ otherwise}.$$
|
||||
If we assume dead sinks are iteratively garbage collected,
|
||||
this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
|
||||
|
||||
\caseheading{Projection}
|
||||
Let $Q = \pi_{\vct A} {Q_1}$.
|
||||
We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
|
||||
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
|
||||
$$E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
|
||||
% Let $Q = \pi_{\vct A} {Q_1}$.
|
||||
% We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
|
||||
% Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
|
||||
% $$E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}$$
|
||||
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
|
||||
% \AH{Is the rightmost operator \emph{supposed} to be a $-$? In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
|
||||
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
|
||||
|
||||
\caseheading{Union}
|
||||
Let $Q = {Q_1} \cup {Q_2}$.
|
||||
We merge graphs and produce a sum vertex for all tuples in both sides of the union.
|
||||
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
|
||||
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
|
||||
\[
|
||||
\phi_{Q,\pxdb}(t) = \begin{cases}
|
||||
v_t & \textbf{if } t \in {Q_1} \cap {Q_2}\\
|
||||
\phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
|
||||
\phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
|
||||
\end{cases}\]
|
||||
% Let $Q = {Q_1} \cup {Q_2}$.
|
||||
% We merge graphs and produce a sum vertex for all tuples in both sides of the union.
|
||||
% Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
|
||||
% \[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
|
||||
% \[
|
||||
% \phi_{Q,\pxdb}(t) = \begin{cases}
|
||||
% v_t & \textbf{if } t \in {Q_1} \cap {Q_1}\\
|
||||
% \phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
|
||||
% \phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
|
||||
% \end{cases}\]
|
||||
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
|
||||
|
||||
\caseheading{$k$-ary Join}
|
||||
Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
|
||||
We merge graphs and produce a multiplication vertex for all tuples resulting from the join.
|
||||
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
|
||||
{\small
|
||||
\begin{multline*}
|
||||
E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
|
||||
\left\{\;
|
||||
(\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
|
||||
\ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
|
||||
\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
|
||||
\end{multline*}
|
||||
}
|
||||
Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$.
|
||||
% Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
|
||||
% We merge graphs and produce a multiplication vertex for all tuples resulting from the join
|
||||
% Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
|
||||
% {\small
|
||||
% \begin{multline*}
|
||||
% E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
|
||||
% \left\{\;
|
||||
% (\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
|
||||
% \ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
|
||||
% \;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
|
||||
% \end{multline*}
|
||||
% }
|
||||
% Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$
|
||||
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.
|
||||
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{Lemma}\label{lem:circ-model-runtime}
|
||||
\label{lem:circuits-model-runtime}
|
||||
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and query plan $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq (k-1)\qruntime{Q, \dbbase}$, where $k$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
||||
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and query plan $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq (k-1)\qruntime{Q, \dbbase}+1$, where $k$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
||||
\end{Lemma}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%\noindent The proof is shown in \Cref{app:subsec-lem-lin-vs-qplan}.
|
||||
|
||||
%\subsection{Proof for \Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
|
||||
\begin{proof}
|
||||
Proof by induction. The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |D_\Omega.R|$.
|
||||
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq (k-1)\qruntime{Q, \dbbase}$. For clarity, we implicitly exclude $v_0$ in the proof below.
|
||||
|
||||
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |D_\Omega.R|$.
|
||||
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i,\pxdb}| \leq (k_i-1)\qruntime{Q_i,\dbbase}$ where $k_i$ is the degree of $Q_i$.
|
||||
|
||||
\caseheading{Selection}
|
||||
|
@ -182,7 +248,12 @@ The property holds for all recursive queries, and the proof holds.
|
|||
\qed
|
||||
\end{proof}
|
||||
|
||||
With \cref{lem:circ-model-runtime} and our upper bound results on \approxq, we now have all the pieces to argue that using our approximation algorithm, the expected multiplicities of an $\raPlus$ query can be computed in essentially the same runtime as deterministic query processing for the same query, proving claim (iv) of the Introduction.
|
||||
We next need to show that we can construct the circuit in time linear in the deterministic runtime.
|
||||
\begin{lemma}\label{lem:tlc-is-the-same-as-det}
|
||||
Given a query $\query$ over a \dbbaseName $\dbbase$, the runtime $\timeOf{\abbrStepOne}(\query,\dbbase,\circuit) \le O(\qruntime{\query, \dbbase})$.
|
||||
\end{lemma}
|
||||
|
||||
With \Cref{lem:circ-model-runtime,lem:tlc-is-the-same-as-det} and our upper bound results on \approxq, we now have all the pieces to argue that using our approximation algorithm, the expected multiplicities of an $\raPlus$ query can be computed in essentially the same runtime as deterministic query processing for the same query, proving claim (iv) of the Introduction.
|
||||
|
||||
\section{Proof of \Cref{cor:cost-model}}
|
||||
\begin{proof}
|
||||
|
|
Loading…
Reference in a new issue