Finished cleaning App C; started App D.
This commit is contained in:
parent
775ec53143
commit
1ca2c00cd0
72
appendix.tex
72
appendix.tex
|
@ -13,7 +13,7 @@
|
|||
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}}
|
||||
\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}}\label{app:subsec:background-nxdbs}
|
||||
\subsubsection{$\semK$-relations and \abbrNXPDB\xplural}\label{subsec:supp-mat-background}\label{subsec:supp-mat-krelations}
|
||||
\input{app_k-relations}
|
||||
\input{app_notation-background}
|
||||
|
@ -39,7 +39,7 @@
|
|||
\label{sec:circuits-formal}
|
||||
We now formalize circuits and the construction of circuits for $\raPlus$ queries.
|
||||
As mentioned earlier, we represent lineage polynomials as arithmetic circuits over $\mathbb N$-valued variables with $+$, $\times$.
|
||||
A circuit for query $Q$ and \abbrNXPDB $\pxdb$ is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$.
|
||||
A circuit for query $Q$ and \abbrNXPDB $\pxdb$\footnote{For background on \abbrNXPDB\xplural, see~\Cref{app:subsec:background-nxdbs}.} is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$.
|
||||
The sink function $\phi_{Q,\pxdb} : \udom^n \rightarrow V_{Q,\pxdb}$ is a partial function that maps the tuples of the $n$-ary relation $Q(\pxdb)$ to vertices.
|
||||
We require that $\phi_{Q,\pxdb}$'s range be limited to sink vertices (i.e., vertices with out-degree 0).
|
||||
A function $\ell_{Q,\pxdb} : V_{Q,\pxdb} \rightarrow \{\;+,\times\;\}\cup \mathbb N \cup \vct X$ assigns a label to each node: Source nodes (i.e., vertices with in-degree 0) are labeled with constants or variables (i.e., $\mathbb N \cup \vct X$), while the remaining nodes are labeled with the symbol $+$ or $\times$.
|
||||
|
@ -54,7 +54,8 @@ Note that we can construct circuits for \bis in time linear in the time required
|
|||
|
||||
We now connect the size of a circuit (where the size of a circuit is the number of vertices in the corresponding DAG)
|
||||
for a given $\raPlus$ query $Q$ and \abbrNXPDB $\pxdb$ to
|
||||
the runtime $\qruntime{Q,\dbbase}$ of the PDB's \dbbaseName $\dbbase$.
|
||||
the runtime $\qruntime{Q,\tupset}$ of the PDB's \dbbaseName $\tupset$.
|
||||
\AH{@atri: do we use $\tupset$ or $\gentupset$ here?}
|
||||
We do this formally by showing that the size of the circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -76,38 +77,38 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
|
|||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{\abbrStepOne$(\query, \dbbase, E, V, \ell)$}
|
||||
\caption{\abbrStepOne$(\query, \tupset, V, E, \ell)$}
|
||||
\label{alg:lc}
|
||||
\begin{algorithmic}[1]
|
||||
\Require $\query$: query
|
||||
\Require $\dbbase$: a \dbbaseName
|
||||
\Require $\tupset$: a \dbbaseName
|
||||
\Require $E, V, \ell$: accumulators for the edge list, vertex list, and vertex label list.
|
||||
\Ensure $\circuit = \tuple{E, V, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\dbbase)$
|
||||
\If{$\query$ is $R$} \Comment{\textbf{Case 1}: $\query$ is a relation atom}
|
||||
\For{$t \in \dbbase.R$}
|
||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, R(t))\}$ \Comment{Allocate a fresh node $v_t$}
|
||||
\Ensure $\circuit = \tuple{V, E, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\tupset)$
|
||||
\If{$\query$ is $\rel$} \Comment{\textbf{Case 1}: $\query$ is a relation atom}
|
||||
\For{$t \in \tupset.\rel$}
|
||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{\inparen{v_t, \rel\inparen{t}}\}$ \Comment{Allocate a fresh node $v_t$}
|
||||
\State $\phi(t) \gets v_t$
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\sigma_\theta(\query')$} \Comment{\textbf{Case 2}: $\query$ is a Selection}
|
||||
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
|
||||
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$
|
||||
\For{$t \in \domain(\phi')$}
|
||||
\State \textbf{if }$\theta(t)$
|
||||
\textbf{ then } $\phi(t) \gets \phi'(t)$
|
||||
\textbf{ else } $\phi(t) \gets v_0$
|
||||
\EndFor
|
||||
\ElsIf{$\query$ is $\pi_{\vec{A}}(\query')$} \Comment{\textbf{Case 3}: $\query$ is a Projection}
|
||||
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
|
||||
\For{$t \in \pi_{\vec{A}}(\query'(\dbbase))$}
|
||||
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$
|
||||
\For{$t \in \pi_{\vec{A}}(\query'(\tupset))$}
|
||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\Comment{Allocate a fresh node $v_t$}
|
||||
\State $\phi(t) \leftarrow v_t$
|
||||
\EndFor
|
||||
\For{$t \in \query'(\dbbase)$}
|
||||
\For{$t \in \query'(\tupset)$}
|
||||
\State $E \leftarrow E \cup \{(\phi'(t), \phi(\pi_{\vec{A}}t))\}$
|
||||
\EndFor
|
||||
\State Replace each node with in-degree $>2$ by an equivalent tree of fan-in-two nodes
|
||||
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
|
||||
\State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \dbbase, V, E, \ell)$
|
||||
\State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \dbbase, V, E, \ell)$
|
||||
\State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \tupset, V, E, \ell)$
|
||||
\State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \tupset, V, E, \ell)$
|
||||
\State $\phi \gets \phi_1 \cup \phi_2$
|
||||
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}
|
||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$}
|
||||
|
@ -116,12 +117,12 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
|
|||
\EndFor
|
||||
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is an $m$-ary Join}
|
||||
\For{$i \in [m]$}
|
||||
\State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \dbbase, V, E, \ell)$
|
||||
\State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \tupset, V, E, \ell)$
|
||||
\EndFor
|
||||
\For{$t \in \domain(\phi_1) \bowtie \ldots \bowtie \domain(\phi_m)$}
|
||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, \times)\}$ \Comment{Allocate a fresh node $v_t$}
|
||||
\State $\phi(t) \gets v_t$
|
||||
\State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\dbbase))}(t)), v_t)}{i \in [n]}$
|
||||
\State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\tupset))}(t)), v_t)}{i \in [m]}$
|
||||
\EndFor
|
||||
\State Replace each node with in-degree $>2$ by an equivalent tree of fan-in-two nodes
|
||||
|
||||
|
@ -134,7 +135,7 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
|
|||
\Cref{alg:lc} defines how the circuit for a query result is constructed. We quickly review the number of vertices emitted in each case.
|
||||
|
||||
\caseheading{Base Relation}
|
||||
This circuit has $|D_\Omega.R|$ vertices.
|
||||
This circuit has $\abs{\tupset.\rel}$ vertices.
|
||||
|
||||
\caseheading{Selection}
|
||||
If we assume dead sinks are iteratively garbage collected,
|
||||
|
@ -159,7 +160,7 @@ We first show that the depth of the circuit (\depth; \Cref{def:size-depth}) is b
|
|||
|
||||
\begin{Proposition}[Circuit depth is bounded]
|
||||
\label{prop:circuit-depth}
|
||||
Let $\query$ be a relational query and $\dbbase$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\dbbase)$ for which
|
||||
Let $\query$ be a relational query and $\tupset$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\tupset)$ for which
|
||||
$\depth(\circuit^*) \leq O(k|\query|\log(n))$.
|
||||
\end{Proposition}
|
||||
|
||||
|
@ -180,18 +181,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase
|
|||
|
||||
\begin{Lemma}\label{lem:circ-model-runtime}
|
||||
\label{lem:circuits-model-runtime}
|
||||
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \dbbase}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
||||
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same complexity as, or greater complexity than, the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
||||
\end{Lemma}
|
||||
\AH{Why are the number of vertices considered to be the size of the lineage?}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{proof}
|
||||
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \dbbase}$. For clarity, we implicitly exclude $v_0$ in the proof below.
|
||||
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below.
|
||||
|
||||
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\dbbase.R|=\qruntime{R, \dbbase}$ (note that here the degree $k=1$).
|
||||
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\dbbase}$ where $k_i$ is the degree of $Q_i$.
|
||||
The base case is a base relation, $Q = R$, for which the claim holds trivially since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$).
|
||||
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$.
|
||||
|
||||
\caseheading{Selection}
|
||||
Assume that $Q = \sigma_\theta(Q_1)$.
|
||||
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\dbbase}|$ vertices, so from the inductive assumption and $\qruntime{Q,\dbbase} = \qruntime{Q_1,\dbbase}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\dbbase} $.
|
||||
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset}$.
|
||||
|
||||
\caseheading{Projection}
|
||||
Assume that $Q = \pi_{\vct A}(Q_1)$.
|
||||
|
@ -199,9 +201,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
|
|||
\begin{align*}
|
||||
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
|
||||
\intertext{(From the inductive assumption)}
|
||||
& \leq k\qruntime{Q_1,\dbbase} + \abs{Q_1}\\
|
||||
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
|
||||
& \le k\qruntime{Q,\dbbase}.
|
||||
& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\
|
||||
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
|
||||
& \le k\qruntime{Q,\tupset}.
|
||||
\end{align*}
|
||||
\caseheading{Union}
|
||||
Assume that $Q = Q_1 \cup Q_2$.
|
||||
|
@ -209,9 +211,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver
|
|||
\begin{align*}
|
||||
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
|
||||
\intertext{(From the inductive assumption)}
|
||||
& \leq k(\qruntime{Q_1,\dbbase} + \qruntime{Q_2,\dbbase}) + (|Q_1| + |Q_2|)
|
||||
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
|
||||
& \leq k(\qruntime{Q,\dbbase}).
|
||||
& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|)\\
|
||||
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
|
||||
& \leq k(\qruntime{Q,\tupset}).
|
||||
\end{align*}
|
||||
|
||||
\caseheading{$m$-ary Join}
|
||||
|
@ -220,12 +222,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow
|
|||
\begin{align*}
|
||||
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
|
||||
\intertext{(From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$)}
|
||||
& \leq k\qruntime{Q_1,\dbbase}+\ldots+k\qruntime{Q_k,\dbbase}+\\
|
||||
& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_m,\tupset}+\\
|
||||
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
|
||||
& \leq k(\qruntime{Q_1,\dbbase}+\ldots+\qruntime{Q_m,\dbbase}+\\
|
||||
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_m}|)\\
|
||||
\intertext{(By definition of $\qruntime{Q,\dbbase}$ and assumption on $\jointime{\cdot}$)}
|
||||
& \le k\qruntime{Q,\dbbase}.
|
||||
& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\
|
||||
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
|
||||
\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)}
|
||||
& \le k\qruntime{Q,\tupset}.
|
||||
\end{align*}
|
||||
|
||||
The property holds in every case of the recursive definition of $Q$, which completes the proof.
|
||||
|
|
BIN
main.synctex.gz
BIN
main.synctex.gz
Binary file not shown.
Loading…
Reference in a new issue