Finished cleaning App C; started App D.

This commit is contained in:
Aaron Huber 2022-04-27 10:45:00 -04:00
parent 775ec53143
commit 1ca2c00cd0
3 changed files with 37 additions and 35 deletions

View file

@ -13,7 +13,7 @@
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}}
\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}}\label{app:subsec:background-nxdbs}
\subsubsection{$\semK$-relations and \abbrNXPDB\xplural}\label{subsec:supp-mat-background}\label{subsec:supp-mat-krelations}
\input{app_k-relations}
\input{app_notation-background}
@ -39,7 +39,7 @@
\label{sec:circuits-formal}
We now formalize circuits and the construction of circuits for $\raPlus$ queries.
As mentioned earlier, we represent lineage polynomials as arithmetic circuits over $\mathbb N$-valued variables with $+$, $\times$.
A circuit for query $Q$ and \abbrNXPDB $\pxdb$ is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$.
A circuit for query $Q$ and \abbrNXPDB $\pxdb$\footnote{For background on \abbrNXPDB\xplural, see~\Cref{app:subsec:background-nxdbs}.} is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$.
The sink function $\phi_{Q,\pxdb} : \udom^n \rightarrow V_{Q,\pxdb}$ is a partial function that maps the tuples of the $n$-ary relation $Q(\pxdb)$ to vertices.
We require that $\phi_{Q,\pxdb}$'s range be limited to sink vertices (i.e., vertices with out-degree 0).
A function $\ell_{Q,\pxdb} : V_{Q,\pxdb} \rightarrow \{\;+,\times\;\}\cup \mathbb N \cup \vct X$ assigns a label to each node: Source nodes (i.e., vertices with in-degree 0) are labeled with constants or variables (i.e., $\mathbb N \cup \vct X$), while the remaining nodes are labeled with the symbol $+$ or $\times$.
@ -54,7 +54,8 @@ Note that we can construct circuits for \bis in time linear in the time required
We now connect the size of a circuit (where the size of a circuit is the number of vertices in the corresponding DAG)
for a given $\raPlus$ query $Q$ and \abbrNXPDB $\pxdb$ to
the runtime $\qruntime{Q,\dbbase}$ of the PDB's \dbbaseName $\dbbase$.
the runtime $\qruntime{Q,\tupset}$ of the PDB's \dbbaseName $\tupset$.
\AH{@atri: do we use $\tupset$ or $\gentupset$ here?}
We do this formally by showing that the size of the circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -76,38 +77,38 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
\begin{algorithm}
\caption{\abbrStepOne$(\query, \dbbase, E, V, \ell)$}
\caption{\abbrStepOne$(\query, \tupset, E, V, \ell)$}
\label{alg:lc}
\begin{algorithmic}[1]
\Require $\query$: query
\Require $\dbbase$: a \dbbaseName
\Require $\tupset$: a \dbbaseName
\Require $E, V, \ell$: accumulators for the edge list, vertex list, and vertex label list.
\Ensure $\circuit = \tuple{E, V, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\dbbase)$
\If{$\query$ is $R$} \Comment{\textbf{Case 1}: $\query$ is a relation atom}
\For{$t \in \dbbase.R$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, R(t))\}$ \Comment{Allocate a fresh node $v_t$}
\Ensure $\circuit = \tuple{E, V, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\tupset)$
\If{$\query$ is $\rel$} \Comment{\textbf{Case 1}: $\query$ is a relation atom}
\For{$t \in \tupset.\rel$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{\inparen{v_t, \rel\inparen{\tup}}\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\EndFor
\ElsIf{$\query$ is $\sigma_\theta(\query')$} \Comment{\textbf{Case 2}: $\query$ is a Selection}
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$
\For{$t \in \domain(\phi')$}
\State \textbf{if }$\theta(t)$
\textbf{ then } $\phi(t) \gets \phi'(t)$
\textbf{ else } $\phi(t) \gets v_0$
\EndFor
\ElsIf{$\query$ is $\pi_{\vec{A}}(\query')$} \Comment{\textbf{Case 3}: $\query$ is a Projection}
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
\For{$t \in \pi_{\vec{A}}(\query'(\dbbase))$}
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$
\For{$t \in \pi_{\vec{A}}(\query'(\tupset))$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \leftarrow v_t$
\EndFor
\For{$t \in \query'(\dbbase)$}
\For{$t \in \query'(\tupset)$}
\State $E \leftarrow E \cup \{(\phi'(t), \phi(\pi_{\vec{A}}t))\}$
\EndFor
\State Replace each node with in-degree $>2$ by an equivalent tree of fan-in-two nodes
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
\State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \dbbase, V, E, \ell)$
\State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \dbbase, V, E, \ell)$
\State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \tupset, V, E, \ell)$
\State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \tupset, V, E, \ell)$
\State $\phi \gets \phi_1 \cup \phi_2$
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$}
@ -116,12 +117,12 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
\EndFor
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is an $m$-ary Join}
\For{$i \in [m]$}
\State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \dbbase, V, E, \ell)$
\State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \tupset, V, E, \ell)$
\EndFor
\For{$t \in \domain(\phi_1) \bowtie \ldots \bowtie \domain(\phi_m)$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, \times)\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\dbbase))}(t)), v_t)}{i \in [n]}$
\State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\tupset))}(t)), v_t)}{i \in [m]}$
\EndFor
\State Replace each node with in-degree $>2$ by an equivalent tree of fan-in-two nodes
@ -134,7 +135,7 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
\Cref{alg:lc} defines how the circuit for a query result is constructed. We quickly review the number of vertices emitted in each case.
\caseheading{Base Relation}
This circuit has $|D_\Omega.R|$ vertices.
This circuit has $\abs{\tupset.\rel}$ vertices.
\caseheading{Selection}
If we assume dead sinks are iteratively garbage collected,
@ -159,7 +160,7 @@ We first show that the depth of the circuit (\depth; \Cref{def:size-depth}) is b
\begin{Proposition}[Circuit depth is bounded]
\label{prop:circuit-depth}
Let $\query$ be a relational query and $\dbbase$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\dbbase)$ for which
Let $\query$ be a relational query and $\tupset$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\tupset)$ for which
$\depth(\circuit^*) \leq O(k|\query|\log(n))$.
\end{Proposition}
@ -180,18 +181,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase
\begin{Lemma}\label{lem:circ-model-runtime}
\label{lem:circuits-model-runtime}
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \dbbase}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
\end{Lemma}
\AH{Why is the number of vertices considered to be the size of the lineage?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \dbbase}$. For clarity, we implicitly exclude $v_0$ in the proof below.
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\dbbase.R|=\qruntime{R, \dbbase}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\dbbase}$ where $k_i$ is the degree of $Q_i$.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\dbbase}|$ vertices, so from the inductive assumption and $\qruntime{Q,\dbbase} = \qruntime{Q_1,\dbbase}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\dbbase} $.
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so by the inductive assumption, and since $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset}$.
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
@ -199,9 +201,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
\intertext{(From the inductive assumption)}
& \leq k\qruntime{Q_1,\dbbase} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
& \le k\qruntime{Q,\dbbase}.
& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
& \le k\qruntime{Q,\tupset}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
@ -209,9 +211,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
\intertext{(From the inductive assumption)}
& \leq k(\qruntime{Q_1,\dbbase} + \qruntime{Q_2,\dbbase}) + (|Q_1| + |Q_2|)
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
& \leq k(\qruntime{Q,\dbbase}).
& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|)\\
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
& \leq k(\qruntime{Q,\tupset}).
\end{align*}
\caseheading{$m$-ary Join}
@ -220,12 +222,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$}
& \leq k\qruntime{Q_1,\dbbase}+\ldots+k\qruntime{Q_k,\dbbase}+\\
& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_m,\tupset}+\\
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
& \leq k(\qruntime{Q_1,\dbbase}+\ldots+\qruntime{Q_m,\dbbase}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_m}|)\\
\intertext{(By definition of $\qruntime{Q,\dbbase}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{Q,\dbbase}.
& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{Q,\tupset}.
\end{align*}
The bound therefore holds in every case of the inductive step, which completes the proof.

BIN
main.pdf

Binary file not shown.

Binary file not shown.