Fixes to Appendix D.

This commit is contained in:
Aaron Huber 2022-05-13 13:45:24 -04:00
parent dbb14420db
commit 3511a18e54
3 changed files with 32 additions and 33 deletions

View file

@ -112,11 +112,11 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
\State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$
\State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$
\State $\phi \gets \phi_1 \cup \phi_2$
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$
\State $\phi \gets \phi_1 \cup \phi_2$\label{alg:lincirc-union-phi}
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}\label{alg:lincirc-union-intersection}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\label{alg:lincirc-union-intersection-one} \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$\label{alg:lincirc-union-intersection-two}
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$\label{alg:lincirc-union-intersection-three}
\EndFor
\State\Return $\tuple{V, E, \phi, \ell}$
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is an $m$-ary Join}
@ -186,19 +186,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase
\begin{Lemma}\label{lem:circ-model-runtime}
\label{lem:circuits-model-runtime}
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{\query, \tupset, \bound}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below.
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{\query, \tupset, \bound}$. For clarity, we implicitly exclude $v_0$ in the proof below.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{\rel, \tupset, \bound}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{\query_i,\tupset, \bound}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\tupset}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset} $.
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{\query,\tupset, \bound} = \qruntime{\query_1,\tupset, \bound}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{\query,\tupset, \bound} $.
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
@ -206,9 +206,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
\intertext{(From the inductive assumption)}
& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
& \le k\qruntime{Q,\tupset}.
& \leq k\qruntime{\query_1,\tupset, \bound} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$)}
& \le k\qruntime{\query,\tupset, \bound}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
@ -216,9 +216,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
\intertext{(From the inductive assumption)}
& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|)
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
& \leq k(\qruntime{Q,\tupset}).
& \leq k(\qruntime{\query_1,\tupset, \bound} + \qruntime{\query_2,\tupset, \bound}) + (|Q_1| + |Q_2|)
\intertext{(By definition of $\qruntime{\query, \tupset, \bound}$)}
& \leq k(\qruntime{\query,\tupset, \bound}).
\end{align*}
\caseheading{$m$-ary Join}
@ -227,12 +227,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$}
& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_k,\tupset}+\\
& \leq k\qruntime{\query_1,\tupset, \bound}+\ldots+k\qruntime{\query_m,\tupset, \bound}+\\
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\
& \leq k\left(\qruntime{\query_1, \tupset, \bound}+\ldots+\qruntime{\query_m, \tupset, \bound}+\right.\\
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{Q,\tupset}.
\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{\query,\tupset, \bound}.
\end{align*}
The property thus holds in every case of the induction, which completes the proof.
@ -244,7 +244,7 @@ The property holds for all recursive queries, and the proof holds.
We next need to show that we can construct the circuit in time linear in the deterministic runtime.
\begin{Lemma}\label{lem:tlc-is-the-same-as-det}
Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset})$.
Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset, \bound})$.
\end{Lemma}
\begin{proof}
By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \{v_0\}, \emptyset, \{(v_0, 0)\})$.
@ -254,30 +254,29 @@ We assume that the tuple to sink mapping $\phi$ is a linked hashmap, with $O(1)$
We assume that the $n$-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table).
Before proving our runtime bound, we first observe that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
Before proving our runtime bound, we first observe that $\qruntime{\query, \tupset, \bound} \geq \Omega(|\query(\db)|)$.
This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$.
We showthat $\qruntime{\query, \tupset}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
We show that $\qruntime{\query, \tupset, \bound}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples.
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset})$ bounds the runtime of \Cref{alg:lc}.
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset, \bound})$ bounds the runtime of \Cref{alg:lc}.
\AH{What is meant by recursive assumption and how is this valid?}
\caseheading{Selection}
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$.
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$.
\Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$.
By the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset})$.
By the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset, \bound})$.
\caseheading{Projection}
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset}$.
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset, \bound}$.
What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20).
The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset})$.
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset}) \leq O(\qruntime{\query, \tupset}) $, by the the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset, \bound})$.
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset, \bound}) \leq O(\qruntime{\query, \tupset, \bound}) $, by the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$.
\caseheading{Bag Union}
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset}$.
Initializing $\phi$ (line 24) can be accomplished in $O(\domain(\phi_1) + \domain(\phi_2)) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset} + \qruntime{\query_2, \tupset})$.
The remainder requires computing $\query_1 \cup \query_2$ (line 25) and iterating over it (lines 25--29), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset}$.
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
Initializing $\phi$ (\Cref{alg:lincirc-union-phi}) can be accomplished in $O(|\domain(\phi_1)| + |\domain(\phi_2)|) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset, \bound} + \qruntime{\query_2, \tupset, \bound})$.
The remainder requires computing $\query_1 \cap \query_2$ (\Cref{alg:lincirc-union-intersection}) and iterating over it (\Crefrange{alg:lincirc-union-intersection-one}{alg:lincirc-union-intersection-three}), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
\caseheading{$m$-ary Join}

BIN
main.pdf

Binary file not shown.

Binary file not shown.