Fixes to Appendix D.

This commit is contained in:
Aaron Huber 2022-05-13 13:45:24 -04:00
parent dbb14420db
commit 3511a18e54
3 changed files with 32 additions and 33 deletions

View file

@ -112,11 +112,11 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union} \ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
\State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$ \State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$
\State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$ \State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$
\State $\phi \gets \phi_1 \cup \phi_2$ \State $\phi \gets \phi_1 \cup \phi_2$\label{alg:lincirc-union-phi}
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$} \For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}\label{alg:lincirc-union-intersection}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$} \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\label{alg:lincirc-union-intersection-one} \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$ \State $\phi(t) \gets v_t$\label{alg:lincirc-union-intersection-two}
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$ \State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$\label{alg:lincirc-union-intersection-three}
\EndFor \EndFor
\State\Return $\tuple{V, E, \phi, \ell}$ \State\Return $\tuple{V, E, \phi, \ell}$
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is a $m$-ary Join} \ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is a $m$-ary Join}
@ -186,19 +186,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase
\begin{Lemma}\label{lem:circ-model-runtime} \begin{Lemma}\label{lem:circ-model-runtime}
\label{lem:circuits-model-runtime} \label{lem:circuits-model-runtime}
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$. Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{\query, \tupset, \bound}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
\end{Lemma} \end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof} \begin{proof}
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below. We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{\query, \tupset, \bound}$. For clarity, we implicitly exclude $v_0$ in the proof below.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$). The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{\rel, \tupset, \bound}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$. For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{\query_i,\tupset, \bound}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection} \caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$. Assume that $Q = \sigma_\theta(Q_1)$.
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\tupset}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset} $. In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\tupset}|$ vertices, so from the inductive assumption and $\qruntime{\query,\tupset, \bound} = \qruntime{\query_1,\tupset, \bound}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{\query,\tupset, \bound} $.
\caseheading{Projection} \caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$. Assume that $Q = \pi_{\vct A}(Q_1)$.
@ -206,9 +206,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\begin{align*} \begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\ |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
\intertext{(From the inductive assumption)} \intertext{(From the inductive assumption)}
& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\ & \leq k\qruntime{\query_1,\tupset, \bound} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\tupset}$)} \intertext{(By definition of $\qruntime{\query,\tupset, \bound}$)}
& \le k\qruntime{Q,\tupset}. & \le k\qruntime{\query,\tupset, \bound}.
\end{align*} \end{align*}
\caseheading{Union} \caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$. Assume that $Q = Q_1 \cup Q_2$.
@ -216,9 +216,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver
\begin{align*} \begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\ |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
\intertext{(From the inductive assumption)} \intertext{(From the inductive assumption)}
& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|) & \leq k(\qruntime{\query_1,\tupset, \bound} + \qruntime{\query_2,\tupset, \bound}) + (|Q_1| + |Q_2|)
\intertext{(By definition of $\qruntime{Q,\tupset}$)} \intertext{(By definition of $\qruntime{\query, \tupset, \bound}$)}
& \leq k(\qruntime{Q,\tupset}). & \leq k(\qruntime{\query,\tupset, \bound}).
\end{align*} \end{align*}
\caseheading{$m$-ary Join} \caseheading{$m$-ary Join}
@ -227,12 +227,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow
\begin{align*} \begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\ |V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$} \intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$}
& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_k,\tupset}+\\ & \leq k\qruntime{\query_1,\tupset, \bound}+\ldots+k\qruntime{\query_k,\tupset, \bound}+\\
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\ &\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\ & \leq k\left(\qruntime{\query_1, \tupset, \bound}+\ldots+\qruntime{\query_1, \tupset, \bound}+\right.\\
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\ &\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)} \intertext{(By definition of $\qruntime{\query,\tupset, \bound}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{Q,\tupset}. & \le k\qruntime{\query,\tupset, \bound}.
\end{align*} \end{align*}
The property holds for all recursive queries, and the proof holds. The property holds for all recursive queries, and the proof holds.
@ -244,7 +244,7 @@ The property holds for all recursive queries, and the proof holds.
We next need to show that we can construct the circuit in time linear in the deterministic runtime. We next need to show that we can construct the circuit in time linear in the deterministic runtime.
\begin{Lemma}\label{lem:tlc-is-the-same-as-det} \begin{Lemma}\label{lem:tlc-is-the-same-as-det}
Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset})$. Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset, \bound})$.
\end{Lemma} \end{Lemma}
\begin{proof} \begin{proof}
By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \emptyset, \{v_0\}, \{(v_0, 0)\})$. By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \emptyset, \{v_0\}, \{(v_0, 0)\})$.
@ -254,30 +254,29 @@ We assume that the tuple to sink mapping $\phi$ is a linked hashmap, with $O(1)$
We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table). We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table).
Before proving our runtime bound, we first observe that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$. Before proving our runtime bound, we first observe that $\qruntime{\query, \tupset, \bound} \geq \Omega(|\query(\db)|)$.
This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$. This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$.
We showthat $\qruntime{\query, \tupset}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion. We show that $\qruntime{\query, \tupset, \bound}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples. The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples.
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset})$ bounds the runtime of \Cref{alg:lc}. For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset, \bound})$ bounds the runtime of \Cref{alg:lc}.
\AH{What is meant by recursive assumption and how is this valid?}
\caseheading{Selection} \caseheading{Selection}
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$. Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$.
\Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$. \Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$.
By the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset})$. By the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset, \bound})$.
\caseheading{Projection} \caseheading{Projection}
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset}$. Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset, \bound}$.
What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20). What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20).
The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset})$. The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset, \bound})$.
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset}) \leq O(\qruntime{\query, \tupset}) $, by the the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$. The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset}) \leq O(\qruntime{\query, \tupset, \bound}) $, by the the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$.
\caseheading{Bag Union} \caseheading{Bag Union}
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset}$. As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
Initializing $\phi$ (line 24) can be accomplished in $O(\domain(\phi_1) + \domain(\phi_2)) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset} + \qruntime{\query_2, \tupset})$. Initializing $\phi$ (\Cref{alg:lincirc-union-phi}) can be accomplished in $O(\domain(\phi_1) + \domain(\phi_2)) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset} + \qruntime{\query_2, \tupset, \bound})$.
The remainder requires computing $\query_1 \cup \query_2$ (line 25) and iterating over it (lines 25--29), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset}$. The remainder requires computing $\query_1 \cap \query_2$ (\Cref{alg:lincirc-union-intersection}) and iterating over it (\Crefrange{alg:lincirc-union-intersection-one}{alg:lincirc-union-intersection-three}), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
\caseheading{$m$-ary Join} \caseheading{$m$-ary Join}

BIN
main.pdf

Binary file not shown.

Binary file not shown.