Fixes to Appendix D.
This commit is contained in:
parent
dbb14420db
commit
3511a18e54
65
appendix.tex
65
appendix.tex
|
@ -112,11 +112,11 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol
|
||||||
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
|
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
|
||||||
\State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$
|
\State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$
|
||||||
\State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$
|
\State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$
|
||||||
\State $\phi \gets \phi_1 \cup \phi_2$
|
\State $\phi \gets \phi_1 \cup \phi_2$\label{alg:lincirc-union-phi}
|
||||||
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}
|
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}\label{alg:lincirc-union-intersection}
|
||||||
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$}
|
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\label{alg:lincirc-union-intersection-one} \Comment{Allocate a fresh node $v_t$}
|
||||||
\State $\phi(t) \gets v_t$
|
\State $\phi(t) \gets v_t$\label{alg:lincirc-union-intersection-two}
|
||||||
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$
|
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$\label{alg:lincirc-union-intersection-three}
|
||||||
\EndFor
|
\EndFor
|
||||||
\State\Return $\tuple{V, E, \phi, \ell}$
|
\State\Return $\tuple{V, E, \phi, \ell}$
|
||||||
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is a $m$-ary Join}
|
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is an $m$-ary Join}
|
||||||
|
@ -186,19 +186,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase
|
||||||
|
|
||||||
\begin{Lemma}\label{lem:circ-model-runtime}
|
\begin{Lemma}\label{lem:circ-model-runtime}
|
||||||
\label{lem:circuits-model-runtime}
|
\label{lem:circuits-model-runtime}
|
||||||
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{\query, \tupset, \bound}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
|
||||||
\end{Lemma}
|
\end{Lemma}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{proof}
|
\begin{proof}
|
||||||
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below.
|
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{\query, \tupset, \bound}$. For clarity, we implicitly exclude $v_0$ in the proof below.
|
||||||
|
|
||||||
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$).
|
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{\rel, \tupset, \bound}$ (note that here the degree $k=1$).
|
||||||
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$.
|
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{\query_i,\tupset, \bound}$ where $k_i$ is the degree of $Q_i$.
|
||||||
|
|
||||||
\caseheading{Selection}
|
\caseheading{Selection}
|
||||||
Assume that $Q = \sigma_\theta(Q_1)$.
|
Assume that $Q = \sigma_\theta(Q_1)$.
|
||||||
In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\tupset}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset} $.
|
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{\query,\tupset, \bound} = \qruntime{\query_1,\tupset, \bound}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{\query,\tupset, \bound} $.
|
||||||
|
|
||||||
\caseheading{Projection}
|
\caseheading{Projection}
|
||||||
Assume that $Q = \pi_{\vct A}(Q_1)$.
|
Assume that $Q = \pi_{\vct A}(Q_1)$.
|
||||||
|
@ -206,9 +206,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
|
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
|
||||||
\intertext{(From the inductive assumption)}
|
\intertext{(From the inductive assumption)}
|
||||||
& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\
|
& \leq k\qruntime{\query_1,\tupset, \bound} + \abs{Q_1}\\
|
||||||
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
|
\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$)}
|
||||||
& \le k\qruntime{Q,\tupset}.
|
& \le k\qruntime{\query,\tupset, \bound}.
|
||||||
\end{align*}
|
\end{align*}
|
||||||
\caseheading{Union}
|
\caseheading{Union}
|
||||||
Assume that $Q = Q_1 \cup Q_2$.
|
Assume that $Q = Q_1 \cup Q_2$.
|
||||||
|
@ -216,9 +216,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
|
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
|
||||||
\intertext{(From the inductive assumption)}
|
\intertext{(From the inductive assumption)}
|
||||||
& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|)
|
& \leq k(\qruntime{\query_1,\tupset, \bound} + \qruntime{\query_2,\tupset, \bound}) + (|Q_1| + |Q_2|)
|
||||||
\intertext{(By definition of $\qruntime{Q,\tupset}$)}
|
\intertext{(By definition of $\qruntime{\query, \tupset, \bound}$)}
|
||||||
& \leq k(\qruntime{Q,\tupset}).
|
& \leq k(\qruntime{\query,\tupset, \bound}).
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
\caseheading{$m$-ary Join}
|
\caseheading{$m$-ary Join}
|
||||||
|
@ -227,12 +227,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
|
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
|
||||||
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$}
|
\intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$}
|
||||||
& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_k,\tupset}+\\
|
& \leq k\qruntime{\query_1,\tupset, \bound}+\ldots+k\qruntime{\query_m,\tupset, \bound}+\\
|
||||||
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
|
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
|
||||||
& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\
|
& \leq k\left(\qruntime{\query_1, \tupset, \bound}+\ldots+\qruntime{\query_m, \tupset, \bound}+\right.\\
|
||||||
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
|
&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\
|
||||||
\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)}
|
\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$ and assumption on $\jointime{\cdot}$)}
|
||||||
& \le k\qruntime{Q,\tupset}.
|
& \le k\qruntime{\query,\tupset, \bound}.
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
The property holds for all recursive queries, and the proof holds.
|
The property holds in every case of the induction, which completes the proof.
|
||||||
|
@ -244,7 +244,7 @@ The property holds for all recursive queries, and the proof holds.
|
||||||
|
|
||||||
We next need to show that we can construct the circuit in time linear in the deterministic runtime.
|
We next need to show that we can construct the circuit in time linear in the deterministic runtime.
|
||||||
\begin{Lemma}\label{lem:tlc-is-the-same-as-det}
|
\begin{Lemma}\label{lem:tlc-is-the-same-as-det}
|
||||||
Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset})$.
|
Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset, \bound})$.
|
||||||
\end{Lemma}
|
\end{Lemma}
|
||||||
\begin{proof}
|
\begin{proof}
|
||||||
By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \emptyset, \{v_0\}, \{(v_0, 0)\})$.
|
By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \emptyset, \{v_0\}, \{(v_0, 0)\})$.
|
||||||
|
@ -254,30 +254,29 @@ We assume that the tuple to sink mapping $\phi$ is a linked hashmap, with $O(1)$
|
||||||
We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table).
|
We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table).
|
||||||
|
|
||||||
|
|
||||||
Before proving our runtime bound, we first observe that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
|
Before proving our runtime bound, we first observe that $\qruntime{\query, \tupset, \bound} \geq \Omega(|\query(\tupset)|)$.
|
||||||
This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$.
|
This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$.
|
||||||
|
|
||||||
We showthat $\qruntime{\query, \tupset}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
|
We show that $\qruntime{\query, \tupset, \bound}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
|
||||||
The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples.
|
The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples.
|
||||||
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset})$ bounds the runtime of \Cref{alg:lc}.
|
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset, \bound})$ bounds the runtime of \Cref{alg:lc}.
|
||||||
|
|
||||||
\AH{What is meant by recursive assumption and how is this valid?}
|
|
||||||
|
|
||||||
\caseheading{Selection}
|
\caseheading{Selection}
|
||||||
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$.
|
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$.
|
||||||
\Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$.
|
\Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$.
|
||||||
By the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset})$.
|
By the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset, \bound})$.
|
||||||
|
|
||||||
\caseheading{Projection}
|
\caseheading{Projection}
|
||||||
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset}$.
|
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset, \bound}$.
|
||||||
What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20).
|
What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20).
|
||||||
The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset})$.
|
The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset, \bound})$.
|
||||||
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset}) \leq O(\qruntime{\query, \tupset}) $, by the the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
|
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset, \bound}) \leq O(\qruntime{\query, \tupset, \bound}) $, by the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$.
|
||||||
|
|
||||||
\caseheading{Bag Union}
|
\caseheading{Bag Union}
|
||||||
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset}$.
|
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
|
||||||
Initializing $\phi$ (line 24) can be accomplished in $O(\domain(\phi_1) + \domain(\phi_2)) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset} + \qruntime{\query_2, \tupset})$.
|
Initializing $\phi$ (\Cref{alg:lincirc-union-phi}) can be accomplished in $O(|\domain(\phi_1)| + |\domain(\phi_2)|) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset, \bound} + \qruntime{\query_2, \tupset, \bound})$.
|
||||||
The remainder requires computing $\query_1 \cup \query_2$ (line 25) and iterating over it (lines 25--29), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset}$.
|
The remainder requires computing $\query_1 \cap \query_2$ (\Cref{alg:lincirc-union-intersection}) and iterating over it (\Crefrange{alg:lincirc-union-intersection-one}{alg:lincirc-union-intersection-three}), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$.
|
||||||
|
|
||||||
|
|
||||||
\caseheading{$m$-ary Join}
|
\caseheading{$m$-ary Join}
|
||||||
|
|
BIN
main.synctex.gz
BIN
main.synctex.gz
Binary file not shown.
Loading…
Reference in a new issue