diff --git a/appendix.tex b/appendix.tex index 39aaa0e..87131ae 100644 --- a/appendix.tex +++ b/appendix.tex @@ -112,11 +112,11 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol \ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union} \State $\tuple{V', E', \phi_1, \ell'} \gets \lincirc(\query_1, \tupset, V, E, \ell)$ \State $\tuple{V, E, \phi_2, \ell} \gets \lincirc(\query_2, \tupset, V', E', \ell')$ - \State $\phi \gets \phi_1 \cup \phi_2$ - \For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$} - \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$} - \State $\phi(t) \gets v_t$ - \State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$ + \State $\phi \gets \phi_1 \cup \phi_2$\label{alg:lincirc-union-phi} + \For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}\label{alg:lincirc-union-intersection} + \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\label{alg:lincirc-union-intersection-one} \Comment{Allocate a fresh node $v_t$} + \State $\phi(t) \gets v_t$\label{alg:lincirc-union-intersection-two} + \State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$\label{alg:lincirc-union-intersection-three} \EndFor \State\Return $\tuple{V, E, \phi, \ell}$ \ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is a $m$-ary Join} @@ -186,19 +186,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase \begin{Lemma}\label{lem:circ-model-runtime} \label{lem:circuits-model-runtime} -Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$. 
+Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{\query, \tupset, \bound}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$. \end{Lemma} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{proof} -We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. For clarity, we implicitly exclude $v_0$ in the proof below. +We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{\query, \tupset, \bound}$. For clarity, we implicitly exclude $v_0$ in the proof below. -The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$). -For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$. +The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{\rel, \tupset, \bound}$ (note that here the degree $k=1$). +For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{\query_i,\tupset, \bound}$ where $k_i$ is the degree of $Q_i$. \caseheading{Selection} Assume that $Q = \sigma_\theta(Q_1)$. -In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\tupset}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset} $. 
+The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{\query,\tupset, \bound} = \qruntime{\query_1,\tupset, \bound}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{\query,\tupset, \bound} $. \caseheading{Projection} Assume that $Q = \pi_{\vct A}(Q_1)$. @@ -206,9 +206,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices. \begin{align*} |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\ \intertext{(From the inductive assumption)} -& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\ -\intertext{(By definition of $\qruntime{Q,\tupset}$)} -& \le k\qruntime{Q,\tupset}. +& \leq k\qruntime{\query_1,\tupset, \bound} + \abs{Q_1}\\ +\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$)} +& \le k\qruntime{\query,\tupset, \bound}. \end{align*} \caseheading{Union} Assume that $Q = Q_1 \cup Q_2$. @@ -216,9 +216,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver \begin{align*} |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\ \intertext{(From the inductive assumption)} -& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|) -\intertext{(By definition of $\qruntime{Q,\tupset}$)} -& \leq k(\qruntime{Q,\tupset}). +& \leq k(\qruntime{\query_1,\tupset, \bound} + \qruntime{\query_2,\tupset, \bound}) + (|Q_1| + |Q_2|) +\intertext{(By definition of $\qruntime{\query, \tupset, \bound}$)} +& \leq k(\qruntime{\query,\tupset, \bound}). 
\end{align*} \caseheading{$m$-ary Join} @@ -227,12 +227,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow \begin{align*} |V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\ \intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$} -& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_k,\tupset}+\\ +& \leq k\qruntime{\query_1,\tupset, \bound}+\ldots+k\qruntime{\query_m,\tupset, \bound}+\\ &\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\ -& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\ +& \leq k\left(\qruntime{\query_1, \tupset, \bound}+\ldots+\qruntime{\query_m, \tupset, \bound}+\right.\\ &\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\ -\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)} -& \le k\qruntime{Q,\tupset}. +\intertext{(By definition of $\qruntime{\query,\tupset, \bound}$ and assumption on $\jointime{\cdot}$)} +& \le k\qruntime{\query,\tupset, \bound}. \end{align*} The property holds for all recursive queries, and the proof holds. @@ -244,7 +244,7 @@ The property holds for all recursive queries, and the proof holds. We next need to show that we can construct the circuit in time linear in the deterministic runtime. \begin{Lemma}\label{lem:tlc-is-the-same-as-det} -Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset})$. +Given a query $\query$ over a \dbbaseName $\tupset$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\lincirc}(\query,\tupset,\circuit^*) \le O(\qruntime{\query, \tupset, \bound})$. \end{Lemma} \begin{proof} By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\lincirc(\query, \tupset, \emptyset, \{v_0\}, \{(v_0, 0)\})$. 
@@ -254,30 +254,29 @@ We assume that the tuple to sink mapping $\phi$ is a linked hashmap, with $O(1)$ We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table). -Before proving our runtime bound, we first observe that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$. +Before proving our runtime bound, we first observe that $\qruntime{\query, \tupset, \bound} \geq \Omega(|\query(\db)|)$. This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$. -We showthat $\qruntime{\query, \tupset}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion. +We show that $\qruntime{\query, \tupset, \bound}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion. The base case of a relation atom requires only an $O(|\tupset.R|)$ iteration over the source tuples. -For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset})$ bounds the runtime of \Cref{alg:lc}. +For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \tupset, \bound})$ bounds the runtime of \Cref{alg:lc}. -\AH{What is meant by recursive assumption and how is this valid?} \caseheading{Selection} -Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$. +Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$. \Cref{alg:lc} requires a loop over every element of $\query'(\tupset)$. 
-By the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset})$. +By the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \tupset, \bound})$. \caseheading{Projection} -Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset}$. +Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \tupset, \bound})$, which in turn is a term in $\qruntime{\pi_{A}\query', \tupset, \bound}$. What remains is an iteration over $\pi_{A}(\query(\tupset))$ (lines 13--16), an iteration over $\query'(\tupset)$ (lines 17--19), and the construction of a fan-in tree (line 20). -The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset})$. -The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset}) \leq O(\qruntime{\query, \tupset}) $, by the the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$. +The first iteration is $O(|\query(\tupset)|) \leq O(\qruntime{\query, \tupset, \bound})$. +The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\tupset)|) \leq O(\qruntime{\query', \tupset, \bound}) \leq O(\qruntime{\query, \tupset, \bound}) $, by the observation above that $\qruntime{\query, \db, \bound} \geq \Omega(|\query(\db)|)$. \caseheading{Bag Union} -As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset}$. 
-Initializing $\phi$ (line 24) can be accomplished in $O(\domain(\phi_1) + \domain(\phi_2)) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset} + \qruntime{\query_2, \tupset})$. -The remainder requires computing $\query_1 \cup \query_2$ (line 25) and iterating over it (lines 25--29), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset}$. +As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$. +Initializing $\phi$ (\Cref{alg:lincirc-union-phi}) can be accomplished in $O(|\domain(\phi_1)| + |\domain(\phi_2)|) = O(|\query_1(\tupset)| + |\query_2(\tupset)|) \leq O(\qruntime{\query_1, \tupset, \bound} + \qruntime{\query_2, \tupset, \bound})$. +The remainder requires computing $\query_1 \cap \query_2$ (\Cref{alg:lincirc-union-intersection}) and iterating over it (\Crefrange{alg:lincirc-union-intersection-one}{alg:lincirc-union-intersection-three}), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \tupset, \bound}$. \caseheading{$m$-ary Join} diff --git a/main.pdf b/main.pdf index a550480..3078a28 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/main.synctex.gz b/main.synctex.gz index 2ae6d0f..3ec0ad6 100644 Binary files a/main.synctex.gz and b/main.synctex.gz differ