diff --git a/appendix.tex b/appendix.tex index 754e533..aae8d75 100644 --- a/appendix.tex +++ b/appendix.tex @@ -13,7 +13,7 @@ \section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}} +\subsection{Background details for proof of~\Cref{prop:expection-of-polynom}}\label{app:subsec:background-nxdbs} \subsubsection{$\semK$-relations and \abbrNXPDB\xplural}\label{subsec:supp-mat-background}\label{subsec:supp-mat-krelations} \input{app_k-relations} \input{app_notation-background} @@ -39,7 +39,7 @@ \label{sec:circuits-formal} We now formalize circuits and the construction of circuits for $\raPlus$ queries. As mentioned earlier, we represent lineage polynomials as arithmetic circuits over $\mathbb N$-valued variables with $+$, $\times$. -A circuit for query $Q$ and \abbrNXPDB $\pxdb$ is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$. +A circuit for query $Q$ and \abbrNXPDB $\pxdb$\footnote{For background on \abbrNXPDB\xplural, see~\Cref{app:subsec:background-nxdbs}.} is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$. The sink function $\phi_{Q,\pxdb} : \udom^n \rightarrow V_{Q,\pxdb}$ is a partial function that maps the tuples of the $n$-ary relation $Q(\pxdb)$ to vertices. We require that $\phi_{Q,\pxdb}$'s range be limited to sink vertices (i.e., vertices with out-degree 0). 
A function $\ell_{Q,\pxdb} : V_{Q,\pxdb} \rightarrow \{\;+,\times\;\}\cup \mathbb N \cup \vct X$ assigns a label to each node: Source nodes (i.e., vertices with in-degree 0) are labeled with constants or variables (i.e., $\mathbb N \cup \vct X$), while the remaining nodes are labeled with the symbol $+$ or $\times$. @@ -54,7 +54,8 @@ Note that we can construct circuits for \bis in time linear in the time required We now connect the size of a circuit (where the size of a circuit is the number of vertices in the corresponding DAG) for a given $\raPlus$ query $Q$ and \abbrNXPDB $\pxdb$ to -the runtime $\qruntime{Q,\dbbase}$ of the PDB's \dbbaseName $\dbbase$. +the runtime $\qruntime{Q,\tupset}$ of the PDB's \dbbaseName $\tupset$. +\AH{@atri: do we use $\tupset$ or $\gentupset$ here?} We do this formally by showing that the size of the circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -76,38 +77,38 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol \begin{algorithm} -\caption{\abbrStepOne$(\query, \dbbase, E, V, \ell)$} +\caption{\abbrStepOne$(\query, \tupset, E, V, \ell)$} \label{alg:lc} \begin{algorithmic}[1] \Require $\query$: query - \Require $\dbbase$: a \dbbaseName + \Require $\tupset$: a \dbbaseName \Require $E, V, \ell$: accumulators for the edge list, vertex list, and vertex label list. 
- \Ensure $\circuit = \tuple{E, V, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\dbbase)$ - \If{$\query$ is $R$} \Comment{\textbf{Case 1}: $\query$ is a relation atom} - \For{$t \in \dbbase.R$} - \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, R(t))\}$ \Comment{Allocate a fresh node $v_t$} + \Ensure $\circuit = \tuple{E, V, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\tupset)$ + \If{$\query$ is $\rel$} \Comment{\textbf{Case 1}: $\query$ is a relation atom} + \For{$t \in \tupset.\rel$} + \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{\inparen{v_t, \rel\inparen{\tup}}\}$ \Comment{Allocate a fresh node $v_t$} \State $\phi(t) \gets v_t$ \EndFor \ElsIf{$\query$ is $\sigma_\theta(\query')$} \Comment{\textbf{Case 2}: $\query$ is a Selection} - \State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$ + \State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$ \For{$t \in \domain(\phi')$} \State \textbf{if }$\theta(t)$ \textbf{ then } $\phi(t) \gets \phi'(t)$ \textbf{ else } $\phi(t) \gets v_0$ \EndFor \ElsIf{$\query$ is $\pi_{\vec{A}}(\query')$} \Comment{\textbf{Case 3}: $\query$ is a Projection} - \State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$ - \For{$t \in \pi_{\vec{A}}(\query'(\dbbase))$} + \State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \tupset, V, E, \ell)$ + \For{$t \in \pi_{\vec{A}}(\query'(\tupset))$} \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\Comment{Allocate a fresh node $v_t$} \State $\phi(t) \leftarrow v_t$ \EndFor - \For{$t \in \query'(\dbbase)$} + \For{$t \in \query'(\tupset)$} \State $E \leftarrow E \cup \{(\phi'(t), \phi(\pi_{\vec{A}}t))\}$ \EndFor \State Correct nodes with in-degrees $>2$ by appending an equivalent fan-in two tree instead \ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a 
Bag Union} - \State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \dbbase, V, E, \ell)$ - \State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \dbbase, V, E, \ell)$ + \State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \tupset, V, E, \ell)$ + \State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \tupset, V, E, \ell)$ \State $\phi \gets \phi_1 \cup \phi_2$ \For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$} \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$} @@ -116,12 +117,12 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol \EndFor \ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is a $m$-ary Join} \For{$i \in [m]$} - \State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \dbbase, V, E, \ell)$ + \State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \tupset, V, E, \ell)$ \EndFor \For{$t \in \domain(\phi_1) \bowtie \ldots \bowtie \domain(\phi_m)$} \State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, \times)\}$ \Comment{Allocate a fresh node $v_t$} \State $\phi(t) \gets v_t$ - \State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\dbbase))}(t)), v_t)}{i \in [n]}$ + \State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\tupset))}(t)), v_t)}{i \in [m]}$ \EndFor \State Correct nodes with in-degrees $>2$ by appending an equivalent fan-in two tree instead @@ -134,7 +135,7 @@ We define the circuit for a $\raPlus$ query $\query$ recursively by cases as fol \Cref{alg:lc} defines how the circuit for a query result is constructed. We quickly review the number of vertices emitted in each case. \caseheading{Base Relation} -This circuit has $|D_\Omega.R|$ vertices. +This circuit has $\abs{\tupset.\rel}$ vertices. 
\caseheading{Selection} If we assume dead sinks are iteratively garbage collected, @@ -159,7 +160,7 @@ We first show that the depth of the circuit (\depth; \Cref{def:size-depth}) is b \begin{Proposition}[Circuit depth is bounded] \label{prop:circuit-depth} -Let $\query$ be a relational query and $\dbbase$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\dbbase)$ for which +Let $\query$ be a relational query and $\tupset$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\tupset)$ for which $\depth(\circuit^*) \leq O(k|\query|\log(n))$. \end{Proposition} @@ -180,18 +181,19 @@ For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase \begin{Lemma}\label{lem:circ-model-runtime} \label{lem:circuits-model-runtime} -Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \dbbase}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$. +Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\tupset$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\tupset$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \tupset}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$. \end{Lemma} +\AH{Why are the number of vertices considered to be the size of the lineage?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{proof} -We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \dbbase}$. For clarity, we implicitly exclude $v_0$ in the proof below. +We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \tupset}$. 
For clarity, we implicitly exclude $v_0$ in the proof below. -The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\dbbase.R|=\qruntime{R, \dbbase}$ (note that here the degree $k=1$). -For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\dbbase}$ where $k_i$ is the degree of $Q_i$. +The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\tupset.R|=\qruntime{R, \tupset}$ (note that here the degree $k=1$). +For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\tupset}$ where $k_i$ is the degree of $Q_i$. \caseheading{Selection} Assume that $Q = \sigma_\theta(Q_1)$. -In the circuit for $Q$, $|V_{Q,\pxdb}| = |V_{Q_1,\dbbase}|$ vertices, so from the inductive assumption and $\qruntime{Q,\dbbase} = \qruntime{Q_1,\dbbase}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\dbbase} $. +The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{Q,\tupset} = \qruntime{Q_1,\tupset}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\tupset}$. \caseheading{Projection} Assume that $Q = \pi_{\vct A}(Q_1)$. @@ -199,9 +201,9 @@ The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices. \begin{align*} |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\ \intertext{(From the inductive assumption)} -& \leq k\qruntime{Q_1,\dbbase} + \abs{Q_1}\\ -\intertext{(By definition of $\qruntime{Q,\dbbase}$)} -& \le k\qruntime{Q,\dbbase}. +& \leq k\qruntime{Q_1,\tupset} + \abs{Q_1}\\ +\intertext{(By definition of $\qruntime{Q,\tupset}$)} +& \le k\qruntime{Q,\tupset}. \end{align*} \caseheading{Union} Assume that $Q = Q_1 \cup Q_2$. 
@@ -209,9 +211,9 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ ver \begin{align*} |V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\ \intertext{(From the inductive assumption)} -& \leq k(\qruntime{Q_1,\dbbase} + \qruntime{Q_2,\dbbase}) + (|Q_1| + |Q_2|) -\intertext{(By definition of $\qruntime{Q,\dbbase}$)} -& \leq k(\qruntime{Q,\dbbase}). +& \leq k(\qruntime{Q_1,\tupset} + \qruntime{Q_2,\tupset}) + (|Q_1| + |Q_2|)\\ +\intertext{(By definition of $\qruntime{Q,\tupset}$)} +& \leq k(\qruntime{Q,\tupset}). \end{align*} \caseheading{$m$-ary Join} @@ -220,12 +222,12 @@ The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bow \begin{align*} |V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\ \intertext{From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$} -& \leq k\qruntime{Q_1,\dbbase}+\ldots+k\qruntime{Q_k,\dbbase}+\\ +& \leq k\qruntime{Q_1,\tupset}+\ldots+k\qruntime{Q_m,\tupset}+\\ &\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\ -& \leq k(\qruntime{Q_1,\dbbase}+\ldots+\qruntime{Q_m,\dbbase}+\\ -&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_m}|)\\ -\intertext{(By definition of $\qruntime{Q,\dbbase}$ and assumption on $\jointime{\cdot}$)} -& \le k\qruntime{Q,\dbbase}. +& \leq k\left(\qruntime{Q_1,\tupset}+\ldots+\qruntime{Q_m,\tupset}+\right.\\ +&\;\;\;\left.|{Q_1} \bowtie \ldots \bowtie {Q_m}|\right)\\ +\intertext{(By definition of $\qruntime{Q,\tupset}$ and assumption on $\jointime{\cdot}$)} +& \le k\qruntime{Q,\tupset}. \end{align*} The property holds for all recursive queries, and the proof holds. diff --git a/main.pdf b/main.pdf index 9c41a59..645c522 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/main.synctex.gz b/main.synctex.gz index 3c22e1b..d51e2a8 100644 Binary files a/main.synctex.gz and b/main.synctex.gz differ