paper-BagRelationalPDBsAreHard/appendix.tex

153 lines
8.2 KiB
TeX

%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Supplementary Material for~\Cref{prop:expection-of-polynom}}\label{subsec:supp-mat-background}
\input{app_notation-background}
\section{Missing details from Section~\ref{sec:hard}}
\label{app:single-mult-p}
\input{app_hardness-results}
\section{Missing details from Section~\ref{sec:algo}}\label{sec:proofs-approx-alg}
\input{app_approx-alg-analysis}
\input{app_one-pass-analysis}
\input{app_samp-monom-analysis}
\subsection{Experimental Results}\label{app:subsec:experiment}
\input{experiments}
\section{Circuits}\label{app:sec-cicuits}
\subsection{Representing Polynomials with Circuits}\label{app:subsec-rep-poly-lin-circ}
\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
Each vertex $v \in V_{Q,\pxdb}$ in the arithmetic circuit
\[\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}\]
encodes a polynomial, realized as
\[\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell_{Q,\pxdb}(v) = +\\
\prod_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell_{Q,\pxdb}(v) = \times\\
\ell_{Q,\pxdb}(v) & \textbf{otherwise.}
\end{cases}\]
We define the circuit for a select-union-project-join query $Q$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$.
\caseheading{Base Relation}
Let $Q$ be a base relation $R$. We define one node for each tuple. Formally, let $V_{Q,\pxdb} = \comprehension{v_t}{t\in R}$, let $\phi_{Q,\pxdb}(t) = v_t$, let $\ell_{Q,\pxdb}(v_t) = R(t)$, and let $E_{Q,\pxdb} = \emptyset$.
This circuit has $|R|$ vertices.
\caseheading{Selection}
Let $Q = \sigma_\theta \inparen{Q_1}$.
We re-use the circuit for $Q_1$. %, but define a new distinguished node $v_0$ with label $0$ and make it the sink node for all tuples that fail the selection predicate.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb}$, and let $\ell_{Q,\pxdb}(v) = \ell_{Q_1,\pxdb}(v)$ for any $v \in V_{Q_1,\pxdb}$. Let $E_{Q,\pxdb} = E_{Q_1,\pxdb}$, and define
\[\phi_{Q,\pxdb}(t) =
\phi_{Q_{1}, \pxdb}(t) \text{ for } t \text{ s.t.}\; \theta(t).\]
Dead sinks are iteratively removed, and so
%\AH{While not explicit, I assume a reviewer would know that the notation above discards tuples/vertices not satisfying the selection predicate.}
%v_0 & \textbf{otherwise}
%\end{cases}$$
this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
\caseheading{Projection}
Let $Q = \pi_{\vct A} {Q_1}$.
We extend the circuit for ${Q_1}$ with a new set of sum vertices (i.e., vertices with label $+$) for each tuple in $Q$, and connect them to the corresponding sink nodes of the circuit for ${Q_1}$.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \comprehension{v_t}{t \in \pi_{\vct A} {Q_1}}$, let $\phi_{Q,\pxdb}(t) = v_t$, and let $\ell_{Q,\pxdb}(v_t) = +$. Finally let
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t'), v_t)}{t = \pi_{\vct A} t', t' \in {Q_1}, t \in \pi_{\vct A} {Q_1}}.\]
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
% \AH{Is the rightmost operator \emph{supposed} to be a $-$? In the beginning we add $|\pi_{\vct A}{Q_1}|$ vertices.}
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\caseheading{Union}
Let $Q = {Q_1} \cup {Q_2}$.
We merge graphs and produce a sum vertex for all tuples in both sides of the union.
Formally, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup V_{Q_2,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \cap {Q_2}}$, let $\ell_{Q,\pxdb}(v_t) = +$, and let
\[E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup E_{Q_2,\pxdb} \cup \comprehension{(\phi_{Q_{1}, \pxdb}(t), v_t), (\phi_{Q_{2}, \pxdb}(t), v_t)}{t \in {Q_1} \cap {Q_2}}\]
\[
\phi_{Q,\pxdb}(t) = \begin{cases}
v_t & \textbf{if } t \in {Q_1} \cap {Q_2}\\
\phi_{Q_{1}, \pxdb}(t) & \textbf{if } t \not \in {Q_2}\\
\phi_{Q_{2}, \pxdb}(t) & \textbf{if } t \not \in {Q_1}\\
\end{cases}\]
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\caseheading{$k$-ary Join}
Let $Q = {Q_1} \bowtie \ldots \bowtie {Q_k}$.
We merge graphs and produce a multiplication vertex for all tuples resulting from the join.
Naively, let $V_{Q,\pxdb} = V_{Q_1,\pxdb} \cup \ldots \cup V_{Q_k,\pxdb} \cup \comprehension{v_t}{t \in {Q_1} \bowtie \ldots \bowtie {Q_k}}$, let
{\small
\begin{multline*}
E_{Q,\pxdb} = E_{Q_1,\pxdb} \cup \ldots \cup E_{Q_k,\pxdb} \cup
\left\{\;
(\phi_{Q_{1}, \pxdb}(\pi_{\sch({Q_1})}t), v_t), \right.\\
\ldots, (\phi_{Q_k,\pxdb}(\pi_{\sch({Q_k})}t), v_t)
\;\left|\;t \in {Q_1} \bowtie \ldots \bowtie {Q_k}\;\right\}
\end{multline*}
}
Let $\ell_{Q,\pxdb}(v_t) = \times$, and let $\phi_{Q,\pxdb}(t) = v_t$.
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in tree is required.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof for~\Cref{lem:circuits-model-runtime}}\label{app:subsec-lem-lin-vs-qplan}
Proof by induction. In the base case, $Q$ is a base relation $R$, and the claim is trivially true since $|V_{R,\pxdb}| = |R|$.
For the inductive step, we assume that we have circuits for subplans $Q_1, \ldots, Q_n$ such that $|V_{Q_i,\pxdb}| \leq (k_i-1)\qruntime{Q_i,\pxdb}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
The circuit for $Q$ has $|V_{Q,\pxdb}| \leq |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and the fact that $\qruntime{Q,\pxdb} = \qruntime{Q_1,\pxdb}$ by definition, we have $|V_{Q,\pxdb}| \leq (k-1) \qruntime{Q,\pxdb}$.
% \AH{Technically, $\kElem$ is the degree of $\poly_1$, but I guess this is a moot point since one can argue that $\kElem$ is also the degree of $\poly$.}
% OK: Correct
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
% \AH{The combination of terms above doesn't follow the details for projection above.}
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}| + 2 \qruntime{Q_1,\pxdb}\\
\intertext{(From the inductive assumption)}
& \leq (k-1)\qruntime{Q_1,\pxdb} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \le (k-1)\qruntime{Q,\pxdb}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
%\intertext{By \Cref{prop:queries-need-to-output-tuples} $\qruntime{Q_1,\pxdb} \geq |Q_1|$}
%& \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+\qruntime{Q_1,\pxdb}+\qruntime{Q_2,\pxdb}|\\
\intertext{(From the inductive assumption)}
& \leq (k-1)(\qruntime{Q_1,\pxdb} + \qruntime{Q_2,\pxdb}) + (|{Q_1}| + |{Q_2}|)\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& \leq (k-1)(\qruntime{Q,\pxdb}).
\end{align*}
\caseheading{$k$-ary Join}
Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_k$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
\intertext{(From the inductive assumption and noting $\forall i: k_i \leq k-1$)}
& \leq (k-1)\qruntime{Q_1,\pxdb}+\ldots+(k-1)\qruntime{Q_k,\pxdb}+\\
&\;\;\; (k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|\\
& \leq (k-1)(\qruntime{Q_1,\pxdb}+\ldots+\qruntime{Q_k,\pxdb}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_k}|)\\
\intertext{(By definition of $\qruntime{Q,\pxdb}$)}
& = (k-1)\qruntime{Q,\pxdb}.
\end{align*}
The property holds in every case of the recursive query definition, which completes the proof.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: