Done with pass on S2.4

master
Atri Rudra 2021-09-18 23:17:00 -04:00
parent 0e26c8d736
commit 17a82ec57b
3 changed files with 44 additions and 8 deletions

View File

@ -123,3 +123,20 @@
biburl = {https://dblp.org/rec/conf/pods/JoglekarPR16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{AGM,
author = {Albert Atserias and
Martin Grohe and
D{\'{a}}niel Marx},
title = {Size Bounds and Query Plans for Relational Joins},
journal = {{SIAM} J. Comput.},
volume = {42},
number = {4},
pages = {1737--1767},
year = {2013},
url = {https://doi.org/10.1137/110859440},
doi = {10.1137/110859440},
timestamp = {Thu, 08 Jun 2017 08:59:24 +0200},
biburl = {https://dblp.org/rec/journals/siamcomp/AtseriasGM13.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}

View File

@ -30,11 +30,13 @@ To decouple our results from specific join algorithms, we first abstract the cos
\begin{Definition}[Join Cost]
\label{def:join-cost}
Denote by $\jointime{R_1, \ldots, R_n}$ the runtime of an algorithm for computing the $n$-ary join $R_1 \bowtie \ldots \bowtie R_n$.
We require only that the algorithm must enumerate its output, i.e., that $\jointime{R_1, \ldots, R_n} \geq \Omega(|R_1(\db) \bowtie \ldots \bowtie R_n(\db)|)$
We require only that the algorithm must enumerate its output, i.e., that $\jointime{R_1, \ldots, R_n} \geq |R_1 \bowtie \ldots \bowtie R_n|$.
\end{Definition}
Worst-case optimal join algorithms~\cite{skew,ngo-survey} and query evaluation via factorized databases~\cite{factorized-db} (as well as work on FAQs~\cite{DBLP:conf/pods/KhamisNR16}) can be modeled as $\raPlus$ queries (though the query size is data dependent).
For these algorithms, $\jointime{R_1, \ldots, R_n} = |R_1| + \ldots + |R_n| + |R_1(\db) \bowtie \ldots \bowtie R_n(\db)|$. Our cost model for general query evaluation follows from the join cost:
For these algorithms, $\jointime{R_1, \ldots, R_n}$ is linear in the {\em AGM bound}~\cite{AGM}.
% = |R_1| + \ldots + |R_n| + |R_1(\db) \bowtie \ldots \bowtie R_n(\db)|$.
Our cost model for general query evaluation follows from the join cost:
\noindent\resizebox{1\linewidth}{!}{
\begin{minipage}{1.0\linewidth}
@ -47,21 +49,21 @@ For these algorithms, $\jointime{R_1, \ldots, R_n} = |R_1| + \ldots + |R_n| + |R
\qruntime{\query \cup \query', \db} & = \qruntime{\query, \db} +
\qruntime{\query', \db} +
\abs{\query(\db)}+\abs{\query'(\db)} \\
\qruntime{\query_1 \bowtie \ldots \bowtie \query_n, \db}
\qruntime{\query_1 \bowtie \ldots \bowtie \query_m, \db}
& = \qruntime{\query_1, \db} + \ldots +
\qruntime{\query_n,\db} +
\jointime{\query_1(\db), \ldots, \query_n(\db)}
\qruntime{\query_m,\db} +
\jointime{\query_1(\db), \ldots, \query_m(\db)}
\end{align*}
\end{minipage}
}\\
Under this model, a query $Q$ evaluated over database $\db$ has runtime $O(\qruntime{Q,\db})$.
Under this model, an $\raPlus$ query $\query$ evaluated over database $\db$ has runtime $O(\qruntime{\query,\db})$.
We assume that full table scans are used for every base relation access. We can model index scans by treating an index scan query $\sigma_\theta(R)$ as a base relation.
Observe that
%Observe that
% () .\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} It can be verified that the above cost model on the corresponding $\raPlus$ join queries correctly captures the runtime of current best known .
More specifically \Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\dbbase$, there exists a circuit $\circuit|$ such that $\timeOf{\abbrStepOne}(Q,\dbbase,\circuit)$ and $|\circuit$ are both $O(\qruntime{Q, \dbbase})$. Recall we assumed these two bounds when we moved from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.
More specifically, \Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\dbbase$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(\query,\dbbase,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntime{\query, \dbbase})$. Recall we assumed these two bounds when we moved from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.
%
%We now make a simple observation on the above cost model:
%\begin{proposition}

View File

@ -647,3 +647,20 @@ Maximilian Schleich},
year = 2008,
publisher = {Springer Science \& Business Media}
}
@article{AGM,
author = {Albert Atserias and
Martin Grohe and
D{\'{a}}niel Marx},
title = {Size Bounds and Query Plans for Relational Joins},
journal = {{SIAM} J. Comput.},
volume = {42},
number = {4},
pages = {1737--1767},
year = {2013},
url = {https://doi.org/10.1137/110859440},
doi = {10.1137/110859440},
timestamp = {Thu, 08 Jun 2017 08:59:24 +0200},
biburl = {https://dblp.org/rec/journals/siamcomp/AtseriasGM13.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}