Done with pass on S2.4

2021-09-18 23:17:00 -04:00 · 2021-09-18 23:17:00 -04:00 · 17a82ec57b
parent 0e26c8d736
commit 17a82ec57b
3 changed files with 44 additions and 8 deletions
--- a/atri.bib
+++ b/atri.bib
@ -123,3 +123,20 @@
  biburl    = {https://dblp.org/rec/conf/pods/JoglekarPR16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
 }
+
+@article{AGM,
+  author    = {Albert Atserias and
+               Martin Grohe and
+               D{\'{a}}niel Marx},
+  title     = {Size Bounds and Query Plans for Relational Joins},
+  journal   = {{SIAM} J. Comput.},
+  volume    = {42},
+  number    = {4},
+  pages     = {1737--1767},
+  year      = {2013},
+  url       = {https://doi.org/10.1137/110859440},
+  doi       = {10.1137/110859440},
+  timestamp = {Thu, 08 Jun 2017 08:59:24 +0200},
+  biburl    = {https://dblp.org/rec/journals/siamcomp/AtseriasGM13.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
--- a/circuits-model-runtime.tex
+++ b/circuits-model-runtime.tex
@ -30,11 +30,13 @@ To decouple our results from specific join algorithms, we first abstract the cos
 \begin{Definition}[Join Cost]
 \label{def:join-cost}
 Denote by $\jointime{R_1, \ldots, R_n}$ the runtime of an algorithm for computing the $n$-ary join $R_1 \bowtie \ldots \bowtie R_n$.
-We require only that the algorithm must enumerate its output, i.e., that $\jointime{R_1, \ldots, R_n} \geq \Omega(|R_1(\db) \bowtie \ldots \bowtie R_n(\db)|)$
+We require only that the algorithm must enumerate its output, i.e., that $\jointime{R_1, \ldots, R_n} \geq |R_1 \bowtie \ldots \bowtie R_n|$.
 \end{Definition}

 Worst-case optimal join algorithms~\cite{skew,ngo-survey} and query evaluation via factorized databases~\cite{factorized-db} (as well as work on FAQs~\cite{DBLP:conf/pods/KhamisNR16}) can be modeled as $\raPlus$ queries (though the query size is data dependent).
-For these algorithms, $\jointime{R_1, \ldots, R_n} = |R_1| + \ldots + |R_n| + |R_1(\db) \bowtie \ldots \bowtie R_n(\db)|$.  Our cost model for general query evaluation follows from the join cost:
+For these algorithms, $\jointime{R_1, \ldots, R_n}$ is linear in the {\em AGM bound}~\cite{AGM}.
+% = |R_1| + \ldots + |R_n| + |R_1(\db) \bowtie \ldots \bowtie R_n(\db)|$.  
+ Our cost model for general query evaluation follows from the join cost:

 \noindent\resizebox{1\linewidth}{!}{
 \begin{minipage}{1.0\linewidth}
@ -47,21 +49,21 @@ For these algorithms, $\jointime{R_1, \ldots, R_n} = |R_1| + \ldots + |R_n| + |R
    \qruntime{\query \cup \query', \db} & = \qruntime{\query, \db} + 
                                            \qruntime{\query', \db} +
                                            \abs{\query(D)}+\abs{\query'(D)} \\
-    \qruntime{\query_1 \bowtie \ldots \bowtie \query_n, \db} 
+    \qruntime{\query_1 \bowtie \ldots \bowtie \query_m, \db} 
                                        & = \qruntime{\query_1, \db} + \ldots + 
-                                            \qruntime{\query_n,\db} + 
-                                            \jointime{\query_1(\db), \ldots, \query_n(\db)}
+                                            \qruntime{\query_m,\db} + 
+                                            \jointime{\query_1(\db), \ldots, \query_m(\db)}
 \end{align*}
 \end{minipage}
 }\\


-Under this model, a query $Q$ evaluated over database $\db$ has runtime $O(\qruntime{Q,\db})$.
+Under this model, an $\raPlus$ query $\query$ evaluated over database $\db$ has runtime $O(\qruntime{\query,\db})$.
 We assume that full table scans are used for every base relation access. We can model index scans by treating an index scan query $\sigma_\theta(R)$ as a base relation.
-Observe that 
+%Observe that 
 % () .\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} It can be verified that the above cost model on the corresponding $\raPlus$ join queries correctly captures the runtime of current best known .

-More specifically \Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\dbbase$, there exists a circuit $\circuit|$ such that $\timeOf{\abbrStepOne}(Q,\dbbase,\circuit)$ and $|\circuit$ are both $O(\qruntime{Q, \dbbase})$. Recall we assumed these two bounds when we moved from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.
+More specifically, \Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\dbbase$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(\query,\dbbase,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntime{\query, \dbbase})$. Recall we assumed these two bounds when we moved from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.
 %
 %We now make a simple observation on the above cost model:
 %\begin{proposition}
--- a/main.bib
+++ b/main.bib
@ -647,3 +647,20 @@ Maximilian Schleich},
  year =         2008,
  publisher =    {Springer Science \& Business Media}
 }
+
+@article{AGM,
+  author    = {Albert Atserias and
+               Martin Grohe and
+               D{\'{a}}niel Marx},
+  title     = {Size Bounds and Query Plans for Relational Joins},
+  journal   = {{SIAM} J. Comput.},
+  volume    = {42},
+  number    = {4},
+  pages     = {1737--1767},
+  year      = {2013},
+  url       = {https://doi.org/10.1137/110859440},
+  doi       = {10.1137/110859440},
+  timestamp = {Thu, 08 Jun 2017 08:59:24 +0200},
+  biburl    = {https://dblp.org/rec/journals/siamcomp/AtseriasGM13.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}