Trimming and adding experiments

main
Oliver Kennedy 2023-07-20 11:37:39 -04:00
parent cca999be40
commit e632147da8
Signed by: okennedy
GPG Key ID: 3E5F9B3ABD3FDB60
13 changed files with 147 additions and 43 deletions

View File

@ -0,0 +1,69 @@
"""Plot relative TPC-H rewrite times for each Astral configuration.

Reads per-query timing data from ``tpch.csv``, normalizes each
configuration's average runtime against the Astral-Raw baseline, renders
a grouped bar chart, and saves it to ``../../figures/tpch.pdf``.  Also
prints summary statistics (baseline runtime range and the relative
improvement of the shared/BDD configuration).
"""
import matplotlib.pyplot as plt
import csv
import numpy as np
with open("tpch.csv", newline='') as csvfile:
    data = list(csv.DictReader(csvfile))
# (series label, bar color); each label lowercased maps onto a
# "<label>-avg" column in the CSV.
configs = [
    ("Astral-Raw", '#B09ABA'),
    ("Astral-BDD", '#0062A0'),
    # ("Spark-Fair", '#409EDA'),
]
x = np.arange(len(data))  # one bar group per TPC-H query
experiments = [row["experiment"] for row in data]
width = 0.35  # the width of the bars
multiplier = 0
fig, ax = plt.subplots(layout='constrained')
fig.set_size_inches(6, 2.5)
for config, color in configs:
    offset = width * multiplier
    # Normalize by the Astral-Raw baseline so bars are relative runtimes
    # (the baseline series is therefore always 1.0).
    results = [
        float(row[config.lower() + "-avg"]) / float(row["astral-raw-avg"])
        for row in data
    ]
    rects = ax.bar(
        x + offset,
        results,
        width,
        color=color,
        label=config.replace("BDD", "Shared")
        .replace("Raw", "Iterative")
        .replace("Spark-Fair", "Native Spark Rules")
    )
    multiplier += 1
ax.set_ylabel("Rewrite Time (relative)")
ax.set_xlabel("TPC-H Query")
# Center each tick under its group of bars.  (Was `x + width`, which
# lands on the center of the *second* bar rather than the group center.)
ax.set_xticks(x + width * (len(configs) - 1) / 2,
              [n.replace("tpch_", "Q ") for n in experiments])
ax.legend(loc='upper right')
ax.tick_params(axis='x', rotation=90)
ax.set_ylim([0, 1.75])
# Report the absolute baseline runtime range (converted to milliseconds).
runtimes = [
    float(row["astral-raw-avg"])
    for row in data
]
print("Min Base Runtime: " + str(min(runtimes) * 1000))
print("Max Base Runtime: " + str(max(runtimes) * 1000))
# Report the relative improvement of the shared (BDD) configuration over
# the iterative (Raw) baseline, as a percentage.
offsets = [
    (float(row["astral-raw-avg"]) - float(row["astral-bdd-avg"])) / float(row["astral-raw-avg"])
    for row in data
]
print("Min % improvement: " + str(min(offsets) * 100))
print("Max % improvement: " + str(max(offsets) * 100))
# plt.show()
plt.savefig("../../figures/tpch.pdf")

View File

@ -0,0 +1,23 @@
experiment,astral-raw-avg,astral-raw-stddev,astral-raw-iter,astral-bdd-avg,astral-bdd-stddev,astral-bdd-iter,spark-fair-avg,spark-fair-stddev,spark-fair-iter,spark-full-avg,spark-full-stddev,spark-full-iter
tpch_1,1.840094E-4,2.1781759144752242E-5,2,9.29987E-5,4.419075419372244E-5,2,1.5714089999999998E-4,1.7659918399868424E-5,2,0.007647055599999998,0.0026875984053911536,0
tpch_2,0.0038820162000000004,5.629520118823218E-4,3,0.0130002549,9.422579563034217E-4,100,6.065052E-4,4.843660088775851E-5,2,0.0203240168,0.0011065059343584869,0
tpch_3,2.7122789999999995E-4,1.4648936544678944E-5,3,1.9404859999999998E-4,1.2800727215280034E-5,2,1.615399E-4,1.4713979226911947E-5,2,0.0051231492,5.729715029046055E-4,0
tpch_4,9.43118E-5,1.377914463875026E-4,2,2.2017399999999997E-5,2.0900300572001573E-6,2,4.32168E-5,1.2745027130610603E-5,2,0.0041803599,8.070434000531492E-5,0
tpch_5,7.934705999999998E-4,1.7888404714794493E-5,3,6.464419999999999E-4,2.0244732114804498E-5,2,4.760407E-4,6.934067306421708E-6,2,0.006129368800000001,1.341364917707054E-4,0
tpch_6,3.57857E-5,1.2246298257025344E-6,2,1.5809299999999998E-5,3.512520720223593E-6,2,2.81285E-5,1.1864128634248708E-5,2,0.0019322505000000001,1.9209639696998393E-4,0
tpch_7,7.456517E-4,2.1033036609343995E-5,3,5.260697999999998E-4,1.4001399292934934E-5,2,4.5663539999999997E-4,8.659344411674894E-6,2,0.009379533399999999,0.0026803466425624986,0
tpch_8,0.0020564648,2.434196759017669E-5,3,0.0025151252999999997,1.178515395795706E-5,4,0.0015626877000000001,3.4370261151147756E-4,2,0.010790846400000002,8.961831118822865E-4,0
tpch_9,9.167828000000002E-4,1.710784336378299E-5,3,0.001086863,6.0831552010564396E-6,4,6.111141999999999E-4,1.0169319837634386E-5,2,0.0070050528999999985,5.062979845870654E-4,0
tpch_10,2.439394E-4,7.628740868583057E-6,3,2.0845779999999997E-4,1.145515988365103E-5,2,1.6448690000000002E-4,6.560404491339689E-6,2,0.0043827276,1.4026522905210848E-4,0
tpch_11,2.4914359999999994E-4,5.298831351760511E-5,3,1.215963E-4,1.1898363997205705E-5,2,1.1498620000000001E-4,1.9846618718562532E-5,2,0.007717235800000001,3.591405618672658E-4,0
tpch_12,2.1210689999999997E-4,1.6872309085895882E-5,3,1.1019400000000001E-4,4.195809958279806E-5,2,7.14514E-5,3.728839261754103E-6,2,0.0038320675,1.4166820961405283E-4,0
tpch_13,1.0882800000000001E-4,2.1259760972315673E-5,3,3.29256E-5,3.41436747290037E-6,2,7.710949999999999E-5,1.0701348618281752E-5,2,0.0021374983,1.973782409659428E-4,0
tpch_14,7.21493E-5,5.001632274568013E-6,3,4.03188E-5,1.2962281280702046E-6,2,3.91297E-5,2.6118419956038897E-6,2,0.0024247746999999996,9.39967672817135E-5,0
tpch_15,1.1417409999999999E-4,5.422143090144434E-6,3,4.55709E-5,1.512777607581464E-6,2,6.66032E-5,6.166294297225916E-6,2,0.0059133454000000005,3.4318010087276143E-4,0
tpch_16,8.66505E-5,5.775626394600019E-6,3,7.63784E-5,4.130329176034281E-5,2,6.0093099999999996E-5,4.084791756014001E-6,2,0.0038725242,1.1612078642069436E-4,0
tpch_17,3.0984059999999996E-4,7.380461180714947E-6,3,1.766523E-4,7.316908213856555E-6,2,5.8308100000000005E-5,6.668952518199534E-6,2,0.0054993972000000006,2.0727225538589005E-4,0
tpch_18,3.007245E-4,1.3359403364297422E-5,3,1.06949E-4,9.1362148070195E-6,2,1.2531830000000002E-4,5.426769997889946E-6,2,0.0053609717000000015,2.9065356561446226E-4,0
tpch_19,6.24672E-5,4.4095480221899275E-6,2,5.11519E-5,3.2446215942694374E-6,2,4.47918E-5,3.4470000812301816E-6,2,0.004406292,2.6146098637769234E-4,0
tpch_20,9.79468E-5,4.383021008391389E-6,3,3.4252700000000005E-5,1.1852735591413258E-6,2,4.89149E-5,4.7565494730949315E-6,2,0.010493673899999998,2.857101304349327E-4,0
tpch_21,7.759042E-4,2.0296651210483196E-5,3,2.70554E-4,1.1056649456323163E-5,2,3.9691629999999997E-4,2.8626637871220755E-5,3,0.0083648368,1.3908144900365044E-4,0
tpch_22,8.75676E-5,1.2777619459038465E-5,2,5.4465E-5,5.304875342550475E-6,3,8.65216E-5,3.541804658644899E-6,2,0.004579704199999999,2.057934185098613E-4,0
1 experiment astral-raw-avg astral-raw-stddev astral-raw-iter astral-bdd-avg astral-bdd-stddev astral-bdd-iter spark-fair-avg spark-fair-stddev spark-fair-iter spark-full-avg spark-full-stddev spark-full-iter
2 tpch_1 1.840094E-4 2.1781759144752242E-5 2 9.29987E-5 4.419075419372244E-5 2 1.5714089999999998E-4 1.7659918399868424E-5 2 0.007647055599999998 0.0026875984053911536 0
3 tpch_2 0.0038820162000000004 5.629520118823218E-4 3 0.0130002549 9.422579563034217E-4 100 6.065052E-4 4.843660088775851E-5 2 0.0203240168 0.0011065059343584869 0
4 tpch_3 2.7122789999999995E-4 1.4648936544678944E-5 3 1.9404859999999998E-4 1.2800727215280034E-5 2 1.615399E-4 1.4713979226911947E-5 2 0.0051231492 5.729715029046055E-4 0
5 tpch_4 9.43118E-5 1.377914463875026E-4 2 2.2017399999999997E-5 2.0900300572001573E-6 2 4.32168E-5 1.2745027130610603E-5 2 0.0041803599 8.070434000531492E-5 0
6 tpch_5 7.934705999999998E-4 1.7888404714794493E-5 3 6.464419999999999E-4 2.0244732114804498E-5 2 4.760407E-4 6.934067306421708E-6 2 0.006129368800000001 1.341364917707054E-4 0
7 tpch_6 3.57857E-5 1.2246298257025344E-6 2 1.5809299999999998E-5 3.512520720223593E-6 2 2.81285E-5 1.1864128634248708E-5 2 0.0019322505000000001 1.9209639696998393E-4 0
8 tpch_7 7.456517E-4 2.1033036609343995E-5 3 5.260697999999998E-4 1.4001399292934934E-5 2 4.5663539999999997E-4 8.659344411674894E-6 2 0.009379533399999999 0.0026803466425624986 0
9 tpch_8 0.0020564648 2.434196759017669E-5 3 0.0025151252999999997 1.178515395795706E-5 4 0.0015626877000000001 3.4370261151147756E-4 2 0.010790846400000002 8.961831118822865E-4 0
10 tpch_9 9.167828000000002E-4 1.710784336378299E-5 3 0.001086863 6.0831552010564396E-6 4 6.111141999999999E-4 1.0169319837634386E-5 2 0.0070050528999999985 5.062979845870654E-4 0
11 tpch_10 2.439394E-4 7.628740868583057E-6 3 2.0845779999999997E-4 1.145515988365103E-5 2 1.6448690000000002E-4 6.560404491339689E-6 2 0.0043827276 1.4026522905210848E-4 0
12 tpch_11 2.4914359999999994E-4 5.298831351760511E-5 3 1.215963E-4 1.1898363997205705E-5 2 1.1498620000000001E-4 1.9846618718562532E-5 2 0.007717235800000001 3.591405618672658E-4 0
13 tpch_12 2.1210689999999997E-4 1.6872309085895882E-5 3 1.1019400000000001E-4 4.195809958279806E-5 2 7.14514E-5 3.728839261754103E-6 2 0.0038320675 1.4166820961405283E-4 0
14 tpch_13 1.0882800000000001E-4 2.1259760972315673E-5 3 3.29256E-5 3.41436747290037E-6 2 7.710949999999999E-5 1.0701348618281752E-5 2 0.0021374983 1.973782409659428E-4 0
15 tpch_14 7.21493E-5 5.001632274568013E-6 3 4.03188E-5 1.2962281280702046E-6 2 3.91297E-5 2.6118419956038897E-6 2 0.0024247746999999996 9.39967672817135E-5 0
16 tpch_15 1.1417409999999999E-4 5.422143090144434E-6 3 4.55709E-5 1.512777607581464E-6 2 6.66032E-5 6.166294297225916E-6 2 0.0059133454000000005 3.4318010087276143E-4 0
17 tpch_16 8.66505E-5 5.775626394600019E-6 3 7.63784E-5 4.130329176034281E-5 2 6.0093099999999996E-5 4.084791756014001E-6 2 0.0038725242 1.1612078642069436E-4 0
18 tpch_17 3.0984059999999996E-4 7.380461180714947E-6 3 1.766523E-4 7.316908213856555E-6 2 5.8308100000000005E-5 6.668952518199534E-6 2 0.0054993972000000006 2.0727225538589005E-4 0
19 tpch_18 3.007245E-4 1.3359403364297422E-5 3 1.06949E-4 9.1362148070195E-6 2 1.2531830000000002E-4 5.426769997889946E-6 2 0.0053609717000000015 2.9065356561446226E-4 0
20 tpch_19 6.24672E-5 4.4095480221899275E-6 2 5.11519E-5 3.2446215942694374E-6 2 4.47918E-5 3.4470000812301816E-6 2 0.004406292 2.6146098637769234E-4 0
21 tpch_20 9.79468E-5 4.383021008391389E-6 3 3.4252700000000005E-5 1.1852735591413258E-6 2 4.89149E-5 4.7565494730949315E-6 2 0.010493673899999998 2.857101304349327E-4 0
22 tpch_21 7.759042E-4 2.0296651210483196E-5 3 2.70554E-4 1.1056649456323163E-5 2 3.9691629999999997E-4 2.8626637871220755E-5 3 0.0083648368 1.3908144900365044E-4 0
23 tpch_22 8.75676E-5 1.2777619459038465E-5 2 5.4465E-5 5.304875342550475E-6 3 8.65216E-5 3.541804658644899E-6 2 0.004579704199999999 2.057934185098613E-4 0

View File

@ -34,7 +34,7 @@ import re
dataOutputDirectory = "../timing/output/"
# graph output directory
graphOutputDirectory = "../plotting/output/"
graphOutputDirectory = "../../../figures/"
# regular expression string to match the JSON data string
reDataString = '{"data":\D.*}}'
@ -46,7 +46,7 @@ reQueryString = r'Query.(?P<number>\d\d|\d)."'
numberOfRuns = -1
# graph output names
stackedGraphOutputFile = "stackedGraph-" + str(time.strftime("%F__%H_%M_%S")) + ".pdf"
stackedGraphOutputFile = "stackedGraph-" + str(time.strftime("%F")) + ".pdf"
# storage for totals, then the averages of the data from runs
queryDataDict = defaultdict(dict)
@ -91,19 +91,19 @@ for i in range(1,23):
leftover.append(abs(queryDataDict[str(i)]["applyTime"] - queryDataDict[str(i)]["transformTime"]) / 1000000000.0)
# stacked graph generation
plt.figure(figsize=(10, 6))
plt.figure(figsize=(10, 5))
plt.rcParams.update({'font.size': 18})
plt.xticks(rotation = 90, label = "")
plt.tick_params(axis = 'x', which = 'major', labelsize = 15.0)
plt.ylim(ymin = 0, ymax = 3)
plt.ylim(ymin = 0, ymax = 2)
plt.xlabel('TPC-H Query #')
plt.ylabel('Total Time Spent Optimizing (sec)')
plt.bar(xAxis, search, label='Search', color = '#00263E')
plt.bar(xAxis, transformExpression, label='Expression Transformations', color = '#B09ABA', bottom = search)
plt.bar(xAxis, transformExpression, label='Expression Xforms', color = '#B09ABA', bottom = search)
plt.bar(xAxis, ineffective, label='Ineffective Rewrites', color = '#0062A0', bottom = np.array(search) + np.array(transformExpression))
plt.bar(xAxis, effective, label='Effective Rewrites', color = '#409EDA', bottom = np.array(search) + np.array(transformExpression) + np.array(ineffective))
plt.bar(xAxis, execution, label='Fixpoint Loop', color = '#76C8FC', bottom = np.array(search) + np.array(transformExpression) + np.array(ineffective) + np.array(effective))
plt.bar(xAxis, leftover, label='Untracked', color = '#7A0097', bottom = np.array(search) + np.array(transformExpression) + np.array(ineffective) + np.array(effective) + np.array(execution))
plt.legend(loc="upper left")
plt.legend(loc="upper left", ncol=2)
plt.savefig(graphOutputDirectory + stackedGraphOutputFile ,bbox_inches='tight')

Binary file not shown.

BIN
figures/tpch.pdf Normal file

Binary file not shown.

View File

@ -55,6 +55,8 @@
\usepackage{listings}
\usepackage{tikz}
\newcommand{\trimmedcaption}[1]{\vspace*{-2mm}\caption{#1}\vspace*{-5mm}}
\usetikzlibrary{positioning,shapes.misc,arrows.meta}
\tikzset{
plannode/.style={

View File

@ -2,6 +2,6 @@
Optimizing compilers and program analysis tools are built around the same operation: searching large representations of programs for specific patterns.
This search is expensive, and a significant bottleneck for both styles of tools.
In this paper, we propose the AST-Relational ALgebra (\systemlang), a declarative language based on Relational Algebra for writing optimizing compilers and program analysis tools.
In this paper, we propose the AST-Relational ALgebra (\systemlang), a declarative language for writing optimizing compilers and program analysis tools.
We show how typical implementations of rewrite rules using `match' syntax can be compiled into \systemlang, and develop an execution engine specialized for the needs of optimizing compilers.
To demonstrate the potential value of \systemlang, we perform a case study using the Apache Spark Catalyst optimizer, and show that \textcolor{red}{[Experiments Pending]}
To demonstrate the potential value of \systemlang, we perform a case study using the Apache Spark Catalyst optimizer, and show an improvement of up to a factor of 4.

View File

@ -3,7 +3,8 @@
\label{sec:conclusions}
In this paper, we introduced \systemlang, a language for building declarative compilers, and a work-sharing optimization that it enables.
As members of the DB and PL communities, we believe that \systemlang opens up an entirely new avenue of research for both groups, whether in terms of new opportunities for large scale program analysis or optimization, or via new and unique data management patterns and system requirements.
Through this paper and prior work, we have demonstrated the potential of declarative compilers.
We believe that \systemlang opens up an entirely new avenue of research for both the databases and PL communities, both to realize its full promise (e.g., guaranteeing convergence), and to open up new opportunities for large scale program analysis and optimization.
% \begin{itemize}
% \item Recursion / Properties of subtrees

View File

@ -1,13 +1,26 @@
%!TEX root=../main.tex
\section{Experiments}
\label{sec:experiments}
This section is work in progress. The experiments are run on the latest version of open source Apache Spark for the baseline. The concepts detailed in this paper are built into a scala based code generation framework that we call ASTralSys. ASTRalSys integrates into Spark's query optimizer Catalyst.
We have two types of data for our experiments:
\begin{enumerate}
\item The rewrite rules that ASTralSys is build on: We use rewrite rules as defined in Spark's Catalyst and translate them into a representation that ASTralSys understands via a scala based DSL.
\item The query AST that is rewritten using Spark or ASTralSys: We use queries generated by the TPC-H benchmark and queries generated by Vizier, a queryable Spark based notebook.
\end{enumerate}
\begin{figure}
\includegraphics[width=\columnwidth]{figures/tpch.pdf}
\trimmedcaption{Compile times on TPC-H using a set of 7 rules.}
\label{fig:results}
\end{figure}
We plan to add a graph comparing the runtimes(in secs) of baseline Spark and ASTralSys to rewrite a query using candidate rules from Spark's Catalyst. The subsection will explain the performance numbers as seen in the graph and outline insights gathered from the experiment.
\disclaimer{We would like to call out that while some of the concepts detailed in this paper were developed while the first author was employed at amazon this paper doesn't use any code or customer data that relates to amazon or any of its systems. The implementation of ASTralSys was not done in-house but rather by our academic collaborators. The experiments will be run on a off-shore server hosted at the University at Buffalo. We plan to complete the experiments section by 22nd July 2023 and would like the legal review process to commence.}
Evaluation was performed on a 3.5 GHz AMD Ryzen 9 5950X 16-Core CPU, with Linux 6.2.6, OpenJDK 11.0.19, Scala 2.12.15.
Results shown are averaged over 10 runs, with 4 discarded burn-in trials to trigger JIT.
We implemented the Astral compiler, as well as its work sharing optimization as a rewriter for Spark 3.4.1.
We evaluated Astral's work sharing optimization by manually translating 7 rewrite rules (selected to be relevant to the 22 queries of the TPC-H workload) into Astral:
\textbf{PushProjectionThroughUnion},
\textbf{PushProjectionThroughLimit},
\textbf{ReorderJoin},
\textbf{EliminateOuterJoin},
\textbf{PushDownPredicates},
\textbf{PushDownLeftSemiAntiJoin}, and
\textbf{CollapseProject}.
\Cref{fig:results} compares the runtime of a naive rule-at-a-time optimizer (\textbf{Astral-Standalone}) to an optimizer leveraging the work-sharing optimization (\textbf{Astral-Shared}).
Results are normalized to the rule-at-a-time runtime, which varied from $35\mu s$ to $4ms$; these runtimes were comparable to Spark's optimizer using the same rules, and produced comparable query plans.
In general, work-sharing can reduce runtimes significantly, making rewriting up to 4 times faster.
The main limiting factor of the work sharing optimization is that the merged rewrite queries deviate from Spark's carefully selected rule evaluation order.
This can be seen in query 2, on which the shared optimizer does not converge; and on queries 8 and 9, where the shared optimizer requires twice as many iterations to converge.

View File

@ -1,8 +1,8 @@
%!TEX root=../main.tex
\begin{figure}
\includegraphics[width=0.9\columnwidth]{data/fall_2022_spark3.2/plotting/output/stackedGraph-2023-07-17__17_09_17.pdf}
\caption{Work Breakdown for the Spark 3.2 Optimizer}
\includegraphics[width=0.9\columnwidth]{figures/stackedGraph-2023-07-20.pdf}
\trimmedcaption{Work Breakdown for the Spark 3.2 Optimizer}
\label{fig:sparkBreakdown}
\end{figure}
@ -18,8 +18,8 @@ reframes common compilation and analysis tasks as database
operations. \systemlang unifies existing database optimizations (e.g.,
work sharing from streaming systems) with existing compiler tricks
(e.g., Tree Toasting~\cite{balakrishnan:2021:sigmod:treetoaster}),
laying the groundwork for a creating truly scalable, ``declarative'' compiler
by leveraging the wide array of scalable data processing techniques developed by the database community.
laying the groundwork for creating a truly scalable, `declarative' compiler
that leverages a wide array of data processing techniques from the database community.
\paragraph{Production Rules}
Compiler transformations and optimizations are often expressed in
@ -27,7 +27,6 @@ terms of production rules: if a pattern matching a certain form is
found, it is transformed according to the production rule. For
example, the classic selection push-down rule common in query plan optimization
may be expressed as:
$$\production{\sigma_\theta(\pi_{A}(R))}{\pi_{A}(\sigma_\theta(R))}$$
In other words, any relational algebra expression
@ -80,21 +79,17 @@ We explore this \systemlang in the context of Apache Spark's Catalyst query opti
At least a quarter of its time is spent iterating over trees (`Search'), and a further quarter is spent on bookkeeping (`Fixpoint Loop').
Both of these are both strong candidates for database-style optimizations.
For this paper, we translated a significant fragment of the Catalyst optimizer --- \textcolor{red}{[TODO]} rules in total --- into ASTral-compatible match syntax\footnote{
For this paper, we translated 7 rules from the Catalyst optimizer into ASTral-compatible match syntax\footnote{
\url{
https://git.odin.cse.buffalo.edu/Astral/astral-compiler/src/branch/main/astral/catalyst/src/com/astraldb/catalyst/Catalyst.scala
}
}.
We use this fragment to evaluate our optimizations on a variety of queries, including (i) The 22 queries of the TPC-H benchmark, and
(ii) several large queries generated by deployments of Vizier~\cite{brachmann:2020:cidr:your}, a computational notebook based on Spark.
We use this fragment to evaluate our optimizations on the 22 TPC-H benchmark queries.
\subsection{Contributions}
In this paper, we make the following contributions:
(i) We introduce \systemlang, a declarative language for building compilers in \Cref{sec:datamodel};
(ii) We show how match patterns, a common format for implementing rewrite rules, can be compiled to \systemlang;
(iii) We develop an runtime for \systemlang based on work sharing in stream processing systems in \Cref{sec:queryEvaluation};
(iv) We adapt tree toasting to \systemlang in \Cref{sec:treetoasting};
(v) We evaluate \systemlang by re-implementing a fragment of Spark's Catalyst Optimizer in \Cref{sec:experiments}; and
(vi) We explore potential further ways to leverage the declarative nature of \systemlang in \Cref{sec:conclusions}.
(iii) We develop a runtime for \systemlang based on work sharing in stream processing systems in \Cref{sec:queryEvaluation}; and
(iv) We evaluate \systemlang by re-implementing a fragment of the Catalyst Optimizer in \Cref{sec:experiments}.

View File

@ -60,7 +60,7 @@ The expand operator is similar to the Unnest operator in nested relational algeb
\draw (exp) -> (leftj);
\draw (exp) -> (rightj);
\end{tikzpicture}
\caption{Example rewrite execution plan. Thicker, red lines are for the running example.}
\trimmedcaption{Example rewrite execution plan. Thicker, red lines are for the running example.}
\label{fig:executionPlan}
\end{figure}
@ -85,7 +85,7 @@ $\textsc{MakePlan}$ proceeds in three steps:
(iii) $\textsc{Rewrite}$ applies the selected join elimination rewrite.
In summary, the key challenge of selecting an execution plan is (greedily) selecting an order in which to resolve the atoms.
\begin{algorithm}
\begin{algorithm}[t]
\caption{\textsc{MakePlan}$(A, Q)$}
\label{alg:makePlan}
\begin{algorithmic}
@ -118,7 +118,7 @@ An optimizer is simultaneously interested in matching multiple patterns.
We find an appropriate optimization opportunity in stream processing systems (e.g., Aurora/Borealis\cite{DBLP:conf/cidr/AbadiABCCHLMRRTXZ05}), where multiple simultaneous streams are rewritten to share overlapping computations~\cite{DBLP:journals/ieeecc/KremienKM93}.
\Cref{alg:makeSharedPlan} generalizes \Cref{alg:makePlan} to detect and leverage such opportunities, rewriting multiple atom sets in parallel.
\begin{algorithm}
\begin{algorithm}[t]
\caption{$\textsc{MakeSharedPlan}(Q, \vec A)$}
\label{alg:makeSharedPlan}
\begin{algorithmic}
@ -165,7 +165,7 @@ Here, we develop a non-asynchronous push-based evaluation strategy that can effi
We assume, as per above, that all match pattern queries have been rewritten into a join-free DAG of project, select, and expand operators, rooted at a single AST atom.
We write $\parentsOf{Q}$ to denote the set of subplans in the DAG with $Q$ as a child.
\begin{algorithm}
\begin{algorithm}[t]
\caption{$\textsc{PushParents}(Q, t)$}
\label{alg:pushParents}
\begin{algorithmic}
@ -182,7 +182,7 @@ We write $\parentsOf{Q}$ to denote the set of subplans in the DAG with $Q$ as a
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithm}[t]
\caption{$\textsc{PushPlan}(Q, t)$}
\label{alg:pushPlan}
\begin{algorithmic}
@ -202,8 +202,7 @@ We write $\parentsOf{Q}$ to denote the set of subplans in the DAG with $Q$ as a
\Cref{alg:pushParents,alg:pushPlan} define a mutually recursive execution strategy that terminates when it reaches the top of a plan.
\textsc{PushParents} iteratively explores each ancestor of $Q$ until it finds a match, while \textsc{PushPlan} defines semantics for each operator, returning when an operator filters out a tuple.
To find matches, we invoke $\textsc{PushParents}(\db(\var), \tuple{\var: \constant})$ for each $\constant \in \subtreesOf{\db}$ until a match is discovered.
If no match is found, the optimizer has reached a fixed point.
To find matches, we invoke $\textsc{PushParents}(\db(\var), \tuple{\var: \constant})$ for each $\constant \in \subtreesOf{\db}$ until a match is discovered or the optimizer has reached a fixed point.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -27,7 +27,7 @@ Unbound variables in a scope return a undefined, \texttt{null} value.
\draw (project) -> (target);
\draw (table) -> (rel);
\end{tikzpicture}
\caption{An example AST for the query $\sigma_{X>3}(\pi_{X, Y}(R))$.}
\trimmedcaption{An example AST for the query $\sigma_{X>3}(\pi_{X, Y}(R))$.}
\label{fig:exampleAST}
\end{figure}
@ -114,9 +114,10 @@ We mark scope updates by $\scope[\var \backslash \constant]$ to mean $\scope$ wi
\evalmatcher{\var \leftarrow \expression}(\constant)(\scope)
& = \scope[\var \backslash \expression(\scope)]\\
\evalmatcher{\var \leftarrow \matcher}(\constant)(\scope)
& = \evalmatcher{\matcher}(\constant)(\scope[\var \backslash \constant])\\
& = \evalmatcher{\matcher}(\constant)(\scope[\var \backslash \constant])
\end{align*}
\caption{Operational semantics for match patterns.}
\vspace*{-3mm}
\trimmedcaption{Operational semantics for match patterns.}
\label{fig:evalMatcherSemantics}
\end{figure}
@ -208,7 +209,8 @@ $$\query{\matcher}(\db) = \comprehension{
\inbrackets{\db(\var)}
& = \comprehension{\tuple{\var: \constant}}{\constant \in \subtreesOf{\db}}
\end{align*}
\caption{Semantics of \systemlang's relation atoms}
\vspace*{-3mm}
\trimmedcaption{Semantics of \systemlang's relation atoms}
\label{fig:atomSemantics}
\end{figure}
@ -265,7 +267,7 @@ We write $\schemaOf{\atom}$ for the schema of $\atom$.
\rewritematcher{\var}{\var' \leftarrow \matcher}
& = \rewritematcher{\var}{\matcher} \bowtie \inbrackets{\var' = \var}
\end{align*}
\caption{Reducing match patterns to \systemlang; Each $\genvar$ denotes a freshly allocated variable name}
\trimmedcaption{Reducing match patterns to \systemlang; Each $\genvar$ denotes a freshly allocated variable name}
\label{fig:reductionToFOL}
\end{figure}