Merging related work and trimming.

2023-07-18 10:26:44 -04:00 · 2023-07-18 10:26:44 -04:00 · cca999be40
parent 586c277a96
commit cca999be40
3 changed files with 16 additions and 49 deletions
--- a/main.bib
+++ b/main.bib
@ -3,17 +3,12 @@ author = {Bravenboer, Martin and Smaragdakis, Yannis},
 title = {Strictly Declarative Specification of Sophisticated Points-to Analyses},
 year = {2009},
 isbn = {9781605587660},
-publisher = {Association for Computing Machinery},
-address = {New York, NY, USA},
-url = {https://doi.org/10.1145/1640089.1640108},
-doi = {10.1145/1640089.1640108},
+publisher = {ACM},
 abstract = {We present the DOOP framework for points-to analysis of Java programs. DOOP builds on the idea of specifying pointer analysis algorithms declaratively, using Datalog: a logic-based language for defining (recursive) relations. We carry the declarative approach further than past work by describing the full end-to-end analysis in Datalog and optimizing aggressively using a novel technique specifically targeting highly recursive Datalog programs.As a result, DOOP achieves several benefits, including full order-of-magnitude improvements in runtime. We compare DOOP with Lhotak and Hendren's PADDLE, which defines the state of the art for context-sensitive analyses. For the exact same logical points-to definitions (and, consequently, identical precision) DOOP is more than 15x faster than PADDLE for a 1-call-site sensitive analysis of the DaCapo benchmarks, with lower but still substantial speedups for other important analyses. Additionally, DOOP scales to very precise analyses that are impossible with PADDLE and Whaley et al.'s bddbddb, directly addressing open problems in past literature. Finally, our implementation is modular and can be easily configured to analyses with a wide range of characteristics, largely due to its declarativeness.},
-booktitle = {Proceedings of the 24th ACM SIGPLAN Conference on Object Oriented Programming Systems Languages and Applications},
+booktitle = {OOPSLA},
 pages = {243–262},
 numpages = {20},
 keywords = {bdds, datalog, declarative, points-to analysis, DOOP},
-location = {Orlando, Florida, USA},
-series = {OOPSLA '09}
 }

                  
@ -42,17 +37,12 @@ author = {Antoniadis, Tony and Triantafyllou, Konstantinos and Smaragdakis, Yann
 title = {Porting Doop to Souffl\'{e}: A Tale of Inter-Engine Portability for Datalog-Based Analyses},
 year = {2017},
 isbn = {9781450350723},
-publisher = {Association for Computing Machinery},
-address = {New York, NY, USA},
-url = {https://doi.org/10.1145/3088515.3088522},
-doi = {10.1145/3088515.3088522},
+publisher = {ACM},
 abstract = {We detail our experience of porting the static analysis framework to the recently introduced Datalog engine. The port addresses the idiosynchrasies of the Datalog dialects involved (w.r.t. the type system, value construction, and fact updates) and differences in the runtime systems (w.r.t. parallelism, transactional execution, and optimization methodologies). The overall porting effort is interesting in many ways: as an instance of the benefits of specifying static analyses declaratively, gaining benefits (e.g., parallelism) from a mere porting to a new runtime system; as a study of the effort required to migrate a substantial Datalog codebase (of several thousand rules) to a different dialect. By exploiting shared-memory parallelism, the version of the framework achieves speedups of up to 4x, over already high single-threaded performance.},
-booktitle = {Proceedings of the 6th ACM SIGPLAN International Workshop on State Of the Art in Program Analysis},
+booktitle = {SOAP},
 pages = {25–30},
 numpages = {6},
 keywords = {Datalog, Points-to analysis, Doop},
-location = {Barcelona, Spain},
-series = {SOAP 2017}
 }
                  
@article{egglog,
@ -60,12 +50,9 @@ author = {Zhang, Yihong and Wang, Yisu Remy and Flatt, Oliver and Cao, David and
 title = {Better Together: Unifying Datalog and Equality Saturation},
 year = {2023},
 issue_date = {June 2023},
-publisher = {Association for Computing Machinery},
-address = {New York, NY, USA},
+publisher = {ACM},
 volume = {7},
 number = {PLDI},
-url = {https://doi.org/10.1145/3591239},
-doi = {10.1145/3591239},
 abstract = {We present egglog, a fixpoint reasoning system that unifies Datalog and equality saturation (EqSat). Like Datalog, egglog supports efficient incremental execution, cooperating analyses, and lattice-based reasoning. Like EqSat, egglog supports term rewriting, efficient congruence closure, and extraction of optimized terms. We identify two recent applications -- a unification-based pointer analysis in Datalog and an EqSat-based floating-point term rewriter -- that have been hampered by features missing from Datalog but found in EqSat or vice-versa. We evaluate our system by reimplementing those projects in egglog. The resulting systems in egglog are faster, simpler, and fix bugs found in the original systems.},
 journal = {Proc. ACM Program. Lang.},
 month = {jun},
@ -80,17 +67,12 @@ author = {Tate, Ross and Stepp, Michael and Tatlock, Zachary and Lerner, Sorin},
 title = {Equality Saturation: A New Approach to Optimization},
 year = {2009},
 isbn = {9781605583792},
-publisher = {Association for Computing Machinery},
-address = {New York, NY, USA},
-url = {https://doi.org/10.1145/1480881.1480915},
-doi = {10.1145/1480881.1480915},
+publisher = {ACM},
 abstract = {Optimizations in a traditional compiler are applied sequentially, with each optimization destructively modifying the program to produce a transformed program that is then passed to the next optimization. We present a new approach for structuring the optimization phase of a compiler. In our approach, optimizations take the form of equality analyses that add equality information to a common intermediate representation. The optimizer works by repeatedly applying these analyses to infer equivalences between program fragments, thus saturating the intermediate representation with equalities. Once saturated, the intermediate representation encodes multiple optimized versions of the input program. At this point, a profitability heuristic picks the final optimized program from the various programs represented in the saturated representation. Our proposed way of structuring optimizers has a variety of benefits over previous approaches: our approach obviates the need to worry about optimization ordering, enables the use of a global optimization heuristic that selects among fully optimized programs, and can be used to perform translation validation, even on compilers other than our own. We present our approach, formalize it, and describe our choice of intermediate representation. We also present experimental results showing that our approach is practical in terms of time and space overhead, is effective at discovering intricate optimization opportunities, and is effective at performing translation validation for a realistic optimizer.},
-booktitle = {Proceedings of the 36th Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
+booktitle = {POPL},
 pages = {264–276},
 numpages = {13},
 keywords = {equality reasoning, compiler optimization, intermediate representation},
-location = {Savannah, GA, USA},
-series = {POPL '09}
 }

                  
@ -101,7 +83,6 @@ title = {Compiling Pattern Matching to Good Decision Trees},
 year = {2008},
 isbn = {9781605580623},
 publisher = {ACM},
-doi = {10.1145/1411304.1411311},
 abstract = {We address the issue of compiling ML pattern matching to compact and efficient decisions trees. Traditionally, compilation to decision trees is optimized by (1) implementing decision trees as dags with maximal sharing; (2) guiding a simple compiler with heuristics. We first design new heuristics that are inspired by necessity, a concept from lazy pattern matching that we rephrase in terms of decision tree semantics. Thereby, we simplify previous semantic frameworks and demonstrate a straightforward connection between necessity and decision tree runtime efficiency. We complete our study by experiments, showing that optimizing compilation to decision trees is competitive with the optimizing match compiler of Le Fessant and Maranget (2001).},
 booktitle = {SIGPLAN-ML},
 pages = {35–46},
@ -193,7 +174,6 @@ series = {ICFP '13}
  title        = {The Design of the Borealis Stream Processing Engine},
  booktitle    = {{CIDR}},
  pages        = {277--289},
-  publisher    = {www.cidrdb.org},
  year         = {2005}
 }

@ -274,7 +254,6 @@ series = {ICFP '13}
                  Michael Isard},
  title        = {Differential Dataflow},
  booktitle    = {{CIDR}},
-  publisher    = {www.cidrdb.org},
  year         = {2013}
 }

--- a/sections/query_evaluation.tex
+++ b/sections/query_evaluation.tex
@ -91,7 +91,7 @@ In summary, the key challenge of selecting an execution plan is (greedily) selec
  \begin{algorithmic}
    \Require $A = \inset{\atom_1, \ldots, \atom_n} \subseteq \atomdom$: A set of target atoms
    \Require $Q$: A query
-    \Ensure $Q': $ A query equivalent to $Q \bowtie \atom_1 \bowtie \ldots \bowtie \atom_n$
+    \Ensure A query equivalent to $Q \bowtie \atom_1 \bowtie \ldots \bowtie \atom_n$
    \State \textbf{if} {$|A| = 0$} \textbf{then} \Return Q
    \State $\texttt{candidates} = \textsc{EnumerateCandidates}(Q, A)$
    \State $\atom = \textsc{PickCandidate}(\texttt{candidates})$
@ -124,11 +124,9 @@ We find an appropriate optimization opportunity in stream processing systems (e.
  \begin{algorithmic}
    \Require $Q$: A query
    \Require $\vec A$: A vector of target atom sets $A_i$ as in \Cref{alg:makePlan}.
-    \Ensure $\vec Q'$: A vector of queries, where $Q_i'$ is as in \Cref{alg:makePlan}.
+    \Ensure A vector of queries, as in \Cref{alg:makePlan}.
    \State \textbf{if } {$|\vec A| = 0$} \textbf{then} \Return $[]$
-    \For{$A_i \in \vec A$}
-      \State $C_i = \textsc{EnumerateCandidates}(Q, A_i)$
-    \EndFor
+    \State \textbf{for} {$A_i \in \vec A$} \textbf{do} $C_i = \textsc{EnumerateCandidates}(Q, A_i)$ \textbf{end for}
    \State $\atom = \textsc{PickCandidate}(\vec C)$
    \State $Q' = \textsc{Rewrite}(Q \bowtie \atom)$
    \State \Return $
--- a/sections/related_work.tex
+++ b/sections/related_work.tex
@ -1,19 +1,9 @@
 %!TEX root=../main.tex
 \section{Related Work}

-Database-inspired optimizations (semi-na\"ive evaluation and indexing)
-have been the enabling force behind recent orders-of-magnitude
-scalability gains in context-sensitive points to
-analysis~\cite{Bravenboer:2009}. In particular, the Datalog engine
-Souffl\'e has proven a de-facto target for the implementation of
-high-performance reachability-based analyses~\cite{doop-souffle}. Our
-work is primarily distinguished by its focus on \emph{structural}
-constraints, which are largely ignored by these engines and (in our
-experience) result in significant performance losses in workloads
-which manipulate structured data. Our notion of optimization is more
-closely related to equality saturation, which applies rewrite rules to
-a fixed-point~\cite{Tate:2009}. Recent work in this area leverages
-compressed representations, namely union-find~\cite{egglog}.
-
-In prior work, we explored Tree-Toasting~\cite{balakrishnan:2021:sigmod:treetoaster}, a similarly database-inspired optimization for compilers; 
-Although we are not aware of any other efforts by the database community to improve compiler performance, trees have a long history as a data-type of interest, from query languages like XPath~\footnote{\url{http://www.w3.org/TR/xpath}} to collection transformations~\cite{DBLP:journals/tcs/BunemanNTW95}.
+Database-inspired optimizations (semi-na\"ive evaluation and indexing) have driven recent orders-of-magnitude scalability gains in context-sensitive points-to analysis~\cite{Bravenboer:2009}. 
+In particular, the Datalog engine Souffl\'e has become the de-facto infrastructure for high-performance reachability-based analyses~\cite{doop-souffle}. 
+Our work is primarily focuses on \emph{structural} constraints, largely ignored by these engines, but still resulting in significant performance losses in structured data manipulation workloads. 
+Our notion of optimization is more closely related to equality saturation~\cite{Tate:2009}, where recent work leverages compressed representations, namely union-find~\cite{egglog}.
+Our own prior work proposed Tree-Toasting~\cite{balakrishnan:2021:sigmod:treetoaster}, which leverages materialized views to accelerate rewrite discovery.
+Along these lines, trees have had a long history as a data-type of interest, from query languages like XPath\footnote{\url{http://www.w3.org/TR/xpath}} to collection transformation languages~\cite{DBLP:journals/tcs/BunemanNTW95}.