% paper-Declarative-Compilers/main.bib
@inproceedings{Bravenboer:2009,
author = {Bravenboer, Martin and Smaragdakis, Yannis},
title = {Strictly Declarative Specification of Sophisticated Points-to Analyses},
year = {2009},
isbn = {9781605587660},
publisher = {ACM},
abstract = {We present the DOOP framework for points-to analysis of Java programs. DOOP builds on the idea of specifying pointer analysis algorithms declaratively, using Datalog: a logic-based language for defining (recursive) relations. We carry the declarative approach further than past work by describing the full end-to-end analysis in Datalog and optimizing aggressively using a novel technique specifically targeting highly recursive Datalog programs. As a result, DOOP achieves several benefits, including full order-of-magnitude improvements in runtime. We compare DOOP with Lhotak and Hendren's PADDLE, which defines the state of the art for context-sensitive analyses. For the exact same logical points-to definitions (and, consequently, identical precision) DOOP is more than 15x faster than PADDLE for a 1-call-site sensitive analysis of the DaCapo benchmarks, with lower but still substantial speedups for other important analyses. Additionally, DOOP scales to very precise analyses that are impossible with PADDLE and Whaley et al.'s bddbddb, directly addressing open problems in past literature. Finally, our implementation is modular and can be easily configured to analyses with a wide range of characteristics, largely due to its declarativeness.},
booktitle = {OOPSLA},
pages = {243--262},
numpages = {20},
keywords = {bdds, datalog, declarative, points-to analysis, DOOP},
}
@article{Cocke:1977,
author = {Cocke, John and Kennedy, Ken},
title = {An Algorithm for Reduction of Operator Strength},
year = {1977},
issue_date = {Nov. 1977},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {20},
number = {11},
issn = {0001-0782},
url = {https://doi.org/10.1145/359863.359888},
doi = {10.1145/359863.359888},
abstract = {A simple algorithm which uses an indexed temporary table to perform reduction of operator strength in strongly connected regions is presented. Several extensions, including linear function test replacement, are discussed. These algorithms should fit well into an integrated package of local optimization algorithms.},
journal = {Commun. ACM},
month = {nov},
pages = {850--856},
numpages = {7},
keywords = {operator strength reduction, optimization of compiled code, compilers, program analysis, test replacement, strongly connected region}
}
@inproceedings{doop-souffle,
author = {Antoniadis, Tony and Triantafyllou, Konstantinos and Smaragdakis, Yannis},
title = {Porting Doop to Souffl\'{e}: A Tale of Inter-Engine Portability for Datalog-Based Analyses},
year = {2017},
isbn = {9781450350723},
publisher = {ACM},
abstract = {We detail our experience of porting the Doop static analysis framework to the recently introduced Souffl\'{e} Datalog engine. The port addresses the idiosyncrasies of the Datalog dialects involved (w.r.t. the type system, value construction, and fact updates) and differences in the runtime systems (w.r.t. parallelism, transactional execution, and optimization methodologies). The overall porting effort is interesting in many ways: as an instance of the benefits of specifying static analyses declaratively, gaining benefits (e.g., parallelism) from a mere porting to a new runtime system; as a study of the effort required to migrate a substantial Datalog codebase (of several thousand rules) to a different dialect. By exploiting shared-memory parallelism, the Souffl\'{e} version of the framework achieves speedups of up to 4x over already high single-threaded performance.},
booktitle = {SOAP},
pages = {25--30},
numpages = {6},
keywords = {Datalog, Points-to analysis, Doop},
}
@article{egglog,
author = {Zhang, Yihong and Wang, Yisu Remy and Flatt, Oliver and Cao, David and Zucker, Philip and Rosenthal, Eli and Tatlock, Zachary and Willsey, Max},
title = {Better Together: Unifying Datalog and Equality Saturation},
year = {2023},
issue_date = {June 2023},
publisher = {ACM},
volume = {7},
number = {PLDI},
abstract = {We present egglog, a fixpoint reasoning system that unifies Datalog and equality saturation (EqSat). Like Datalog, egglog supports efficient incremental execution, cooperating analyses, and lattice-based reasoning. Like EqSat, egglog supports term rewriting, efficient congruence closure, and extraction of optimized terms. We identify two recent applications -- a unification-based pointer analysis in Datalog and an EqSat-based floating-point term rewriter -- that have been hampered by features missing from Datalog but found in EqSat or vice-versa. We evaluate our system by reimplementing those projects in egglog. The resulting systems in egglog are faster, simpler, and fix bugs found in the original systems.},
journal = {Proc. ACM Program. Lang.},
month = {jun},
articleno = {125},
numpages = {25},
keywords = {Rewrite systems, Program optimization, Equality saturation, Datalog}
}
@inproceedings{Tate:2009,
author = {Tate, Ross and Stepp, Michael and Tatlock, Zachary and Lerner, Sorin},
title = {Equality Saturation: A New Approach to Optimization},
year = {2009},
isbn = {9781605583792},
publisher = {ACM},
abstract = {Optimizations in a traditional compiler are applied sequentially, with each optimization destructively modifying the program to produce a transformed program that is then passed to the next optimization. We present a new approach for structuring the optimization phase of a compiler. In our approach, optimizations take the form of equality analyses that add equality information to a common intermediate representation. The optimizer works by repeatedly applying these analyses to infer equivalences between program fragments, thus saturating the intermediate representation with equalities. Once saturated, the intermediate representation encodes multiple optimized versions of the input program. At this point, a profitability heuristic picks the final optimized program from the various programs represented in the saturated representation. Our proposed way of structuring optimizers has a variety of benefits over previous approaches: our approach obviates the need to worry about optimization ordering, enables the use of a global optimization heuristic that selects among fully optimized programs, and can be used to perform translation validation, even on compilers other than our own. We present our approach, formalize it, and describe our choice of intermediate representation. We also present experimental results showing that our approach is practical in terms of time and space overhead, is effective at discovering intricate optimization opportunities, and is effective at performing translation validation for a realistic optimizer.},
booktitle = {POPL},
pages = {264--276},
numpages = {13},
keywords = {equality reasoning, compiler optimization, intermediate representation},
}
@inproceedings{Luc:2008,
author = {Maranget, Luc},
title = {Compiling Pattern Matching to Good Decision Trees},
year = {2008},
isbn = {9781605580623},
publisher = {ACM},
abstract = {We address the issue of compiling ML pattern matching to compact and efficient decision trees. Traditionally, compilation to decision trees is optimized by (1) implementing decision trees as dags with maximal sharing; (2) guiding a simple compiler with heuristics. We first design new heuristics that are inspired by necessity, a concept from lazy pattern matching that we rephrase in terms of decision tree semantics. Thereby, we simplify previous semantic frameworks and demonstrate a straightforward connection between necessity and decision tree runtime efficiency. We complete our study by experiments, showing that optimizing compilation to decision trees is competitive with the optimizing match compiler of Le Fessant and Maranget (2001).},
booktitle = {SIGPLAN-ML},
pages = {35--46},
numpages = {12},
keywords = {heuristics, match compilers, decision trees},
}
@inproceedings{Keep:2013,
author = {Keep, Andrew W. and Dybvig, R. Kent},
title = {A Nanopass Framework for Commercial Compiler Development},
year = {2013},
isbn = {9781450323260},
publisher = {ACM},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2500365.2500618},
doi = {10.1145/2500365.2500618},
abstract = {Contemporary compilers must typically handle sophisticated high-level source languages, generate efficient code for multiple hardware architectures and operating systems, and support source-level debugging, profiling, and other program development tools. As a result, compilers tend to be among the most complex of software systems. Nanopass frameworks are designed to help manage this complexity. A nanopass compiler is comprised of many single-task passes with formally defined intermediate languages. The perceived downside of a nanopass compiler is that the extra passes will lead to substantially longer compilation times. To determine whether this is the case, we have created a plug replacement for the commercial Chez Scheme compiler, implemented using an updated nanopass framework, and we have compared the speed of the new compiler and the code it generates against the original compiler for a large set of benchmark programs. This paper describes the updated nanopass framework, the new compiler, and the results of our experiments. The compiler produces faster code than the original, averaging 15-27% depending on architecture and optimization level, due to a more sophisticated but slower register allocator and improvements to several optimizations. Compilation times average well within a factor of two of the original compiler, despite the slower register allocator and the replacement of five passes of the original 10 with over 50 nanopasses.},
booktitle = {ICFP},
pages = {343--350},
numpages = {8},
keywords = {nanopass, compiler, scheme},
}
@inproceedings{balakrishnan:2021:sigmod:treetoaster,
author = {Balakrishnan, Darshana and Nuessle, Carl and Kennedy, Oliver and Ziarek, Lukasz},
title = {{TreeToaster}: Towards an {IVM}-Optimized Compiler},
booktitle = {SIGMOD},
year = {2021}
}
@inproceedings{DBLP:conf/pods/GreenKT07,
author = {Todd J. Green and
Gregory Karvounarakis and
Val Tannen},
title = {Provenance semirings},
booktitle = {{PODS}},
pages = {31--40},
publisher = {{ACM}},
year = {2007}
}
@inproceedings{DBLP:conf/pods/JaeschkeS82,
author = {Gerhard Jaeschke and
Hans{-}J{\"{o}}rg Schek},
title = {Remarks on the Algebra of Non First Normal Form Relations},
booktitle = {{PODS}},
pages = {124--138},
publisher = {{ACM}},
year = {1982}
}
@article{DBLP:journals/ieeecc/KremienKM93,
author = {Orly Kremien and
Jeff Kramer and
Jeff Magee},
title = {Scalable, adaptive load sharing for distributed systems},
journal = {{IEEE} Parallel \& Distributed Technology: Systems \& Applications},
volume = {1},
number = {3},
pages = {62--70},
year = {1993}
}
@inproceedings{DBLP:conf/cidr/AbadiABCCHLMRRTXZ05,
author = {Daniel J. Abadi and
Yanif Ahmad and
Magdalena Balazinska and
Ugur {\c{C}}etintemel and
Mitch Cherniack and
Jeong{-}Hyon Hwang and
Wolfgang Lindner and
Anurag Maskey and
Alex Rasin and
Esther Ryvkina and
Nesime Tatbul and
Ying Xing and
Stanley B. Zdonik},
title = {The Design of the Borealis Stream Processing Engine},
booktitle = {{CIDR}},
pages = {277--289},
year = {2005}
}
@incollection{DBLP:books/sp/16/CetintemelAABBCHMMRRSTXZ16,
author = {Ugur {\c{C}}etintemel and
Daniel J. Abadi and
Yanif Ahmad and
Hari Balakrishnan and
Magdalena Balazinska and
Mitch Cherniack and
Jeong{-}Hyon Hwang and
Samuel Madden and
Anurag Maskey and
Alexander Rasin and
Esther Ryvkina and
Mike Stonebraker and
Nesime Tatbul and
Ying Xing and
Stan Zdonik},
title = {The Aurora and Borealis Stream Processing Engines},
booktitle = {Data Stream Management},
series = {Data-Centric Systems and Applications},
pages = {337--359},
publisher = {Springer},
year = {2016}
}
@inproceedings{balakrishnan:2019:dbpl:fluid,
author = {Balakrishnan, Darshana and Ziarek, Lukasz and Kennedy, Oliver},
title = {Fluid Data Structures},
booktitle = {DBPL},
year = {2019}
}
@inproceedings{brachmann:2020:cidr:your,
author = {Brachmann, Michael and Spoth, William and Kennedy, Oliver and Glavic, Boris and Mueller, Heiko and Castelo, Sonia and Bautista, Carlos and Freire, Juliana},
title = {Your notebook is not crumby enough, {REPL}ace it},
booktitle = {CIDR},
year = {2020}
}
@inproceedings{DBLP:conf/sigmod/SolimanAREGSCGRPWNKB14,
author = {Mohamed A. Soliman and
Lyublena Antova and
Venkatesh Raghavan and
Amr El{-}Helw and
Zhongxian Gu and
Entong Shen and
George C. Caragea and
Carlos Garcia{-}Alvarado and
Foyzur Rahman and
Michalis Petropoulos and
Florian Waas and
Sivaramakrishnan Narayanan and
Konstantinos Krikellas and
Rhonda Baldwin},
title = {Orca: a modular query optimizer architecture for big data},
booktitle = {{SIGMOD} Conference},
pages = {337--348},
publisher = {{ACM}},
year = {2014}
}
@inproceedings{DBLP:conf/cidr/McSherryMII13,
author = {Frank McSherry and
Derek Gordon Murray and
Rebecca Isaacs and
Michael Isard},
title = {Differential Dataflow},
booktitle = {{CIDR}},
year = {2013}
}
@inproceedings{DBLP:conf/lics/Breazu-TannenKP93,
author = {Val Tannen and
Delia Kesner and
Laurence Puel},
title = {A Typed Pattern Calculus},
booktitle = {{LICS}},
pages = {262--274},
publisher = {{IEEE} Computer Society},
year = {1993}
}
@article{DBLP:journals/tcs/BunemanNTW95,
author = {Peter Buneman and
Shamim A. Naqvi and
Val Tannen and
Limsoon Wong},
title = {Principles of Programming with Complex Objects and Collection Types},
journal = {Theor. Comput. Sci.},
volume = {149},
number = {1},
pages = {3--48},
year = {1995}
}