%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Oliver Kennedy at 2016-04-03 01:56:13 -0400
%% Saved with string encoding Unicode (UTF-8)
@article{tan@deb2007,
author = {Wang Chiew Tan},
title = {Provenance in Databases: Past, Current, and Future},
journal = {{IEEE} Data Eng. Bull.},
volume = {30},
number = {4},
pages = {3--12},
year = {2007},
url = {http://sites.computer.org/debull/A07dec/wang-chiew.pdf},
timestamp = {Sat, 22 Dec 2007 17:24:49 +0100},
biburl = {http://dblp.uni-trier.de/rec/bib/journals/debu/Tan07},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@misc{pdbench,
Author = {{The MayBMS project}},
Date-Added = {2016-04-03 05:43:27 +0000},
Date-Modified = {2016-04-03 05:56:11 +0000},
Howpublished = {http://pdbench.sourceforge.net},
Title = {PDBench}}
@inproceedings{Kul:2015:PCO:2808783.2808793,
Acmid = {2808793},
Address = {New York, NY, USA},
Author = {Kul, Gokhan and Upadhyaya, Shambhu},
Booktitle = {Proceedings of the 7th ACM CCS International Workshop on Managing Insider Security Threats},
Date-Added = {2016-04-02 23:54:17 +0000},
Date-Modified = {2016-04-02 23:54:17 +0000},
Doi = {10.1145/2808783.2808793},
Isbn = {978-1-4503-3824-0},
Keywords = {cyber ontology, financial sector, insider attacks, relational database systems, taxonomy},
Location = {Denver, Colorado, USA},
Numpages = {4},
Pages = {75--78},
Publisher = {ACM},
Series = {MIST '15},
Title = {* {A} Preliminary Cyber Ontology for Insider Threats in the Financial Sector},
Url = {http://doi.acm.org/10.1145/2808783.2808793},
Year = {2015},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2808783.2808793},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2808783.2808793}}
@inproceedings{KLX16,
Author = {Gokhan Kul and Duc Thanh Luong and Ting Xie and Patrick Coonan and Varun Chandola and Oliver Kennedy and Shambhu Upadhyaya},
Booktitle = {ERMIS},
Date-Added = {2016-04-02 23:53:34 +0000},
Date-Modified = {2016-04-02 23:53:34 +0000},
Title = {* {Ettu: Analyzing} Query Intents in Corporate Databases},
Year = {2016}}
@inproceedings{6816755,
Author = {M. Gubanov and M. Stonebraker and D. Bruckner},
Booktitle = {2014 IEEE 30th International Conference on Data Engineering (ICDE)},
Date-Added = {2016-04-02 18:21:52 +0000},
Date-Modified = {2016-04-02 18:21:52 +0000},
Doi = {10.1109/ICDE.2014.6816755},
Keywords = {data integration;data structures;sensor fusion;text analysis;DATA TAMER;data cleaning;data formats;data integration system;data transformations;entity consolidation module;expert-sourcing mechanism;human guidance;large-scale text data research;online media;schema integration facility;structured data fusion;structured data sources;text fusion;Blogs;Cleaning;Data integration;Distributed databases;Media;Motion pictures;Schedules},
Month = {March},
Pages = {1258--1261},
Title = {Text and structured data fusion in {Data Tamer} at scale},
Year = {2014},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDE.2014.6816755}}
@inproceedings{stonebraker2013data,
Author = {Stonebraker, Michael and Bruckner, Daniel and Ilyas, Ihab F and Beskales, George and Cherniack, Mitch and Zdonik, Stanley B and Pagan, Alexander and Xu, Shan},
Booktitle = {CIDR},
Date-Added = {2016-04-02 18:21:31 +0000},
Date-Modified = {2016-04-02 18:21:31 +0000},
Title = {Data Curation at Scale: The Data Tamer System},
Year = {2013}}
@misc{trifacta,
Author = {{Trifacta}},
Date-Added = {2016-04-02 17:59:15 +0000},
Date-Modified = {2016-04-02 17:59:33 +0000},
Howpublished = {https://www.trifacta.com},
Title = {Trifacta Wrangler}}
@misc{jupyter,
Author = {{Project Jupyter}},
Date-Added = {2016-04-02 17:46:47 +0000},
Date-Modified = {2016-04-02 17:47:21 +0000},
Howpublished = {http://jupyter.org},
Title = {Jupyter Notebook}}
@misc{lighttable,
Author = {{Kodowa, Inc}},
Date-Added = {2016-04-02 17:46:05 +0000},
Date-Modified = {2016-04-02 17:47:36 +0000},
Howpublished = {http://lighttable.com},
Title = {LightTable}}
@misc{spark2014apache,
Author = {{Apache Spark}},
Date-Added = {2016-04-02 17:39:02 +0000},
Date-Modified = {2016-04-02 17:39:02 +0000},
Howpublished = {http://spark.apache.org},
Title = {{Apache Spark}: Lightning-fast cluster computing},
Year = {2014}}
@article{Chan1996119,
Abstract = {Spreadsheets have long been recognized as important tools for end-user computing. This research explores their use within business organizations. A survey was carried out to investigate the relationships among tasks, spreadsheet proficiency, usage, and satisfaction. The results suggested that the spreadsheet proficiency can have a greater impact on the tasks than the task can have on the spreadsheet proficiency. It was also found that spreadsheet users often do not use many of the commonly available spreadsheet features, and they do not appear inclined to use other software packages for their tasks, even if these packages might be more suitable. The proficiency of the spreadsheet users was not found to be related to the importance of the decisions being taken as a result of the spreadsheet analyses. },
Author = {Yolande E. Chan and Veda C. Storey},
Date-Added = {2016-04-02 15:31:11 +0000},
Date-Modified = {2016-04-02 17:04:23 +0000},
Doi = {http://dx.doi.org/10.1016/S0378-7206(96)00008-0},
Issn = {0378-7206},
Journal = {Information \& Management},
Keywords = {Software packages},
Number = {3},
Pages = {119--134},
Title = {The use of spreadsheets in organizations: Determinants and consequences},
Url = {http://www.sciencedirect.com/science/article/pii/S0378720696000080},
Volume = {31},
Year = {1996},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S0378720696000080},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0378-7206(96)00008-0}}
@inproceedings{Bakke:2011:SUI:1978942.1979313,
Acmid = {1979313},
Address = {New York, NY, USA},
Author = {Bakke, Eirik and Karger, David and Miller, Rob},
Booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems},
Date-Added = {2016-04-02 15:25:41 +0000},
Date-Modified = {2016-04-02 15:25:41 +0000},
Doi = {10.1145/1978942.1979313},
Isbn = {978-1-4503-0228-9},
Keywords = {databases, foreign key relationships, hierarchical views, one-to-many relationships, spreadsheets},
Location = {Vancouver, BC, Canada},
Numpages = {10},
Pages = {2541--2550},
Publisher = {ACM},
Series = {CHI '11},
Title = {A Spreadsheet-based User Interface for Managing Plural Relationships in Structured Data},
Url = {http://doi.acm.org/10.1145/1978942.1979313},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1978942.1979313},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1978942.1979313}}
@article{bakke2011schema,
Author = {Bakke, Eirik and Benson, Edward},
Date-Added = {2016-04-02 15:24:15 +0000},
Date-Modified = {2016-04-02 15:24:15 +0000},
Publisher = {Citeseer},
Title = {The Schema-Independent Database {UI}: A Proposed Holy Grail and Some Suggestions},
Year = {2011}}
@article{DBLP:journals/ijcse/JagadishQN15,
Author = {H. V. Jagadish and Li Qian and Arnab Nandi},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/ijcse/JagadishQN15},
Date-Added = {2016-04-02 15:19:46 +0000},
Date-Modified = {2016-04-02 15:19:46 +0000},
Doi = {10.1504/IJCSE.2015.072651},
Journal = {{IJCSE}},
Number = {3},
Pages = {270--283},
Timestamp = {Mon, 09 Nov 2015 14:36:44 +0100},
Title = {Organic databases},
Url = {http://dx.doi.org/10.1504/IJCSE.2015.072651},
Volume = {11},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1504/IJCSE.2015.072651}}
@article{taxi-patterns,
Author = {Doraiswamy, H. and Ferreira, N. and Damoulas, T. and Freire, J. and Silva, C.T.},
Journal = {IEEE TVCG},
Number = {12},
Pages = {2634--2643},
Title = {Using Topological Analysis to Support Event-Guided Exploration in Urban Data},
Volume = {20},
Year = {2014}}
@misc{taxifactbook2014,
Author = {Michael R. Bloomberg and David Yassky},
Howpublished = {\url{http://www.nyc.gov/html/tlc/downloads/pdf/2014_taxicab_fact_book.pdf}},
Title = {2014 Taxicab Fact Book},
Year = {2014}}
@misc{tlcdata,
Howpublished = {\url{http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml}},
Key = {tlc},
Title = {{TLC Trip Record Data}},
Year = {2015}}
@misc{weatherdata,
Howpublished = {\url{https://www.ncei.noaa.gov/}},
Key = {weather},
Title = {{Weather Data}},
Year = {2015}}
@article{Buneman19953,
Abstract = {We present a new principle for the development of database query languages that the primitive operations should be organized around types. Viewing a relational database as consisting of sets of records, this principle dictates that we should investigate separately operations for records and sets. There are two immediate advantages of this approach, which is partly inspired by basic ideas from category theory. First, it provides a language for structures in which record and set types may be freely combined: nested relations or complex objects. Second, the fundamental operations for sets are closely related to those for other ``collection types'' such as bags or lists, and this suggests how database languages may be uniformly extended to these new types. The most general operation on sets, that of structural recursion, is one in which not all programs are well-defined. In looking for limited forms of this operation that always give rise to well-defined operations, we find a number of close connections with existing database languages, notably those developed for complex objects. Moreover, even though the general paradigm of structural recursion is shown to be no more expressive than one of the existing languages for complex objects, it possesses certain properties of uniformity that make it a better candidate for an efficient, practical language. Thus rather than developing query languages by extending, for example, relational calculus, we advocate a very powerful paradigm in which a number of well-known languages are to be found as natural sublanguages.},
Author = {Peter Buneman and Shamim Naqvi and Val Tannen and Limsoon Wong},
Date-Added = {2016-04-01 23:43:24 +0000},
Date-Modified = {2016-04-01 23:43:24 +0000},
Doi = {http://dx.doi.org/10.1016/0304-3975(95)00024-Q},
Issn = {0304-3975},
Journal = {Theoretical Computer Science},
Note = {Fourth International Conference on Database Theory (ICDT '92)},
Number = {1},
Pages = {3--48},
Title = {Principles of programming with complex objects and collection types},
Url = {http://www.sciencedirect.com/science/article/pii/030439759500024Q},
Volume = {149},
Year = {1995},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/030439759500024Q},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/0304-3975(95)00024-Q}}
@book{hector2002database,
Author = {Garcia-Molina, Hector and Ullman, Jeffrey D. and Widom, Jennifer},
Date-Added = {2016-04-01 23:26:29 +0000},
Date-Modified = {2016-04-01 23:26:29 +0000},
Publisher = {Prentice-Hall},
Title = {Database systems: The complete book},
Year = {2002}}
@article{Griffin:1995:IMV:568271.223849,
Acmid = {223849},
Address = {New York, NY, USA},
Author = {Griffin, Timothy and Libkin, Leonid},
Date-Added = {2016-04-01 16:48:01 +0000},
Date-Modified = {2016-04-01 16:48:01 +0000},
Doi = {10.1145/568271.223849},
Issn = {0163-5808},
Issue_Date = {May 1995},
Journal = {SIGMOD Rec.},
Month = may,
Number = {2},
Numpages = {12},
Pages = {328--339},
Publisher = {ACM},
Title = {Incremental Maintenance of Views with Duplicates},
Url = {http://doi.acm.org/10.1145/568271.223849},
Volume = {24},
Year = {1995},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/568271.223849},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/568271.223849}}
@inproceedings{Chaudhuri:1995:OQM:645480.655434,
Acmid = {655434},
Address = {Washington, DC, USA},
Author = {Chaudhuri, Surajit and Krishnamurthy, Ravi and Potamianos, Spyros and Shim, Kyuseok},
Booktitle = {Proceedings of the Eleventh International Conference on Data Engineering},
Date-Added = {2016-04-01 16:47:41 +0000},
Date-Modified = {2016-04-01 16:47:41 +0000},
Isbn = {0-8186-6910-1},
Keywords = {materialized views, optimisation, query optimization algorithm, query processing},
Numpages = {11},
Pages = {190--200},
Publisher = {IEEE Computer Society},
Series = {ICDE '95},
Title = {Optimizing Queries with Materialized Views},
Url = {http://dl.acm.org/citation.cfm?id=645480.655434},
Year = {1995},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=645480.655434}}
@article{Buneman:1979:EMR:320083.320099,
Acmid = {320099},
Address = {New York, NY, USA},
Author = {Buneman, O. Peter and Clemons, Eric K.},
Date-Added = {2016-04-01 16:45:55 +0000},
Date-Modified = {2016-04-01 16:45:55 +0000},
Doi = {10.1145/320083.320099},
Issn = {0362-5915},
Issue_Date = {Sept. 1979},
Journal = {ACM Trans. Database Syst.},
Keywords = {alerters, exception reporting, integrity constraints, programming techniques, relational databases},
Month = sep,
Number = {3},
Numpages = {15},
Pages = {368--382},
Publisher = {ACM},
Title = {Efficiently Monitoring Relational Databases},
Url = {http://doi.acm.org/10.1145/320083.320099},
Volume = {4},
Year = {1979},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/320083.320099},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/320083.320099}}
@article{Blakeley:1986:EUM:16856.16861,
Acmid = {16861},
Address = {New York, NY, USA},
Author = {Blakeley, Jos{\'e} A. and Larson, Per-{\AA}ke and Tompa, Frank Wm.},
Date-Added = {2016-04-01 16:45:22 +0000},
Date-Modified = {2016-04-01 16:45:22 +0000},
Doi = {10.1145/16856.16861},
Issn = {0163-5808},
Issue_Date = {June 1986},
Journal = {SIGMOD Rec.},
Month = jun,
Number = {2},
Numpages = {11},
Pages = {61--71},
Publisher = {ACM},
Title = {Efficiently Updating Materialized Views},
Url = {http://doi.acm.org/10.1145/16856.16861},
Volume = {15},
Year = {1986},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/16856.16861},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/16856.16861}}
@article{Agrawal:1990:DPS:93548.93576,
Acmid = {93576},
Address = {New York, NY, USA},
Author = {Agrawal, Hiralal and Horgan, Joseph R.},
Date-Added = {2016-04-01 16:42:43 +0000},
Date-Modified = {2016-04-01 16:42:43 +0000},
Doi = {10.1145/93548.93576},
Issn = {0362-1340},
Issue_Date = {Jun. 1990},
Journal = {SIGPLAN Not.},
Month = jun,
Number = {6},
Numpages = {11},
Pages = {246--256},
Publisher = {ACM},
Title = {Dynamic Program Slicing},
Url = {http://doi.acm.org/10.1145/93548.93576},
Volume = {25},
Year = {1990},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/93548.93576},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/93548.93576}}
@article{KOREL1988155,
Abstract = {A dynamic program slice is an executable subset of the original program that produces the same computations on a subset of selected variables and inputs. It differs from the static slice (Weiser, 1982, 1984) in that it is entirely defined on the basis of a computation. The two main advantages are the following: Arrays and dynamic data structures can be handled more precisely and the size of slice can be significantly reduced, leading to a finer localization of the fault. The approach is being investigated as a possible extension of the debugging capabilities of STAD, a recently developed System for Testing and Debugging (Korel and Laski, 1987; Laski, 1987).},
Author = {Bogdan Korel and Janusz Laski},
Date-Added = {2016-04-01 16:42:20 +0000},
Date-Modified = {2016-04-01 16:42:20 +0000},
Doi = {http://dx.doi.org/10.1016/0020-0190(88)90054-3},
Issn = {0020-0190},
Journal = {Information Processing Letters},
Keywords = {debugging},
Number = {3},
Pages = {155--163},
Title = {Dynamic program slicing},
Url = {http://www.sciencedirect.com/science/article/pii/0020019088900543},
Volume = {29},
Year = {1988},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/0020019088900543},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/0020-0190(88)90054-3}}
@inproceedings{Weiser:1981:PS:800078.802557,
Acmid = {802557},
Address = {Piscataway, NJ, USA},
Author = {Weiser, Mark},
Booktitle = {Proceedings of the 5th International Conference on Software Engineering},
Date-Added = {2016-04-01 16:41:34 +0000},
Date-Modified = {2016-04-01 16:41:34 +0000},
Isbn = {0-89791-146-6},
Keywords = {Data flow analysis, Debugging, Human factors, Program maintenance, Program metrics, Software tools},
Location = {San Diego, California, USA},
Numpages = {11},
Pages = {439--449},
Publisher = {IEEE Press},
Series = {ICSE '81},
Title = {Program Slicing},
Url = {http://dl.acm.org/citation.cfm?id=800078.802557},
Year = {1981},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=800078.802557}}
@article{Wang:2008:BML:1453856.1453896,
Acmid = {1453896},
Author = {Wang, Daisy Zhe and Michelakis, Eirinaios and Garofalakis, Minos and Hellerstein, Joseph M.},
Date-Added = {2016-04-01 03:48:37 +0000},
Date-Modified = {2016-04-01 03:48:37 +0000},
Doi = {10.14778/1453856.1453896},
Issn = {2150-8097},
Issue_Date = {August 2008},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {1},
Numpages = {12},
Pages = {340--351},
Publisher = {VLDB Endowment},
Title = {BayesStore: Managing Large, Uncertain Data Repositories with Probabilistic Graphical Models},
Url = {http://dx.doi.org/10.14778/1453856.1453896},
Volume = {1},
Year = {2008},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/1453856.1453896}}
@inproceedings{Deshpande:2006:MSM:1142473.1142483,
Acmid = {1142483},
Address = {New York, NY, USA},
Author = {Deshpande, Amol and Madden, Samuel},
Booktitle = {Proceedings of the 2006 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-04-01 03:47:54 +0000},
Date-Modified = {2016-04-01 03:47:54 +0000},
Doi = {10.1145/1142473.1142483},
Isbn = {1-59593-434-0},
Keywords = {query processing, regression, sensor networks, statistical models, uncertain data, views},
Location = {Chicago, IL, USA},
Numpages = {12},
Pages = {73--84},
Publisher = {ACM},
Series = {SIGMOD '06},
Title = {MauveDB: Supporting Model-based User Views in Database Systems},
Url = {http://doi.acm.org/10.1145/1142473.1142483},
Year = {2006},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1142473.1142483},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1142473.1142483}}
@inproceedings{katsis2015combining,
Author = {Katsis, Yannis and Freund, Yoav and Papakonstantinou, Yannis},
Booktitle = {CIDR},
Date-Added = {2016-04-01 03:46:38 +0000},
Date-Modified = {2016-04-01 03:46:38 +0000},
Title = {Combining Databases and Signal Processing in Plato},
Year = {2015}}
@article{Crankshaw:2014aa,
Abstract = {To support complex data-intensive applications such as personalized recommendations, targeted advertising, and intelligent services, the data management community has focused heavily on the design of systems to support training complex models on large datasets. Unfortunately, the design of these systems largely ignores a critical component of the overall analytics process: the deployment and serving of models at scale. In this work, we present Velox, a new component of the Berkeley Data Analytics Stack. Velox is a data management system for facilitating the next steps in real-world, large-scale analytics pipelines: online model management, maintenance, and serving. Velox provides end-user applications and services with a low-latency, intuitive interface to models, transforming the raw statistical models currently trained using existing offline large-scale compute frameworks into full-blown, end-to-end data products capable of recommending products, targeting advertisements, and personalizing web content. To provide up-to-date results for these complex models, Velox also facilitates lightweight online model maintenance and selection (i.e., dynamic weighting). In this paper, we describe the challenges and architectural considerations required to achieve this functionality, including the abilities to span online and offline systems, to adaptively adjust model materialization strategies, and to exploit inherent statistical properties such as model error tolerance, all while operating at "Big Data" scale.},
Author = {Daniel Crankshaw and Peter Bailis and Joseph E. Gonzalez and Haoyuan Li and Zhao Zhang and Michael J. Franklin and Ali Ghodsi and Michael I. Jordan},
Date-Added = {2016-04-01 03:45:35 +0000},
Date-Modified = {2016-04-01 03:45:35 +0000},
Eprint = {1409.3809},
Month = {09},
Title = {The Missing Piece in Complex Analytics: Low Latency, Scalable Model Management and Serving with Velox},
Url = {http://arxiv.org/abs/1409.3809},
Year = {2014},
Bdsk-Url-1 = {http://arxiv.org/abs/1409.3809}}
@inbook{Gross_Singer_Wegerich_Herzog_VanAlstine_Bockhorst_1997,
Abstractnote = {To assure the continued safe and reliable operation of a nuclear power station, it is essential that accurate online information on the current state of the entire system be available to the operators. Such information is needed to determine the operability of safety and control systems, the condition of active components, the necessity of preventative maintenance, and the status of sensory systems. To this end, ANL has developed a new Multivariate State Estimation Technique (MSET) which utilizes advanced pattern recognition methods to enhance sensor and component operational validation for commercial nuclear reactors. Operational data from the Crystal River-3 (CR-3) nuclear power plant are used to illustrate the high sensitivity, accuracy, and the rapid response time of MSET for annunciation of a variety of signal disturbances.},
Author = {Gross, K.C. and Singer, R.M. and Wegerich, S.W. and Herzog, J.P. and VanAlstine, R. and Bockhorst, F.},
Date-Added = {2016-03-31 00:49:09 +0000},
Date-Modified = {2016-03-31 00:49:09 +0000},
Month = {May},
Place = {United States},
Title = {Application of a model-based fault detection system to nuclear plant signals},
Url = {http://www.osti.gov/scitech/servlets/purl/481606},
Year = {1997},
Bdsk-Url-1 = {http://www.osti.gov/scitech/servlets/purl/481606}}
@article{Sowell:2009aa,
Abstract = {Recent work has shown that we can dramatically improve the performance of computer games and simulations through declarative processing: Character AI can be written in an imperative scripting language which is then compiled to relational algebra and executed by a special games engine with features similar to a main memory database system. In this paper we lay out a challenging research agenda built on these ideas.},
Author = {Benjamin Sowell and Alan Demers and Johannes Gehrke and Nitin Gupta and Haoyuan Li and Walker White},
Date-Added = {2016-03-29 22:27:58 +0000},
Date-Modified = {2016-03-29 22:27:58 +0000},
Eprint = {0909.1770},
Month = {09},
Title = {From Declarative Languages to Declarative Processing in Computer Games},
Url = {http://arxiv.org/abs/0909.1770},
Year = {2009},
Bdsk-Url-1 = {http://arxiv.org/abs/0909.1770}}
@inproceedings{White:2007:SGE:1247480.1247486,
Acmid = {1247486},
Address = {New York, NY, USA},
Author = {White, Walker and Demers, Alan and Koch, Christoph and Gehrke, Johannes and Rajagopalan, Rajmohan},
Booktitle = {Proceedings of the 2007 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-29 22:25:55 +0000},
Date-Modified = {2016-03-29 22:25:55 +0000},
Doi = {10.1145/1247480.1247486},
Isbn = {978-1-59593-686-8},
Keywords = {aggregates, games, indexing, scripting},
Location = {Beijing, China},
Numpages = {12},
Pages = {31--42},
Publisher = {ACM},
Series = {SIGMOD '07},
Title = {Scaling Games to Epic Proportions},
Url = {http://doi.acm.org/10.1145/1247480.1247486},
Year = {2007},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1247480.1247486},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1247480.1247486}}
@inproceedings{Raman:2001:PWI:645927.672045,
Acmid = {672045},
Address = {San Francisco, CA, USA},
Author = {Raman, Vijayshankar and Hellerstein, Joseph M.},
Booktitle = {Proceedings of the 27th International Conference on Very Large Data Bases},
Date-Added = {2016-03-29 22:20:34 +0000},
Date-Modified = {2016-03-29 22:20:34 +0000},
Isbn = {1-55860-804-4},
Numpages = {10},
Pages = {381--390},
Publisher = {Morgan Kaufmann Publishers Inc.},
Series = {VLDB '01},
Title = {Potter's Wheel: An Interactive Data Cleaning System},
Url = {http://dl.acm.org/citation.cfm?id=645927.672045},
Year = {2001},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=645927.672045}}
@inproceedings{Olston:2009:GED:1559845.1559873,
Acmid = {1559873},
Address = {New York, NY, USA},
Author = {Olston, Christopher and Chopra, Shubham and Srivastava, Utkarsh},
Booktitle = {Proceedings of the 2009 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 15:01:13 +0000},
Date-Modified = {2016-03-28 15:01:13 +0000},
Doi = {10.1145/1559845.1559873},
Isbn = {978-1-60558-551-2},
Keywords = {dataflow programming, example data},
Location = {Providence, Rhode Island, USA},
Numpages = {12},
Pages = {245--256},
Publisher = {ACM},
Series = {SIGMOD '09},
Title = {Generating Example Data for Dataflow Programs},
Url = {http://doi.acm.org/10.1145/1559845.1559873},
Year = {2009},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1559845.1559873},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1559845.1559873}}
@inbook{Lopes2000,
Address = {Berlin, Heidelberg},
Author = {Lopes, St{\'e}phane and Petit, Jean-Marc and Lakhal, Lotfi},
Chapter = {Efficient Discovery of Functional Dependencies and Armstrong Relations},
Date-Added = {2016-03-28 03:43:51 +0000},
Date-Modified = {2016-03-28 03:43:51 +0000},
Doi = {10.1007/3-540-46439-5_24},
Editor = {Zaniolo, Carlo and Lockemann, Peter C. and Scholl, Marc H. and Grust, Torsten},
Isbn = {978-3-540-46439-6},
Pages = {350--364},
Publisher = {Springer Berlin Heidelberg},
Title = {Advances in Database Technology --- EDBT 2000: 7th International Conference on Extending Database Technology Konstanz, Germany, March 27--31, 2000 Proceedings},
Url = {http://dx.doi.org/10.1007/3-540-46439-5_24},
Year = {2000},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-46439-5_24}}
@inbook{Wyss2001,
Address = {Berlin, Heidelberg},
Author = {Wyss, Catharine and Giannella, Chris and Robertson, Edward},
Chapter = {FastFDs: A Heuristic-Driven, Depth-First Algorithm for Mining Functional Dependencies from Relation Instances Extended Abstract},
Date-Added = {2016-03-28 03:43:10 +0000},
Date-Modified = {2016-03-28 03:43:10 +0000},
Doi = {10.1007/3-540-44801-2_11},
Editor = {Kambayashi, Yahiko and Winiwarter, Werner and Arikawa, Masatoshi},
Isbn = {978-3-540-44801-3},
Pages = {101--110},
Publisher = {Springer Berlin Heidelberg},
Title = {Data Warehousing and Knowledge Discovery: Third International Conference, DaWaK 2001 Munich, Germany, September 5--7, 2001 Proceedings},
Url = {http://dx.doi.org/10.1007/3-540-44801-2_11},
Year = {2001},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-44801-2_11}}
@article{Huhtala01011999,
Abstract = {The discovery of functional dependencies from relations is an important database analysis technique. We present Tane, an efficient algorithm for finding functional dependencies from large databases. Tane is based on partitioning the set of rows with respect to their attribute values, which makes testing the validity of functional dependencies fast even for a large number of tuples. The use of partitions also makes the discovery of approximate functional dependencies easy and efficient and the erroneous or exceptional rows can be identified easily. Experiments show that Tane is fast in practice. For benchmark databases the running times are improved by several orders of magnitude over previously published results. The algorithm is also applicable to much larger datasets than the previous methods.},
Author = {Huhtala, Yk{\"a} and K{\"a}rkk{\"a}inen, Juha and Porkka, Pasi and Toivonen, Hannu},
Date-Added = {2016-03-28 03:41:52 +0000},
Date-Modified = {2016-03-28 03:41:52 +0000},
Doi = {10.1093/comjnl/42.2.100},
Eprint = {http://comjnl.oxfordjournals.org/content/42/2/100.full.pdf+html},
Journal = {The Computer Journal},
Number = {2},
Pages = {100--111},
Title = {{TANE}: An Efficient Algorithm for Discovering Functional and Approximate Dependencies},
Url = {http://comjnl.oxfordjournals.org/content/42/2/100.abstract},
Volume = {42},
Year = {1999},
Bdsk-Url-1 = {http://comjnl.oxfordjournals.org/content/42/2/100.abstract},
Bdsk-Url-2 = {http://dx.doi.org/10.1093/comjnl/42.2.100}}
@inproceedings{5767833,
Author = {F. Chiang and R. J. Miller},
Booktitle = {2011 IEEE 27th International Conference on Data Engineering (ICDE)},
Date-Added = {2016-03-28 03:40:27 +0000},
Date-Modified = {2016-03-28 03:40:27 +0000},
Doi = {10.1109/ICDE.2011.5767833},
Issn = {1063-6382},
Keywords = {data integrity;constraint repair;data design;data quality;data repair;data sources;functional dependencies;integrity constraints;unified cost model;Cities and towns;Computational modeling;Data models;Databases;Maintenance engineering;Redundancy;Semantics},
Month = {April},
Pages = {446--457},
Title = {A unified model for data and constraint repair},
Year = {2011},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDE.2011.5767833}}
@inproceedings{Kolahi:2009:AOR:1514894.1514901,
Acmid = {1514901},
Address = {New York, NY, USA},
Author = {Kolahi, Solmaz and Lakshmanan, Laks V. S.},
Booktitle = {Proceedings of the 12th International Conference on Database Theory},
Date-Added = {2016-03-28 03:34:13 +0000},
Date-Modified = {2016-03-28 03:34:13 +0000},
Doi = {10.1145/1514894.1514901},
Isbn = {978-1-60558-423-2},
Keywords = {approximation algorithm, functional dependency violation, inconsistent databases, repair},
Location = {St. Petersburg, Russia},
Numpages = {10},
Pages = {53--62},
Publisher = {ACM},
Series = {ICDT '09},
Title = {On Approximating Optimum Repairs for Functional Dependency Violations},
Url = {http://doi.acm.org/10.1145/1514894.1514901},
Year = {2009},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1514894.1514901},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1514894.1514901}}
@inproceedings{Cong:2007:IDQ:1325851.1325890,
Acmid = {1325890},
Author = {Cong, Gao and Fan, Wenfei and Geerts, Floris and Jia, Xibei and Ma, Shuai},
Booktitle = {Proceedings of the 33rd International Conference on Very Large Data Bases},
Date-Added = {2016-03-28 03:33:19 +0000},
Date-Modified = {2016-03-28 03:33:19 +0000},
Isbn = {978-1-59593-649-3},
Location = {Vienna, Austria},
Numpages = {12},
Pages = {315--326},
Publisher = {VLDB Endowment},
Series = {VLDB '07},
Title = {Improving Data Quality: Consistency and Accuracy},
Url = {http://dl.acm.org/citation.cfm?id=1325851.1325890},
Year = {2007},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=1325851.1325890}}
@inproceedings{Bohannon:2005:CME:1066157.1066175,
Acmid = {1066175},
Address = {New York, NY, USA},
Author = {Bohannon, Philip and Fan, Wenfei and Flaster, Michael and Rastogi, Rajeev},
Booktitle = {Proceedings of the 2005 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 03:32:35 +0000},
Date-Modified = {2016-03-28 03:32:35 +0000},
Doi = {10.1145/1066157.1066175},
Isbn = {1-59593-060-4},
Location = {Baltimore, Maryland},
Numpages = {12},
Pages = {143--154},
Publisher = {ACM},
Series = {SIGMOD '05},
Title = {A Cost-based Model and Effective Heuristic for Repairing Constraints by Value Modification},
Url = {http://doi.acm.org/10.1145/1066157.1066175},
Year = {2005},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1066157.1066175},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1066157.1066175}}
@article{Beskales2013,
Abstract = {Violations of functional dependencies (FDs) and conditional functional dependencies (CFDs) are common in practice, often indicating deviations from the intended data semantics. These violations arise in many contexts such as data integration and Web data extraction. Resolving these violations is challenging for a variety of reasons, one of them being the exponential number of possible repairs. Most of the previous work has tackled this problem by producing a single repair that is nearly optimal with respect to some metric. In this paper, we propose a novel data cleaning approach that is not limited to finding a single repair, namely sampling from the space of possible repairs. We give several motivating scenarios where sampling from the space of CFD repairs is desirable, we propose a new class of useful repairs, and we present an algorithm that randomly samples from this space in an efficient way. We also show how to restrict the space of repairs based on constraints that reflect the accuracy of different parts of the database. We experimentally evaluate our algorithms against previous approaches to show the utility and efficiency of our approach.},
Author = {Beskales, George and Ilyas, Ihab F. and Golab, Lukasz and Galiullin, Artur},
Date-Added = {2016-03-28 03:28:03 +0000},
Date-Modified = {2016-03-28 03:28:03 +0000},
Doi = {10.1007/s00778-013-0316-z},
Issn = {0949-877X},
Journal = {The VLDB Journal},
Number = {1},
Pages = {103--128},
Title = {Sampling from repairs of conditional functional dependency violations},
Url = {http://dx.doi.org/10.1007/s00778-013-0316-z},
Volume = {23},
Year = {2013},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s00778-013-0316-z}}
@inproceedings{4221832,
Author = {L. Antova and C. Koch and D. Olteanu},
Booktitle = {2007 IEEE 23rd International Conference on Data Engineering (ICDE 2007)},
Date-Added = {2016-03-28 03:27:01 +0000},
Date-Modified = {2016-03-28 03:27:01 +0000},
Doi = {10.1109/ICDE.2007.369042},
Keywords = {SQL;information management;query processing;relational algebra;MayBMS;PostgreSQL;SQL-like language;finite world-sets;information management;probabilistic world-set decompositions;query language;relational algebra queries;Algebra;Data mining;Database languages;Information management;Medical diagnostic imaging;Medical tests;Pregnancy;Query processing;Scalability;Testing},
Month = {April},
Pages = {1479--1480},
Title = {MayBMS: Managing Incomplete Information with Probabilistic World-Set Decompositions},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDE.2007.369042}}
@inproceedings{Huang:2009:MPD:1559845.1559984,
Acmid = {1559984},
Address = {New York, NY, USA},
Author = {Huang, Jiewen and Antova, Lyublena and Koch, Christoph and Olteanu, Dan},
Booktitle = {Proceedings of the 2009 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 03:26:49 +0000},
Date-Modified = {2016-03-28 03:26:49 +0000},
Doi = {10.1145/1559845.1559984},
Isbn = {978-1-60558-551-2},
Keywords = {probabilistic databases, query processing},
Location = {Providence, Rhode Island, USA},
Numpages = {4},
Pages = {1071--1074},
Publisher = {ACM},
Series = {SIGMOD '09},
Title = {MayBMS: A Probabilistic Database Management System},
Url = {http://doi.acm.org/10.1145/1559845.1559984},
Year = {2009},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1559845.1559984},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1559845.1559984}}
@article{398a0c1d345c43c5a309d6ae90213f36,
Author = {Amr Ebaid and Ahmed Elmagarmid and Ihab F. Ilyas and Mourad Ouzzani and Jorge-Arnulfo Quian{\'e}-Ruiz and Nan Tang and Si Yin},
Date-Added = {2016-03-28 03:24:04 +0000},
Date-Modified = {2016-03-28 03:24:04 +0000},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {12},
Pages = {1218--1221},
Title = {NADEEF: A generalized data cleaning system},
Volume = {6},
Year = {2013}}
@inproceedings{Khayyat:2015:BSB:2723372.2747646,
Acmid = {2747646},
Address = {New York, NY, USA},
Author = {Khayyat, Zuhair and Ilyas, Ihab F. and Jindal, Alekh and Madden, Samuel and Ouzzani, Mourad and Papotti, Paolo and Quian{\'e}-Ruiz, Jorge-Arnulfo and Tang, Nan and Yin, Si},
Booktitle = {Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 03:23:25 +0000},
Date-Modified = {2016-03-28 03:23:25 +0000},
Doi = {10.1145/2723372.2747646},
Isbn = {978-1-4503-2758-9},
Keywords = {cleansing abstraction, distributed data cleansing, distributed data repair, schema constraints},
Location = {Melbourne, Victoria, Australia},
Numpages = {16},
Pages = {1215--1230},
Publisher = {ACM},
Series = {SIGMOD '15},
Title = {BigDansing: A System for Big Data Cleansing},
Url = {http://doi.acm.org/10.1145/2723372.2747646},
Year = {2015},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2723372.2747646},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2723372.2747646}}
@inproceedings{Elmagarmid:2014:NGI:2588555.2594511,
Acmid = {2594511},
Address = {New York, NY, USA},
Author = {Elmagarmid, Ahmed and Ilyas, Ihab F. and Ouzzani, Mourad and Quian{\'e}-Ruiz, Jorge-Arnulfo and Tang, Nan and Yin, Si},
Booktitle = {Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 03:23:17 +0000},
Date-Modified = {2016-03-28 03:23:17 +0000},
Doi = {10.1145/2588555.2594511},
Isbn = {978-1-4503-2376-5},
Keywords = {NADEEF, entity resolution, generic, interactive},
Location = {Snowbird, Utah, USA},
Numpages = {4},
Pages = {1071--1074},
Publisher = {ACM},
Series = {SIGMOD '14},
Title = {NADEEF/ER: Generic and Interactive Entity Resolution},
Url = {http://doi.acm.org/10.1145/2588555.2594511},
Year = {2014},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2588555.2594511},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2588555.2594511}}
@article{Ebaid:2013:NGD:2536274.2536280,
Acmid = {2536280},
Author = {Ebaid, Amr and Elmagarmid, Ahmed and Ilyas, Ihab F. and Ouzzani, Mourad and Quiane-Ruiz, Jorge-Arnulfo and Tang, Nan and Yin, Si},
Date-Added = {2016-03-28 03:23:00 +0000},
Date-Modified = {2016-03-28 03:23:00 +0000},
Doi = {10.14778/2536274.2536280},
Issn = {2150-8097},
Issue_Date = {August 2013},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {12},
Numpages = {4},
Pages = {1218--1221},
Publisher = {VLDB Endowment},
Title = {NADEEF: A Generalized Data Cleaning System},
Url = {http://dx.doi.org/10.14778/2536274.2536280},
Volume = {6},
Year = {2013},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/2536274.2536280}}
@inproceedings{Dallachiesa:2013:NCD:2463676.2465327,
Acmid = {2465327},
Address = {New York, NY, USA},
Author = {Dallachiesa, Michele and Ebaid, Amr and Eldawy, Ahmed and Elmagarmid, Ahmed and Ilyas, Ihab F. and Ouzzani, Mourad and Tang, Nan},
Booktitle = {Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-28 03:22:39 +0000},
Date-Modified = {2016-03-28 03:22:39 +0000},
Doi = {10.1145/2463676.2465327},
Isbn = {978-1-4503-2037-5},
Keywords = {conditional functional dependency, data cleaning, etl, matching dependency},
Location = {New York, New York, USA},
Numpages = {12},
Pages = {541--552},
Publisher = {ACM},
Series = {SIGMOD '13},
Title = {NADEEF: A Commodity Data Cleaning System},
Url = {http://doi.acm.org/10.1145/2463676.2465327},
Year = {2013},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2463676.2465327},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2463676.2465327}}
@inproceedings{abedjan2015dataxformer,
Author = {Abedjan, Ziawasch and Morcos, John and Gubanov, Michael N and Ilyas, Ihab F and Stonebraker, Michael and Papotti, Paolo and Ouzzani, Mourad},
Booktitle = {CIDR},
Date-Added = {2016-03-28 03:22:14 +0000},
Date-Modified = {2016-03-28 03:22:14 +0000},
Title = {{DataXFormer}: Leveraging the Web for Semantic Transformations},
Year = {2015}}
@inproceedings{Guo:2011:PWM:2047196.2047205,
Acmid = {2047205},
Address = {New York, NY, USA},
Author = {Guo, Philip J. and Kandel, Sean and Hellerstein, Joseph M. and Heer, Jeffrey},
Booktitle = {Proceedings of the 24th Annual ACM Symposium on User Interface Software and Technology},
Date-Added = {2016-03-28 03:21:27 +0000},
Date-Modified = {2016-03-28 03:21:27 +0000},
Doi = {10.1145/2047196.2047205},
Isbn = {978-1-4503-0716-1},
Keywords = {data analysis, data cleaning, data transformation, end-user programming, mixed-initiative interfaces},
Location = {Santa Barbara, California, USA},
Numpages = {10},
Pages = {65--74},
Publisher = {ACM},
Series = {UIST '11},
Title = {Proactive Wrangling: Mixed-initiative End-user Programming of Data Transformation Scripts},
Url = {http://doi.acm.org/10.1145/2047196.2047205},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2047196.2047205},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2047196.2047205}}
@inproceedings{Kandel:2012:PIS:2254556.2254659,
Acmid = {2254659},
Address = {New York, NY, USA},
Author = {Kandel, Sean and Parikh, Ravi and Paepcke, Andreas and Hellerstein, Joseph M. and Heer, Jeffrey},
Booktitle = {Proceedings of the International Working Conference on Advanced Visual Interfaces},
Date-Added = {2016-03-28 03:21:08 +0000},
Date-Modified = {2016-03-28 03:21:08 +0000},
Doi = {10.1145/2254556.2254659},
Isbn = {978-1-4503-1287-5},
Keywords = {anomaly detection, data analysis, data quality, visualization},
Location = {Capri Island, Italy},
Numpages = {8},
Pages = {547--554},
Publisher = {ACM},
Series = {AVI '12},
Title = {Profiler: Integrated Statistical Analysis and Visualization for Data Quality Assessment},
Url = {http://doi.acm.org/10.1145/2254556.2254659},
Year = {2012},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2254556.2254659},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2254556.2254659}}
@article{536251,
Author = {J. Widom},
Date-Added = {2016-03-28 01:16:39 +0000},
Date-Modified = {2016-03-28 01:16:39 +0000},
Doi = {10.1109/69.536251},
Issn = {1041-4347},
Journal = {IEEE Transactions on Knowledge and Data Engineering},
Keywords = {active databases;deductive databases;knowledge based systems;logic programming languages;relational databases;Starburst active database rule system;Starburst extensible relational database system;Starburst rule language;active database rules facility;arbitrary database state transitions;database processing;extensibility features;flexible execution semantics;rule management;rule processing;Algorithm design and analysis;Authorization;Automatic control;Concurrency control;Database systems;Error correction;Expert systems;Production systems;Relational databases;Spatial databases},
Month = {Aug},
Number = {4},
Pages = {583--595},
Title = {The Starburst active database rule system},
Volume = {8},
Year = {1996},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/69.536251}}
@inproceedings{7004218,
Author = {K. Jeon and S. Chandrashekhara and F. Shen and S. Mehra and O. Kennedy and S. Y. Ko},
Booktitle = {2014 IEEE International Conference on Big Data (Big Data)},
Date-Added = {2016-03-27 20:25:32 +0000},
Date-Modified = {2016-03-27 20:25:32 +0000},
Doi = {10.1109/BigData.2014.7004218},
Keywords = {data handling;high level languages;parallel processing;pattern clustering;query languages;Hadoop MapReduce;Hadoop clusters;Pig Latin;PigMix;PigOut automatically-generated scripts;federated data processing;high-level language;query language;user-supplied script;workflow descriptions;Asia;Data processing;Europe;Manuals;Optimization;Programming;Writing},
Month = {Oct},
Pages = {100--109},
Title = {PigOut: Making multiple Hadoop clusters work together},
Year = {2014},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/BigData.2014.7004218}}
@inproceedings{EPFL-CONF-203677,
Abstract = {As the size of data and its heterogeneity increase, traditional database system architecture becomes an obstacle to data analysis. Integrating and ingesting (loading) data into databases is quickly becoming a bottleneck in face of massive data as well as increasingly heterogeneous data formats. Still, state-of-the-art approaches typically rely on copying and transforming data into one (or few) repositories. Queries, on the other hand, are often ad-hoc and supported by pre-cooked operators which are not adaptive enough to optimize access to data. As data formats and queries increasingly vary, there is a need to depart from the current status quo of static query processing primitives and build dynamic, fully adaptive architectures. We build ViDa, a system which reads data in its raw format and processes queries using adaptive, just-in-time operators. Our key insight is use of virtualization, i.e., abstracting data and manipulating it regardless of its original format, and dynamic generation of operators. ViDa's query engine is generated just-in-time; its caches and its query operators adapt to the current query and the workload, while also treating raw datasets as its native storage structures. Finally, ViDa features a language expressive enough to support heterogeneous data models, and to which existing languages can be translated. Users therefore have the power to choose the language best suited for an analysis.},
Affiliation = {EPFL},
Author = {Karpathiotakis, Manos and Alagiannis, Ioannis and Heinis, Thomas and Branco, Miguel and Ailamaki, Anastasia},
Booktitle = {Proceedings of the 7th {B}iennial {C}onference on {I}nnovative {D}ata {S}ystems {R}esearch ({CIDR})},
Date-Added = {2016-03-27 05:24:40 +0000},
Date-Modified = {2016-03-27 05:24:40 +0000},
Details = {http://infoscience.epfl.ch/record/203677},
Documenturl = {http://infoscience.epfl.ch/record/203677/files/vida-cidr.pdf},
Keywords = {data virtualization; raw data querying; code generation; just-in-time databases; data analytics; query processing},
Location = {Asilomar, California, USA},
Title = {Just-{I}n-{T}ime {D}ata {V}irtualization: {L}ightweight {D}ata {M}anagement with {V}i{D}a},
Unit = {DIAS},
Year = 2015}
@article{Karpathiotakis:2014:AQP:2732977.2732986,
Acmid = {2732986},
Author = {Karpathiotakis, Manos and Branco, Miguel and Alagiannis, Ioannis and Ailamaki, Anastasia},
Date-Added = {2016-03-27 05:24:07 +0000},
Date-Modified = {2016-03-27 05:24:07 +0000},
Doi = {10.14778/2732977.2732986},
Issn = {2150-8097},
Issue_Date = {August 2014},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {12},
Numpages = {12},
Pages = {1119--1130},
Publisher = {VLDB Endowment},
Title = {Adaptive Query Processing on RAW Data},
Url = {http://dx.doi.org/10.14778/2732977.2732986},
Volume = {7},
Year = {2014},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/2732977.2732986}}
@article{Alagiannis:2012:NAA:2367502.2367543,
Acmid = {2367543},
Author = {Alagiannis, Ioannis and Borovica, Renata and Branco, Miguel and Idreos, Stratos and Ailamaki, Anastasia},
Date-Added = {2016-03-27 05:23:04 +0000},
Date-Modified = {2016-03-27 05:23:04 +0000},
Doi = {10.14778/2367502.2367543},
Issn = {2150-8097},
Issue_Date = {August 2012},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {12},
Numpages = {4},
Pages = {1942--1945},
Publisher = {VLDB Endowment},
Title = {NoDB in Action: Adaptive Query Processing on Raw Data},
Url = {http://dx.doi.org/10.14778/2367502.2367543},
Volume = {5},
Year = {2012},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/2367502.2367543}}
@inproceedings{Alagiannis:2012:NEQ:2213836.2213864,
Acmid = {2213864},
Address = {New York, NY, USA},
Author = {Alagiannis, Ioannis and Borovica, Renata and Branco, Miguel and Idreos, Stratos and Ailamaki, Anastasia},
Booktitle = {Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-27 05:22:32 +0000},
Date-Modified = {2016-03-27 05:22:32 +0000},
Doi = {10.1145/2213836.2213864},
Isbn = {978-1-4503-1247-9},
Keywords = {adaptive loading, in situ querying, positional map},
Location = {Scottsdale, Arizona, USA},
Numpages = {12},
Pages = {241--252},
Publisher = {ACM},
Series = {SIGMOD '12},
Title = {NoDB: Efficient Query Execution on Raw Data Files},
Url = {http://doi.acm.org/10.1145/2213836.2213864},
Year = {2012},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2213836.2213864},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2213836.2213864}}
@article{lahiri2013oracle,
Author = {Lahiri, Tirthankar and Neimat, Marie-Anne and Folkman, Steve},
Date-Added = {2016-03-27 05:19:01 +0000},
Date-Modified = {2016-03-27 05:19:01 +0000},
Journal = {IEEE Data Eng. Bull.},
Number = {2},
Pages = {6--13},
Title = {Oracle TimesTen: An In-Memory Database for Enterprise Applications},
Volume = {36},
Year = {2013}}
@inproceedings{Olston:2008:PLN:1376616.1376726,
Acmid = {1376726},
Address = {New York, NY, USA},
Author = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
Booktitle = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-27 05:13:54 +0000},
Date-Modified = {2016-03-27 05:13:54 +0000},
Doi = {10.1145/1376616.1376726},
Isbn = {978-1-60558-102-6},
Keywords = {dataflow language, pig latin},
Location = {Vancouver, Canada},
Numpages = {12},
Pages = {1099--1110},
Publisher = {ACM},
Series = {SIGMOD '08},
Title = {Pig Latin: A Not-so-foreign Language for Data Processing},
Url = {http://doi.acm.org/10.1145/1376616.1376726},
Year = {2008},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1376616.1376726},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1376616.1376726}}
@inproceedings{Kandel:2011:WIV:1978942.1979444,
Acmid = {1979444},
Address = {New York, NY, USA},
Author = {Kandel, Sean and Paepcke, Andreas and Hellerstein, Joseph and Heer, Jeffrey},
Booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems},
Date-Added = {2016-03-27 05:07:12 +0000},
Date-Modified = {2016-03-27 05:07:12 +0000},
Doi = {10.1145/1978942.1979444},
Isbn = {978-1-4503-0228-9},
Keywords = {data analysis, data cleaning, transformation, visualization, wrangler},
Location = {Vancouver, BC, Canada},
Numpages = {10},
Pages = {3363--3372},
Publisher = {ACM},
Series = {CHI '11},
Title = {Wrangler: Interactive Visual Specification of Data Transformation Scripts},
Url = {http://doi.acm.org/10.1145/1978942.1979444},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1978942.1979444},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1978942.1979444}}
@article{Kot:2009:CUE:1687627.1687650,
Acmid = {1687650},
Author = {Kot, Lucja and Koch, Christoph},
Date-Added = {2016-03-27 05:01:40 +0000},
Date-Modified = {2016-03-27 05:30:37 +0000},
Doi = {10.14778/1687627.1687650},
Issn = {2150-8097},
Issue_Date = {August 2009},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {1},
Numpages = {12},
Pages = {193--204},
Publisher = {VLDB Endowment},
Title = {Cooperative Update Exchange in the Youtopia System},
Url = {http://dx.doi.org/10.14778/1687627.1687650},
Volume = {2},
Year = {2009},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/1687627.1687650}}
@article{Ives:2008:OCD:1462571.1462577,
Acmid = {1462577},
Address = {New York, NY, USA},
Author = {Ives, Zachary G. and Green, Todd J. and Karvounarakis, Grigoris and Taylor, Nicholas E. and Tannen, Val and Talukdar, Partha Pratim and Jacob, Marie and Pereira, Fernando},
Date-Added = {2016-03-27 04:59:36 +0000},
Date-Modified = {2016-03-27 04:59:36 +0000},
Doi = {10.1145/1462571.1462577},
Issn = {0163-5808},
Issue_Date = {September 2008},
Journal = {SIGMOD Rec.},
Month = sep,
Number = {3},
Numpages = {7},
Pages = {26--32},
Publisher = {ACM},
Title = {The ORCHESTRA Collaborative Data Sharing System},
Url = {http://doi.acm.org/10.1145/1462571.1462577},
Volume = {37},
Year = {2008},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1462571.1462577},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1462571.1462577}}
@inproceedings{Green:2007:OFC:1247480.1247631,
Acmid = {1247631},
Address = {New York, NY, USA},
Author = {Green, Todd J. and Karvounarakis, Grigoris and Taylor, Nicholas E. and Biton, Olivier and Ives, Zachary G. and Tannen, Val},
Booktitle = {Proceedings of the 2007 ACM SIGMOD International Conference on Management of Data},
Date-Added = {2016-03-27 04:59:16 +0000},
Date-Modified = {2016-03-27 04:59:16 +0000},
Doi = {10.1145/1247480.1247631},
Isbn = {978-1-59593-686-8},
Keywords = {data exchange, data integration, data sharing, reconciliation, schema mappings},
Location = {Beijing, China},
Numpages = {3},
Pages = {1131--1133},
Publisher = {ACM},
Series = {SIGMOD '07},
Title = {ORCHESTRA: Facilitating Collaborative Data Sharing},
Url = {http://doi.acm.org/10.1145/1247480.1247631},
Year = {2007},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1247480.1247631},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1247480.1247631}}
@inproceedings{Milo:1998:USM:645924.671326,
Acmid = {671326},
Address = {San Francisco, CA, USA},
Author = {Milo, Tova and Zohar, Sagit},
Booktitle = {Proceedings of the 24th International Conference on Very Large Data Bases},
Date-Added = {2016-03-27 04:56:01 +0000},
Date-Modified = {2016-03-27 04:56:01 +0000},
Isbn = {1-55860-566-5},
Numpages = {12},
Pages = {122--133},
Publisher = {Morgan Kaufmann Publishers Inc.},
Series = {VLDB '98},
Title = {Using Schema Matching to Simplify Heterogeneous Data Translation},
Url = {http://dl.acm.org/citation.cfm?id=645924.671326},
Year = {1998},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=645924.671326}}
@article{4016511,
Author = {A. K. Elmagarmid and P. G. Ipeirotis and V. S. Verykios},
Date-Added = {2016-03-27 04:54:56 +0000},
Date-Modified = {2016-03-27 04:54:56 +0000},
Doi = {10.1109/TKDE.2007.250581},
Issn = {1041-4347},
Journal = {IEEE Transactions on Knowledge and Data Engineering},
Keywords = {data integrity;data mining;database management systems;database management system;duplicate detection algorithm;duplicate record detection;transcription error;Cleaning;Computer Society;Computer errors;Cost function;Couplings;Detection algorithms;Mirrors;Relational databases;Scalability;Uncertainty;Duplicate detection;data cleaning;data deduplication;data integration;database hardening;entity matching.;entity resolution;fuzzy duplicate detection;identity uncertainty;instance identification;name matching;record linkage},
Month = {Jan},
Number = {1},
Pages = {1--16},
Title = {Duplicate Record Detection: A Survey},
Volume = {19},
Year = {2007},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/TKDE.2007.250581}}
@article{McCarthy:aa,
Abstract = {This paper describes RESOLVE, a system that uses decision trees to learn how to classify coreferent phrases in the domain of business joint ventures. An experiment is presented in which the performance of RESOLVE is compared to the performance of a manually engineered set of rules for the same task. The results show that decision trees achieve higher performance than the rules in two of three evaluation metrics developed for the coreference task. In addition to achieving better performance than the rules, RESOLVE provides a framework that facilitates the exploration of the types of knowledge that are useful for solving the coreference problem.},
Author = {Joseph F. McCarthy and Wendy G. Lehnert},
Date-Added = {2016-03-27 04:50:36 +0000},
Date-Modified = {2016-03-27 04:50:36 +0000},
Eprint = {cmp-lg/9505043},
Title = {Using Decision Trees for Coreference Resolution},
Url = {http://arxiv.org/abs/cmp-lg/9505043},
Year = {1995},
Bdsk-Url-1 = {http://arxiv.org/abs/cmp-lg/9505043}}
@inproceedings{Bohannon:2006:RLL:1142351.1142399,
Acmid = {1142399},
Address = {New York, NY, USA},
Author = {Bohannon, Aaron and Pierce, Benjamin C. and Vaughan, Jeffrey A.},
Booktitle = {Proceedings of the Twenty-fifth ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
Date-Added = {2016-03-27 04:44:17 +0000},
Date-Modified = {2016-03-27 04:44:17 +0000},
Doi = {10.1145/1142351.1142399},
Isbn = {1-59593-318-2},
Keywords = {lenses, view update},
Location = {Chicago, IL, USA},
Numpages = {10},
Pages = {338--347},
Publisher = {ACM},
Series = {PODS '06},
Title = {Relational Lenses: A Language for Updatable Views},
Url = {http://doi.acm.org/10.1145/1142351.1142399},
Year = {2006},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1142351.1142399},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1142351.1142399}}
@article{Ives:1999:AQE:304181.304209,
Acmid = {304209},
Address = {New York, NY, USA},
Author = {Ives, Zachary G. and Florescu, Daniela and Friedman, Marc and Levy, Alon and Weld, Daniel S.},
Date-Added = {2016-03-27 04:25:38 +0000},
Date-Modified = {2016-03-27 04:25:38 +0000},
Doi = {10.1145/304181.304209},
Issn = {0163-5808},
Issue_Date = {June 1999},
Journal = {SIGMOD Rec.},
Month = jun,
Number = {2},
Numpages = {12},
Pages = {299--310},
Publisher = {ACM},
Title = {An Adaptive Query Execution System for Data Integration},
Url = {http://doi.acm.org/10.1145/304181.304209},
Volume = {28},
Year = {1999},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/304181.304209},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/304181.304209}}
@article{Fagin200589,
Abstract = {Data exchange is the problem of taking data structured under a source schema and creating an instance of a target schema that reflects the source data as accurately as possible. In this paper, we address foundational and algorithmic issues related to the semantics of data exchange and to the query answering problem in the context of data exchange. These issues arise because, given a source instance, there may be many target instances that satisfy the constraints of the data exchange problem. We give an algebraic specification that selects, among all solutions to the data exchange problem, a special class of solutions that we call universal. We show that a universal solution has no more and no less data than required for data exchange and that it represents the entire space of possible solutions. We then identify fairly general, yet practical, conditions that guarantee the existence of a universal solution and yield algorithms to compute a canonical universal solution efficiently. We adopt the notion of the ``certain answers'' in indefinite databases for the semantics for query answering in data exchange. We investigate the computational complexity of computing the certain answers in this context and also address other algorithmic issues that arise in data exchange. In particular, we study the problem of computing the certain answers of target queries by simply evaluating them on a canonical universal solution, and we explore the boundary of what queries can and cannot be answered this way, in a data exchange setting. },
Author = {Ronald Fagin and Phokion G. Kolaitis and Ren{\'e}e J. Miller and Lucian Popa},
Date-Added = {2016-03-27 04:24:02 +0000},
Date-Modified = {2016-03-27 04:24:02 +0000},
Doi = {http://dx.doi.org/10.1016/j.tcs.2004.10.033},
Issn = {0304-3975},
Journal = {Theoretical Computer Science},
Keywords = {First-order inexpressibility},
Note = {Database Theory},
Number = {1},
Pages = {89--124},
Title = {Data exchange: semantics and query answering},
Url = {http://www.sciencedirect.com/science/article/pii/S030439750400725X},
Volume = {336},
Year = {2005},
Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/pii/S030439750400725X},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.tcs.2004.10.033}}
@misc{flextable,
Author = {HPE/Vertica},
Date-Added = {2016-03-27 00:32:57 +0000},
Date-Modified = {2016-03-27 00:33:27 +0000},
Howpublished = {http://my.vertica.com/docs/7.0.x/PDF/HP_Vertica_7.0.x_FlextablesQuickstart.pdf},
Title = {Vertica FlexTables},
Year = {2015}}
@techreport{ilprints264,
Abstract = {In semistructured databases there is no schema fixed in advance. To provide the benefits of a schema in such environments, we introduce DataGuides: concise and accurate structural summaries of semistructured databases. DataGuides serve as dynamic schemas, generated from the database; they are useful for browsing database structure, formulating queries, storing information such as statistics and sample values, and enabling query optimization. This paper presents the theoretical foundations of DataGuides along with algorithms for their creation and incremental maintenance. We provide performance results based on our implementation of DataGuides in the Lore DBMS for semistructured data. We also describe the use of DataGuides in Lore, both in the user interface to enable structure browsing and query formulation, and as a means of guiding the query processor and optimizing query execution.},
Author = {R. Goldman and J. Widom},
Date-Added = {2016-03-27 00:28:00 +0000},
Date-Modified = {2016-03-27 00:28:00 +0000},
Institution = {Stanford InfoLab},
Number = {1997-50},
Publisher = {Stanford},
Title = {DataGuides: Enabling Query Formulation and Optimization in Semistructured Databases},
Type = {Technical Report},
Url = {http://ilpubs.stanford.edu:8090/264/},
Year = {1997},
Bdsk-Url-1 = {http://ilpubs.stanford.edu:8090/264/}}
@misc{hadoop,
Author = {A. Bialecki and M. Cafarella and D. Cutting and O. O'Malley},
Date-Added = {2016-03-26 23:40:43 +0000},
Date-Modified = {2016-03-26 23:41:42 +0000},
Howpublished = {http://lucene.apache.org/hadoop/.},
Title = {{Hadoop}: A framework for running applications on large clusters built of commodity hardware.},
Year = {2005}}
@misc{stonebraker2014swamp,
Author = {Michael Stonebraker},
Date-Added = {2016-03-26 23:35:29 +0000},
Date-Modified = {2016-03-26 23:35:29 +0000},
Howpublished = {http://cacm.acm.org/blogs/blog-cacm/181547-why-the-data-lake-is-really-a-data-swamp},
Title = {Why the 'Data Lake' is Really a 'Data Swamp'},
Year = {2014}}
@article{Chen:1976:EMU:320434.320440,
Acmid = {320440},
Address = {New York, NY, USA},
Author = {Chen, Peter Pin-Shan},
Date-Added = {2016-03-26 23:06:47 +0000},
Date-Modified = {2016-03-26 23:06:47 +0000},
Doi = {10.1145/320434.320440},
Issn = {0362-5915},
Issue_Date = {March 1976},
Journal = {ACM Trans. Database Syst.},
Keywords = {Data Base Task Group, data definition and manipulation, data integrity and consistency, data models, database design, entity set model, entity-relationship model, logigcal view of data, network model, relational model, semantics of data},
Month = mar,
Number = {1},
Numpages = {28},
Pages = {9--36},
Publisher = {ACM},
Title = {The Entity-relationship Model---Toward a Unified View of Data},
Url = {http://doi.acm.org/10.1145/320434.320440},
Volume = {1},
Year = {1976},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/320434.320440},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/320434.320440}}
@article{Codd:1970:RMD:362384.362685,
Acmid = {362685},
Address = {New York, NY, USA},
Author = {Codd, E. F.},
Date-Added = {2016-03-26 23:04:05 +0000},
Date-Modified = {2016-03-26 23:04:05 +0000},
Doi = {10.1145/362384.362685},
Issn = {0001-0782},
Issue_Date = {June 1970},
Journal = {Commun. ACM},
Keywords = {composition, consistency, data bank, data base, data integrity, data organization, data structure, derivability, hierarchies of data, join, networks of data, predicate calculus, redundancy, relations, retrieval language, security},
Month = jun,
Number = {6},
Numpages = {11},
Pages = {377--387},
Publisher = {ACM},
Title = {A Relational Model of Data for Large Shared Data Banks},
Url = {http://doi.acm.org/10.1145/362384.362685},
Volume = {13},
Year = {1970},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/362384.362685},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/362384.362685}}
@article{doraiswamytvcg2014,
Author = {Doraiswamy, H. and Ferreira, N. and Damoulas, T. and Freire, J. and Silva, C.T.},
Journal = {IEEE TVCG},
Number = {12},
Pages = {2634-2643},
Title = {Using Topological Analysis to Support Event-Guided Exploration in Urban Data},
Volume = {20},
Year = {2014}}
@inproceedings{Fagin:2003:DEG:773153.773163,
Author = {Fagin, Ronald and Kolaitis, Phokion G. and Popa, Lucian},
Booktitle = {PODS},
Date-Added = {2016-03-24 22:33:01 +0000},
Date-Modified = {2016-03-24 22:33:01 +0000},
Pages = {90--101},
Publisher = {ACM},
Title = {Data Exchange: Getting to the Core},
Year = {2003}}
@article{Green:2012,
Author = {Karvounarakis, G. and Green, T.J.},
Date-Added = {2016-03-24 22:32:50 +0000},
Date-Modified = {2016-03-24 22:32:50 +0000},
Journal = {SIGMOD Record},
Number = {3},
Pages = {5-14},
Title = {Semiring-Annotated Data: Queries and Provenance},
Volume = {41},
Year = {2012}}
@article{antova200910,
Author = {Antova, Lyublena and Koch, Christoph and Olteanu, Dan},
Date-Added = {2016-03-24 22:19:31 +0000},
Date-Modified = {2016-03-24 22:19:31 +0000},
Journal = {{VLDB} J.},
Number = {5},
Pages = {1021--1040},
Publisher = {Springer-Verlag New York, Inc.},
Title = {$10^{(10^{6})}$ worlds and beyond: efficient representation and processing of incomplete information},
Volume = {18},
Year = {2009}}
@article{DBLP:journals/jacm/ImielinskiL84,
Author = {Tomasz Imielinski and Witold Lipski Jr.},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/jacm/ImielinskiL84},
Date-Added = {2016-03-24 22:19:31 +0000},
Date-Modified = {2016-03-24 22:19:31 +0000},
Doi = {10.1145/1634.1886},
Journal = {J. {ACM}},
Number = {4},
Pages = {761--791},
Timestamp = {Thu, 26 Jan 2012 17:31:32 +0100},
Title = {Incomplete Information in Relational Databases},
Url = {http://doi.acm.org/10.1145/1634.1886},
Volume = {31},
Year = {1984},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1634.1886},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1634.1886}}
@article{DBLP:journals/debu/GreenT06,
Author = {Todd J. Green and Val Tannen},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/journals/debu/GreenT06},
Date-Added = {2016-03-24 22:19:31 +0000},
Date-Modified = {2016-03-24 22:19:31 +0000},
Journal = {{IEEE} Data Eng. Bull.},
Number = {1},
Pages = {17--24},
Timestamp = {Fri, 01 Sep 2006 12:46:01 +0200},
Title = {Models for Incomplete and Probabilistic Information},
Url = {http://sites.computer.org/debull/A06mar/green.ps},
Volume = {29},
Year = {2006},
Bdsk-Url-1 = {http://sites.computer.org/debull/A06mar/green.ps}}
@inproceedings{Green:2007:PS:1265530.1265535,
Acmid = {1265535},
Author = {Green, Todd J. and Karvounarakis, Grigoris and Tannen, Val},
Booktitle = {PODS},
Date-Added = {2016-03-24 22:19:31 +0000},
Date-Modified = {2016-03-24 22:19:31 +0000},
Doi = {10.1145/1265530.1265535},
Isbn = {978-1-59593-685-1},
Keywords = {data lineage, data provenance, datalog, formal power series, incomplete databases, probabilistic databases, semirings},
Location = {Beijing, China},
Numpages = {10},
Pages = {31--40},
Publisher = {ACM},
Title = {Provenance Semirings},
Url = {http://doi.acm.org/10.1145/1265530.1265535},
Year = {2007},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1265530.1265535},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1265530.1265535}}
@inproceedings{DBLP:conf/sigmod/JefferyFH08,
Author = {Shawn R. Jeffery and Michael J. Franklin and Alon Y. Halevy},
Bibsource = {dblp computer science bibliography, http://dblp.org},
Biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigmod/JefferyFH08},
Booktitle = {{SIGMOD}},
Date-Added = {2016-03-24 21:49:16 +0000},
Date-Modified = {2016-03-24 21:49:16 +0000},
Doi = {10.1145/1376616.1376701},
Pages = {847--860},
Publisher = {{ACM}},
Timestamp = {Tue, 10 Jun 2008 07:38:05 +0200},
Title = {Pay-as-you-go user feedback for dataspace systems},
Url = {http://doi.acm.org/10.1145/1376616.1376701},
Year = {2008},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1376616.1376701},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1376616.1376701}}
@inproceedings{liu2015management,
Author = {Liu, Zhen Hua and Gawlick, Dieter},
Booktitle = {{CIDR}},
Date-Added = {2016-03-24 21:49:03 +0000},
Date-Modified = {2016-03-24 21:49:03 +0000},
Title = {Management of Flexible Schema Data in {RDBMSs} - Opportunities and Limitations for {NoSQL}},
Year = {2015}}
@article{letchner2009lahar,
Author = {Letchner, Julie and R{\'e}, Christopher and Balazinska, Magdalena and Philipose, Matthai},
Date-Added = {2016-03-24 21:28:23 +0000},
Date-Modified = {2016-03-24 21:28:23 +0000},
Journal = {Proceedings of the VLDB Endowment},
Number = {2},
Pages = {1610--1613},
Publisher = {VLDB Endowment},
Title = {Lahar demonstration: warehousing Markovian streams},
Volume = {2},
Year = {2009}}
@inproceedings{re2008event,
Author = {R{\'e}, Christopher and Letchner, Julie and Balazinska, Magdalena and Suciu, Dan},
Booktitle = {Proceedings of the 2008 ACM SIGMOD international conference on Management of data},
Date-Added = {2016-03-24 21:28:23 +0000},
Date-Modified = {2016-03-24 21:28:23 +0000},
Organization = {ACM},
Pages = {715--728},
Title = {Event queries on correlated probabilistic streams},
Year = {2008}}
@inproceedings{letchner2009access,
Author = {Letchner, Julie and R{\'e}, Christopher and Balazinska, Magdalena and Philipose, Matthai},
Booktitle = {Proceedings of the 25th IEEE International Conference on Data Engineering (ICDE)},
Date-Added = {2016-03-24 21:28:22 +0000},
Date-Modified = {2016-03-24 21:28:22 +0000},
Organization = {IEEE},
Pages = {246--257},
Title = {Access methods for markovian streams},
Year = {2009}}
@phdthesis{letchner2010lahar,
Author = {Letchner, Julia Maureen},
Date-Added = {2016-03-24 21:28:21 +0000},
Date-Modified = {2016-03-24 21:28:21 +0000},
School = {University of Washington},
Title = {Lahar: warehousing markovian streams},
Year = {2010}}
@techreport{AG14a,
Author = {Bahareh Arab and Dieter Gawlick and Vasudha Krishnaswamy and Venkatesh Radhakrishnan and Boris Glavic},
Date-Added = {2016-03-22 00:24:16 +0000},
Date-Modified = {2016-03-22 00:24:16 +0000},
Institution = {Illinois Institute of Technology},
Keywords = {temporal-provenance; provenance; transactions; reenactment; GProM},
Number = {IIT/CS-DB-2014-02},
Title = {Reenacting Transactions to Compute their Provenance},
Venueshort = {Techreport},
Year = {2014}
}
@article{GE14,
Abstract = {Managing fine-grained provenance is a critical requirement for data stream management systems (DSMS), not only to address complex applications that require diagnostic capabilities and assurance, but also for providing advanced functionality such as revision processing or query debugging. This paper introduces a novel approach that uses operator instrumentation, i.e., modifying the behavior of operators, to generate and propagate fine-grained provenance through several operators of a query network. In addition to applying this technique to compute provenance eagerly during query execution, we also study how to decouple provenance computation from query processing to reduce run-time overhead and avoid unnecessary provenance retrieval. Our proposals include computing a concise superset of the provenance (to allow lazily replaying a query and reconstruct its provenance) as well as lazy retrieval (to avoid unnecessary reconstruction of provenance). We develop stream-specific compression methods to reduce the computational and storage overhead of provenance generation and retrieval. Ariadne, our provenance-aware extension of the Borealis DSMS implements these techniques. Our experiments confirm that Ariadne manages provenance with minor overhead and clearly outperforms query rewrite, the current state-of-the-art.},
Author = {Boris Glavic and Kyumars Sheykh Esmaili and Peter M. Fischer and Nesime Tatbul},
Date-Added = {2016-03-22 00:24:08 +0000},
Date-Modified = {2016-03-22 00:24:08 +0000},
Journal = {Transactions on Internet Technology (TOIT)},
Keywords = {ariadne; provenance},
Number = {1},
Pages = {7:1-7:26},
Title = {Efficient Stream Provenance via Operator Instrumentation},
Venueshort = {TOIT},
Volume = {13},
Year = {2014}
}
@inproceedings{G13,
Abstract = {Data Provenance is information about the origin and creation process of data. Such information is useful for debugging data and transformations, auditing, evaluating the quality of and trust in data, modelling authenticity, and implementing access control for derived data. Provenance has been studied by the database, workflow, and distributed systems communities, but provenance for Big Data - which we refer to as Big Provenance - is a largely unexplored field. This paper reviews existing approaches for large-scale distributed provenance and discusses potential challenges for Big Data benchmarks that aim to incorporate provenance data/management. Furthermore, we will examine how Big Data benchmarking could benefit from different types of provenance information. We argue that provenance can be used for identifying and analyzing performance bottlenecks, to compute performance metrics, and to test a system's ability to exploit commonalities in data and processing.},
Author = {Boris Glavic},
Booktitle = {2nd Workshop on Big Data Benchmarking (WBDB)},
Date-Added = {2016-03-22 00:23:49 +0000},
Date-Modified = {2016-03-22 00:23:49 +0000},
Keywords = {big data; provenance},
Pages = {72-80},
Slideurl = {http://www.slideshare.net/lordPretzel/wbdb-2012-wbdb},
Title = {Big Data Provenance: Challenges and Implications for Benchmarking},
Venueshort = {WBDB},
Year = {2012}
}
@inproceedings{GE13,
Abstract = {Managing fine-grained provenance is a critical requirement for data stream management systems (DSMS), not only to address complex applications that require diagnostic capabilities and assurance, but also for providing advanced functionality such as revision processing or query debugging. This paper introduces a novel approach that uses operator instrumentation, i.e., modifying the behavior of operators, to generate and propagate fine-grained provenance through several operators of a query network. In addition to applying this technique to compute provenance eagerly during query execution, we also study how to decouple provenance computation from query processing to reduce run-time overhead and avoid unnecessary provenance retrieval. This includes computing a concise superset of the provenance to allow lazily replaying a query network and reconstruct its provenance as well as lazy retrieval to avoid unnecessary reconstruction of provenance. We develop stream-specific compression methods to reduce the computational and storage overhead of provenance generation and retrieval. Ariadne, our provenance-aware extension of the Borealis DSMS implements these techniques. Our experiments confirm that Ariadne manages provenance with minor overhead and clearly outperforms query rewrite, the current state-of-the-art.},
Author = {Boris Glavic and Kyumars Sheykh Esmaili and Peter M. Fischer and Nesime Tatbul},
Booktitle = {Proceedings of the 7th ACM International Conference on Distributed Event-Based Systems (DEBS)},
Date-Added = {2016-03-22 00:23:43 +0000},
Date-Modified = {2016-03-22 00:23:43 +0000},
Keywords = {ariadne; provenance},
Pages = {291-320},
Slideurl = {http://www.slideshare.net/lordPretzel/2013-debs-ariadne},
Title = {Ariadne: Managing Fine-Grained Provenance on Data Streams},
Venueshort = {DEBS},
Year = {2013}
}
@inproceedings{AG13,
Abstract = {The creation of values to represent incomplete information, often referred to as value invention, is central in data exchange. Within schema mappings, Skolem functions have long been used for value invention as they permit a precise representation of missing information. Recent work on a powerful mapping language called second-order tuple generating dependencies (SO tgds), has drawn attention to the fact that the use of arbitrary Skolem functions can have negative computational and programmatic properties in data exchange. In this paper, we present two techniques for understanding when the Skolem functions needed to represent the correct semantics of incomplete information are computationally well-behaved. Specifically, we consider when the Skolem functions in second-order (SO) mappings have a first-order (FO) semantics and are therefore programmatically and computationally more desirable for use in practice. Our first technique, linearization, significantly extends the Nash, Bernstein and Melnik unskolemization algorithm, by understanding when the sets of arguments of the Skolem functions in a mapping are related by set inclusion. We show that such a linear relationship leads to mappings that have FO semantics and are expressible in popular mapping languages including source-to-target tgds and nested tgds. Our second technique uses source semantics, specifically functional dependencies (including keys), to transform SO mappings into equivalent FO mappings. We show that our algorithms are applicable to a strictly larger class of mappings than previous approaches, but more importantly we present an extensive experimental evaluation that quantifies this difference (about 78\% improvement) over an extensive schema mapping benchmark and illustrates the applicability of our results on real mappings.},
Author = {Patricia C. Arocena and Boris Glavic and Ren\'{e}e J. Miller},
Booktitle = {Proceedings of the 39th International Conference on Management of Data (SIGMOD)},
Date-Added = {2016-03-22 00:23:33 +0000},
Date-Modified = {2016-03-22 00:23:33 +0000},
Keywords = {data exchange; iBench},
Pages = {157-168},
Slideurl = {http://www.slideshare.net/lordPretzel/sigmod-2013-patricias-talk-on-value-invention},
Title = {Value Invention for Data Exchange},
Venueshort = {SIGMOD},
Year = {2013}
}
@inproceedings{GS13,
Abstract = {Data mining aims at extracting useful information from large datasets. Most data mining approaches reduce the input data to produce a smaller output summarizing the mining result. While the purpose of data mining (extracting information) necessitates this reduction in size, the loss of information it entails can be problematic. Specifically, the results of data mining may be more confusing than insightful, if the user is not able to understand on which input data they are based and how they were created. In this paper, we argue that the user needs access to the provenance of mining results. Provenance, while extensively studied by the database, workflow, and distributed systems communities, has not yet been considered for data mining. We analyze the differences between database, workflow, and data mining provenance, suggest new types of provenance, and identify new use-cases for provenance in data mining. To illustrate our ideas, we present a more detailed discussion of these concepts for two typical data mining algorithms: frequent itemset mining and multi-dimensional scaling.},
Author = {Boris Glavic and Javed Siddique and Periklis Andritsos and Ren\'{e}e J. Miller},
Booktitle = {Proceedings of the 5th USENIX Workshop on the Theory and Practice of Provenance (TaPP)},
Date-Added = {2016-03-22 00:23:27 +0000},
Date-Modified = {2016-03-22 00:23:27 +0000},
Keywords = {provenance},
Slideurl = {http://www.slideshare.net/lordPretzel/tapp-2013},
Title = {Provenance for Data Mining},
Venueshort = {TaPP},
Year = {2013}
}
@inproceedings{GM11,
Abstract = {We reconsider some of the explicit and implicit properties that underlie well-established definitions of data provenance semantics. Previous work on comparing provenance semantics has mostly focused on expressive power (does the provenance generated by a certain semantics subsume the provenance generated by other semantics) and on understanding whether a semantics is insensitive to query rewrite (i.e., do equivalent queries have the same provenance). In contrast, we try to investigate why certain semantics possess specific properties (like insensitivity) and whether these properties are always desirable. We present a new property stability with respect to query language extension that, to the best of our knowledge, has not been isolated and studied on its own.},
Author = {Boris Glavic and Ren\'{e}e J. Miller},
Booktitle = {Proceedings of the 3rd USENIX Workshop on the Theory and Practice of Provenance (TaPP)},
Date-Added = {2016-03-22 00:23:21 +0000},
Date-Modified = {2016-03-22 00:23:21 +0000},
Keywords = {provenance},
Slideurl = {http://www.slideshare.net/lordPretzel/tapp-2014-talk-boris-41165865},
Title = {{Reexamining Some Holy Grails of Data Provenance}},
Venueshort = {TaPP},
Year = {2011}
}
@article{GD11,
Abstract = {In this paper, we present Vagabond, a system that uses a novel holistic approach to help users to understand and debug data exchange scenarios. Developing such a scenario is a complex and labor-intensive process where errors are often only revealed in the target instance produced as the result of this process. This makes it very hard to debug such scenarios, especially for non-power users. Vagabond aids a user in debugging by automatically generating possible explanations for target instance errors identified by the user.},
Author = {Boris Glavic and Jiang Du and Ren\'{e}e J. Miller and Gustavo Alonso and Laura M. Haas},
Date-Added = {2016-03-22 00:23:09 +0000},
Date-Modified = {2016-03-22 00:23:09 +0000},
Journal = {Proceedings of the VLDB Endowment (PVLDB) (Demonstration Track)},
Keywords = {provenance; vagabond; data exchange},
Number = {12},
Pages = {1383-1386},
Title = {{Debugging Data Exchange with Vagabond}},
Venueshort = {PVLDB},
Volume = {4},
Year = {2011}
}
@inproceedings{GA09b,
Abstract = {In this demonstration we present the Perm provenance management system (PMS). Perm is capable of computing, storing and querying provenance information for the relational data model. Provenance is computed by using query rewriting techniques to annotate tuples with provenance information. Thus, provenance data and provenance computations are represented as relational data and queries and, hence, can be queried, stored and optimized using standard relational database techniques. This demo shows the complete Perm system and lets attendants examine in detail the process of query rewriting and provenance retrieval in Perm, the most complete data provenance system available today. For example, Perm supports lazy and eager provenance computation, external provenance and various contribution semantics.},
Author = {Boris Glavic and Gustavo Alonso},
Booktitle = {Proceedings of the 35th ACM SIGMOD International Conference on Management of Data (SIGMOD) (Demonstration Track)},
Date-Added = {2016-03-22 00:23:03 +0000},
Date-Modified = {2016-03-22 00:23:03 +0000},
Keywords = {provenance; perm},
Pages = {1055-1058},
Title = {{The Perm Provenance Management System in Action}},
Venueshort = {SIGMOD},
Year = {2009}
}
@inproceedings{GK15,
Author = {Boris Glavic and Sven K\"{o}hler and Sean Riddle and Bertram Lud\"{a}scher},
Booktitle = {Proceedings of the 7th USENIX Workshop on the Theory and Practice of Provenance (TaPP)},
Date-Added = {2016-03-22 00:22:28 +0000},
Date-Modified = {2016-03-22 00:22:28 +0000},
Keywords = {provenance;Missing Answers;summarization;Datalog;Game Provenance},
Slideurl = {http://www.slideshare.net/lordPretzel/2015-ta-ppwhynotpptx},
Title = {*{Towards Constraint-based Explanations for Answers and Non-Answers}},
Venueshort = {TaPP},
Year = {2015}
}
@inproceedings{GM15,
Author = {Boris Glavic and Tanu Malik and Quan Pham},
Booktitle = {Proceedings of the 7th USENIX Workshop on the Theory and Practice of Provenance (TaPP) (Poster)},
Date-Added = {2016-03-22 00:22:22 +0000},
Date-Modified = {2016-03-22 00:22:22 +0000},
Keywords = {provenance;LDV;repeatability;database virtualization},
Title = {*{Making Database Applications Shareable}},
Venueshort = {TaPP},
Year = {2015}
}
@article{PW15,
Author = {Quan Pham and Richard Whaling and Boris Glavic and Tanu Malik},
Date-Added = {2016-03-22 00:22:09 +0000},
Date-Modified = {2016-03-22 00:22:09 +0000},
Journal = {Proceedings of the VLDB Endowment (PVLDB) (Demonstration Track)},
Keywords = {provenance;LDV;repeatability;database virtualization},
Number = {12},
Pages = {1988--1999},
Title = {*{Sharing and Reproducing Database Applications}},
Venueshort = {PVLDB},
Volume = {8},
Year = {2015}
}
@inproceedings{PJ15,
Author = {Xing Niu and Raghav Kapoor and Dieter Gawlick and Zhen Hua Liu and Vasudha Krishnaswamy and Venkatesh Radhakrishnan and Boris Glavic},
Booktitle = {Proceedings of the 7th USENIX Workshop on the Theory and Practice of Provenance (TaPP)},
Date-Added = {2016-03-22 00:22:04 +0000},
Date-Modified = {2016-03-22 00:22:04 +0000},
Keywords = {provenance;JSON;GProM;PROV},
Slideurl = {http://www.slideshare.net/lordPretzel/2015-tapp},
Title = {Interoperability for Provenance-aware Databases using PROV and JSON},
Venueshort = {TaPP},
Year = {2015}
}
@article{AG15c,
Author = {Patricia C. Arocena and Boris Glavic and Radu Ciucanu and Ren\'{e}e J. Miller},
Date-Added = {2016-03-22 00:21:57 +0000},
Date-Modified = {2016-03-22 00:21:57 +0000},
Journal = {Proceedings of the VLDB Endowment (PVLDB)},
Keywords = {iBench; data exchange; data integration; benchmarking},
Number = {3},
Pages = {108-119},
Title = {{The iBench Integration Metadata Generator}},
Venueshort = {PVLDB},
Volume = {9},
Year = {2015}
}
@article{AGM16,
Author = {Arocena, Patricia C. and Glavic, Boris and Mecca, Giansalvatore and Miller, Ren\'ee J. and Papotti, Paolo and Santoro, Donatello},
Date-Added = {2016-03-22 00:21:05 +0000},
Date-Modified = {2016-03-22 00:21:52 +0000},
Journal = {Proceedings of the VLDB Endowment (PVLDB)},
Keywords = {BART; data cleaning; benchmarking; denial constraints},
Number = {2},
Pages = {36-47},
Title = {{Messing Up with Bart: Error Generation for Evaluating Data-Cleaning Algorithms}},
Venueshort = {PVLDB},
Volume = {9},
Year = {2015}
}
@article{AC15,
Author = {Arocena, Patricia C. and Ciucanu, Radu and Glavic, Boris and Miller, Ren{\'e}e J.},
Date-Added = {2016-03-22 00:21:05 +0000},
Date-Modified = {2016-03-22 00:21:05 +0000},
Journal = {Proceedings of the VLDB Endowment (PVLDB) (Demonstration Track)},
Keywords = {iBench; data exchange; data integration; benchmarking},
Number = {12},
Pages = {1960--1971},
Title = {{Gain Control over your Integration Evaluations}},
Venueshort = {PVLDB},
Volume = {8},
Year = {2015}
}
@inproceedings{Challen:2015:MWE:2699343.2699361,
Acmid = {2699361},
Author = {Challen, Geoffrey and Ajay, Jerry Antony and DiRienzo, Nick and Kennedy, Oliver and Maiti, Anudipa and Nandugudi, Anandatirtha and Shantharam, Sriram and Shi, Jinghao and Srinivasa, Guru Prasad and Ziarek, Lukasz},
Booktitle = {HotMobile},
Date-Added = {2016-03-22 00:19:48 +0000},
Date-Modified = {2016-03-22 00:19:48 +0000},
Doi = {10.1145/2699343.2699361},
Isbn = {978-1-4503-3391-7},
Keywords = {maybe, mobile programming, runtime adaptation, structured uncertainty},
Location = {Santa Fe, New Mexico, USA},
Numpages = {6},
Pages = {105--110},
Title = {{maybe} We Should Enable More Uncertain Mobile App Programming},
Url = {http://doi.acm.org/10.1145/2699343.2699361},
Year = {2015},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2699343.2699361},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2699343.2699361}}
@inproceedings{agarwal2013monadic,
Author = {Agarwal, Sumit and Bellinger, Daniel and Kennedy, Oliver and Upadhyay, Ankur and Ziarek, Lukasz},
Booktitle = {WebDB},
Date-Added = {2016-03-22 00:19:48 +0000},
Date-Modified = {2016-03-22 00:19:48 +0000},
Organization = {ACM},
Title = {Monadic Logs for Collaborative Web Applications},
Year = {2013}}
@inproceedings{pocketdata,
Author = {Kennedy, Oliver and Ajay, Jerry and Challen, Geoffrey and Ziarek, Lukasz},
Booktitle = {TPC Technology Conference on Performance Evaluation \& Benchmarking},
Date-Added = {2016-03-22 00:19:48 +0000},
Date-Modified = {2016-03-22 00:19:48 +0000},
Title = {{Pocket Data}: The Need for {TPC-MOBILE}},
Year = {2015}}
@misc{kennedy2009system,
Author = {Kennedy, Oliver and Anastasakos, Tasos and Mao, Jianchang and Vee, Erik and Shanmugasundaram, Jayavel and Vassilvitskii, Sergei and Yang, Jian and Tomlin, John},
Date-Added = {2016-03-22 00:19:48 +0000},
Date-Modified = {2016-03-22 00:19:48 +0000},
Note = {US Patent App. 2010/0114689},
Title = {System for Display Advertising Optimization Using Click or Conversion Performance},
Year = {2009}}
@inproceedings{kennedy2015just,
Author = {Kennedy, Oliver and Ziarek, Lukasz},
Booktitle = {CIDR},
Date-Added = {2016-03-22 00:19:36 +0000},
Date-Modified = {2016-03-22 00:19:36 +0000},
Title = {Just-In-Time Data Structures},
Year = {2015}}
@techreport{techreport,
Address = {arXiv:1303.4471},
Author = {Oliver Kennedy and Lukasz Ziarek},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Institution = {CORR},
Title = {{BarQL}: Collaborating through Change},
Year = {2013}}
@inproceedings{kennedy2011dbtoaster,
Author = {Kennedy, Oliver and Ahmad, Yanif and Koch, Christoph},
Booktitle = {CIDR},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Pages = {284--295},
Title = {{DBToaster}: Agile Views for a Dynamic Data Management System.},
Year = {2011}}
@article{Ahmad:2012:DHD:2336664.2336670,
Acmid = {2336670},
Author = {Ahmad, Yanif and Kennedy, Oliver and Koch, Christoph and Nikolic, Milos},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Issn = {2150-8097},
Issue_Date = {June 2012},
Journal = {PVLDB},
Numpages = {12},
Publisher = {VLDB Endowment},
Title = {{DBToaster}: Higher-order Delta Processing for Dynamic, Frequently Fresh Views},
Url = {http://dl.acm.org/citation.cfm?id=2336664.2336670},
Year = {2012},
Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=2336664.2336670}}
@article{koch2013dbtoaster,
Author = {Koch, Christoph and Ahmad, Yanif and Kennedy, Oliver Andrzej and Nikolic, Milos and N{\"o}tzli, Andres and Lupei, Daniel and Shaikhha, Amir},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Journal = {VLDBJ},
Title = {{DBToaster}: Higher-order Delta Processing for Dynamic, Frequently Fresh Views},
Year = {2013}}
@inproceedings{4812533,
Abstract = {Collaboration between small-scale wireless devices depends on their ability to infer aggregate properties of all nearby nodes. The highly dynamic environment created by mobile devices introduces a silent failure mode that is disruptive to this kind of inference. We address this problem by presenting techniques for extending existing unstructured aggregation protocols to cope with failure modes introduced by mobile environments. The modified protocols allow devices with limited connectivity to maintain estimates of aggregates, despite unexpected peer departures and arrivals.},
Author = {Kennedy, O. and Koch, C. and Demers, A.},
Booktitle = {ICDE},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Doi = {10.1109/ICDE.2009.233},
Issn = {1084-4627},
Keywords = {mobile radio;protocols;failure modes;in-network aggregation;small-scale wireless devices;unstructured aggregation protocols;Aggregates;Computer networks;Computer science;Data engineering;Hazards;International collaboration;Joining processes;Peer to peer computing;Protocols;USA Councils;Aggregation;Distributed;Gossip;Sensor Networks;Sketch;Wireless},
Pages = {1331-1334},
Title = {Dynamic Approaches to In-network Aggregation},
Year = {2009},
Bdsk-Url-1 = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4812533},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/ICDE.2009.233}}
@misc{nath2012efficient,
Author = {Nath, Suman K and Lee, Seung Ho and Smyl, Slawomir and Loboz, Charles Z and Kennedy, Oliver Andrzej},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Month = dec # {~20},
Note = {US Patent App. 2012/0323926},
Title = {Efficient Optimization over Uncertain Data},
Year = {2012}}
@inproceedings{Kennedy:2011:FPP:1989323.1989482,
Acmid = {1989482},
Author = {Kennedy, Oliver and Lee, Steve and Loboz, Charles and Smyl, Slawek and Nath, Suman},
Booktitle = {SIGMOD},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Doi = {10.1145/1989323.1989482},
Isbn = {978-1-4503-0661-4},
Keywords = {Monte Carlo, black box, probabilistic database, simulation},
Location = {Athens, Greece},
Numpages = {4},
Pages = {1303--1306},
Title = {Fuzzy Prophet: Parameter Exploration in Uncertain Enterprise Scenarios},
Url = {http://doi.acm.org/10.1145/1989323.1989482},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1989323.1989482},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1989323.1989482}}
@inproceedings{Kennedy:2011:JEO:1989323.1989410,
Acmid = {1989410},
Author = {Kennedy, Oliver and Nath, Suman},
Booktitle = {SIGMOD},
Date-Added = {2016-03-22 00:19:19 +0000},
Date-Modified = {2016-03-22 00:19:19 +0000},
Doi = {10.1145/1989323.1989410},
Isbn = {978-1-4503-0661-4},
Keywords = {Monte Carlo, black box, probabilistic database, simulation},
Location = {Athens, Greece},
Numpages = {12},
Pages = {829--840},
Title = {Jigsaw: Efficient Optimization over Uncertain Enterprise Data},
Url = {http://doi.acm.org/10.1145/1989323.1989410},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1989323.1989410},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1989323.1989410}}
@article{freireieeedeb2016,
Author = {Juliana Freire and Aline Bessa and Fernando Seabra Chirigati and Huy Vo and Kai Zhao},
Journal = {IEEE Data Eng. Bull.},
Note = {To appear},
Optbibsource = {DBLP, http://dblp.uni-trier.de},
Optee = {http://sites.computer.org/debull/A13dec/p54.pdf},
Optnumber = {4},
Optpages = {54-59},
Optvolume = {36},
Title = {Exploring What Not To Clean in Urban Data: A Study Using New York City Taxi Trips},
Year = {2016}}
@inbook{KY15,
Author = {Kennedy, Oliver and Yang, Ying and Chomicki, Jan and Fehling, Ronny and Liu, Zhen Hua and Gawlick, Dieter},
Chapter = {Detecting the Temporal Context of Queries},
Date-Added = {2016-03-15 14:53:31 +0000},
Date-Modified = {2016-03-15 14:55:22 +0000},
Doi = {10.1007/978-3-662-46839-5_7},
Editor = {Castellanos, Malu and Dayal, Umeshwar and Pedersen, Bach Torben and Tatbul, Nesime},
Isbn = {978-3-662-46839-5},
Optaddress = {Berlin, Heidelberg},
Pages = {97--113},
Publisher = {Springer Berlin Heidelberg},
Title = {Enabling Real-Time Business Intelligence: International Workshops, BIRTE 2013, Riva del Garda, Italy, August 26, 2013, and BIRTE 2014, Hangzhou, China, September 1, 2014, Revised Selected Papers},
Url = {http://dx.doi.org/10.1007/978-3-662-46839-5_7},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-662-46839-5_7}}
@inproceedings{Y14,
Author = {Yang, Ying},
Booktitle = {VLDB PhD Workshop},
Date-Added = {2016-03-15 14:52:49 +0000},
Date-Modified = {2016-03-15 14:55:15 +0000},
Title = {On-demand query result cleaning},
Year = {2014}}
@inproceedings{NK15,
Author = {Xing Niu and Raghav Kapoor and Boris Glavic},
Booktitle = {TaPP},
Title = {Heuristic and Cost-based Optimization for Provenance Computation},
Year = {2015}}
@techreport{AG16,
Author = {Bahareh Arab and Dieter Gawlick and Vasudha Krishnaswamy and Venkatesh Radhakrishnan and Boris Glavic},
Institution = {Illinois Institute of Technology},
Number = {IIT/CS-DB-2016-01},
Title = {Formal Foundations of Reenactment and Transaction Provenance},
Year = {2016}}
@inproceedings{AG14,
Author = {Bahareh Arab and Dieter Gawlick and Venkatesh Radhakrishnan and Hao Guo and Boris Glavic},
Booktitle = {Proceedings of the 6th USENIX Workshop on the Theory and Practice of Provenance (TaPP)},
Title = {A Generic Provenance Middleware for Database Queries, Updates, and Transactions},
Year = {2014}}
@inproceedings{GA09,
Author = {Boris Glavic and Gustavo Alonso},
Booktitle = {Proceedings of the 25th IEEE International Conference on Data Engineering (ICDE)},
Pages = {174-185},
Title = {{Perm: Processing Provenance and Data on the same Data Model through Query Rewriting}},
Year = {2009}}
@inproceedings{GA09a,
Author = {Boris Glavic and Gustavo Alonso},
Booktitle = {Proceedings of the 12th International Conference on Extending Database Technology (EDBT)},
Pages = {982-993},
Title = {{Provenance for Nested Subqueries}},
Year = {2009}}
@article{GA10,
Author = {Boris Glavic and Gustavo Alonso and Ren\'{e}e J. Miller and Laura M. Haas},
Journal = {Proceedings of the Very Large Data Bases Endowment (PVLDB)},
Number = {1},
Pages = {1314-1325},
Title = {{TRAMP: Understanding the Behavior of Schema Mappings through Provenance}},
Volume = {3},
Year = {2010}}
@inproceedings{PM15,
Author = {Quan Pham and Tanu Malik and Boris Glavic and Ian Foster},
Booktitle = {Proceedings of the 31st IEEE International Conference on Data Engineering (ICDE)},
Pages = {1179-1190},
Title = {*{LDV: Light-weight Database Virtualization}},
Year = {2015}}
@inproceedings{KK10,
Author = {Kennedy, Oliver and Koch, Christoph},
Booktitle = {Proceedings of the 26th IEEE International Conference on Data Engineering (ICDE)},
Organization = {IEEE},
Pages = {157--168},
Title = {PIP: A database system for great and small expectations},
Year = {2010}}
@article{YM15,
Author = {Yang, Ying and Meneghetti, Niccolo and Fehling, Ronny and Liu, Zhen Hua and Kennedy, Oliver},
Journal = {Proceedings of the VLDB Endowment},
Number = {12},
Pages = {1578--1589},
Title = {Lenses: an on-demand approach to ETL},
Volume = {8},
Year = {2015}}
@article{NY16,
Author = {Arindam Nandi and Ying Yang and Oliver Kennedy and Boris Glavic and Ronny Fehling and Zhen Hua Liu and Dieter Gawlick},
Journal = {CoRR},
Title = {Mimir: Bringing CTables into Practice},
Volume = {abs/1601.00073},
Year = {2016}}
@incollection{GM13,
Author = {Glavic, Boris and Miller, Ren{\'e}e J. and Alonso, Gustavo},
Booktitle = {In search of elegance in the theory and practice of computation: a Festschrift in honour of Peter Buneman},
Pages = {291--320},
Publisher = {Springer},
Title = {Using SQL for Efficient Generation and Querying of Provenance Information},
Year = {2013}}
@incollection{FS06,
Author = {Freire, Juliana and Silva, Cl{\'a}udio T and Callahan, Steven P and Santos, Emanuele and Scheidegger, Carlos E and Vo, Huy T},
Booktitle = {Provenance and Annotation of Data},
Pages = {10--18},
Publisher = {Springer},
Title = {Managing rapidly-evolving scientific workflows},
Year = {2006}}
@inproceedings{CF06b,
Author = {Callahan, Steven P and Freire, Juliana and Santos, Emanuele and Scheidegger, Carlos E and Silva, Claudio T and Vo, Huy T},
Booktitle = {Proceedings of the 22nd International Conference on Data Engineering Workshops (ICDEW)},
Organization = {IEEE},
Pages = {71--71},
Title = {Managing the evolution of dataflows with {VisTrails}},
Year = {2006}}
@inproceedings{CF06a,
Author = {Callahan, Steven and Freire, Juliana and Santos, Emanuele and Scheidegger, Carlos Eduardo and Silva, Claudio T. and Vo, Huy},
Booktitle = {SIGMOD '06: Proceedings of the 32nd SIGMOD International Conference on Management of Data (demonstration)},
Pages = {745--747},
Title = {{VisTrails: Visualization meets Data Management}},
Year = {2006}}
@article{SF07,
Author = {Silva, Claudio T. and Freire, Juliana and Callahan, Steven},
Journal = {Computing in Science and Engineering},
Number = {5},
Pages = {82--89},
Title = {{Provenance for Visualizations: Reproducibility and Beyond}},
Volume = {9},
Year = {2007}}
@article{SV07c,
Author = {Scheidegger, Carlos and Vo, Huy and Koop, David and Freire, Juliana and Silva, Claudio},
Journal = {IEEE Transactions on Visualization \& Computer Graphics},
Number = {6},
Pages = {1560--1567},
Title = {Querying and creating visualizations by analogy},
Volume = {13},
Year = {2007}}
@inproceedings{HL08,
Author = {Howe, Bill and Lawson, Peter and Bellinger, Renee and Anderson, Erik W. and Santos, Emanuele and Freire, Juliana and Scheidegger, Carlos Eduardo and Baptista, Antonio and Silva, Claudio T.},
Booktitle = {eScience '08: Proceedings of the 4th IEEE International Conference on eScience},
Pages = {127--134},
Title = {{End-to-End eScience: Integrating Workflow, Query, Visualization, and Provenance at an Ocean Observatory}},
Year = {2008}}
@inproceedings{SV08,
Author = {Scheidegger, Carlos Eduardo and Vo, Huy and Koop, David and Freire, Juliana and Silva, Claudio T.},
Booktitle = {SIGMOD '08: Proceedings of the 34th SIGMOD International Conference on Management of Data},
Organization = {ACM},
Pages = {1251--1254},
Title = {{Querying and Re-using Workflows with VisTrails}},
Year = {2008}}
@article{KS08,
Author = {Koop, David and Scheidegger, Carlos E and Callahan, Steven P and Freire, Juliana and Silva, Cl{\'a}udio T},
Journal = {IEEE Transactions on Visualization and Computer Graphics},
Number = {6},
Pages = {1691--1698},
Title = {{VisComplete}: Automating suggestions for visualization pipelines},
Volume = {14},
Year = {2008}}
@article{FS12,
Author = {Juliana Freire and Cl{\'a}udio T. Silva},
Journal = {Computing in Science and Engineering},
Number = {4},
Pages = {18-25},
Title = {Making Computations and Publications Reproducible with VisTrails},
Volume = {14},
Year = {2012}}
@incollection{CF12a,
Author = {Chirigati, Fernando and Freire, Juliana},
Booktitle = {Provenance and Annotation of Data and Processes},
Pages = {11--23},
Publisher = {Springer},
Title = {Towards integrating workflow and database provenance},
Year = {2012}}
@article{AD11c,
Author = {Amsterdamer, Y. and Davidson, S.B. and Deutch, D. and Milo, T. and Stoyanovich, J. and Tannen, V.},
Journal = {Proceedings of the VLDB Endowment},
Number = {4},
Pages = {346--357},
Title = {{Putting Lipstick on Pig: Enabling Database-style Workflow Provenance}},
Volume = {5},
Year = {2011}}
@article{DM14,
Author = {Deutch, Daniel and Moskovitch, Yuval and Tannen, Val},
Journal = {Proceedings of the VLDB Endowment},
Number = {6},
Title = {A Provenance Framework for Data-Dependent Process Analysis},
Volume = {7},
Year = {2014}}
@inproceedings{BD11,
Author = {Beneventano, D. and Dannoui, A.R. and Sala, A.},
Booktitle = {Proceedings of the 27th IEEE International Conference on Data Engineering Workshops (ICDEW)},
Organization = {IEEE},
Pages = {53--58},
Title = {Data lineage in the MOMIS data fusion system},
Year = {2011}}
@inproceedings{DS13a,
Author = {Dong, Xin Luna and Srivastava, Divesh},
Booktitle = {Proceedings of the 22nd international conference on World Wide Web},
Organization = {International World Wide Web Conferences Steering Committee},
Pages = {379--390},
Title = {Compact explanation of data fusion decisions},
Year = {2013}}
@incollection{HA13,
Author = {Hara, Carmem Satie and de Aguiar Ciferri, Cristina Dutra and Ciferri, Ricardo Rodrigues},
Booktitle = {In Search of Elegance in the Theory and Practice of Computation},
Pages = {339--365},
Publisher = {Springer},
Title = {Incremental Data Fusion Based on Provenance Information},
Year = {2013}}
@article{QY14,
Author = {Qiang, Zhao and Yongxin, Zhang and Dequan, Wang and Yanhui, Ding},
Journal = {Open Cybernetics \& Systemics Journal},
Pages = {462--467},
Title = {A Traceable Data Fusion Based on Data Provenance},
Volume = {8},
Year = {2014}}
@inproceedings{IK05,
Author = {Ives, Zachary G. and Khandelwal, Nitin and Kapur, Aneesh and Cakir, Murat},
Booktitle = {CIDR '05: Proceedings of the 2nd Conference on Innovative Data Systems Research},
Title = {{ORCHESTRA: Rapid, Collaborative Sharing of Dynamic Data}},
Year = {2005}}
@article{IG08,
Author = {Zachary G. Ives and Todd J. Green and Grigoris Karvounarakis and Nicholas E. Taylor and Val Tannen and Partha Pratim Talukdar and Marie Jacob and Fernando Pereira},
Journal = {SIGMOD Record},
Number = {2},
Pages = {26--32},
Title = {{The ORCHESTRA Collaborative Data Sharing System}},
Volume = {37},
Year = {2008}}
@inproceedings{GI12,
Author = {Green, Todd J and Ives, Zachary G},
Booktitle = {Proceedings of the 28th IEEE International Conference on Data Engineering (ICDE)},
Organization = {IEEE},
Pages = {330--341},
Title = {Recomputing materialized instances after changes to mappings and data},
Year = {2012}}
@article{KG13,
Author = {Karvounarakis, Grigoris and Green, Todd J and Ives, Zachary G and Tannen, Val},
Journal = {ACM Transactions on Database Systems (TODS)},
Number = {3},
Pages = {19},
Title = {Collaborative data sharing via update exchange and provenance},
Volume = {38},
Year = {2013}}
@conference{AC06,
Author = {Alexe, B. and Chiticariu, L. and Tan, W.C.},
Booktitle = {Proceedings of the 32nd International Conference on Very Large Data Bases (VLDB)},
Organization = {VLDB Endowment},
Pages = {1179--1182},
Title = {{SPIDER: a schema mapPIng DEbuggeR}},
Year = {2006}}
@inproceedings{VM05,
Author = {Velegrakis, Yannis and Miller, Ren{\'e}e J. and Mylopoulos, John},
Booktitle = {ICDE '05: Proceedings of the 21th International Conference on Data Engineering},
Pages = {81--92},
Title = {{Representing and Querying Data Transformations}},
Year = {2005}}
@inproceedings{BS06,
Author = {Omar Benjelloun and Anish Das Sarma and Alon Y. Halevy and Jennifer Widom},
Booktitle = {Proceedings of the 32nd International Conference on Very Large Data Bases (VLDB)},
Pages = {953-964},
Title = {{ULDBs: Databases with Uncertainty and Lineage}},
Year = {2006}}
@article{W08,
Author = {Widom, Jennifer},
Journal = {Managing and Mining Uncertain Data},
Pages = {113-148},
Title = {{Trio: A System for Managing Data, Uncertainty, and Lineage}},
Year = {2008}}
@inproceedings{WT08,
Author = {Widom, Jennifer and Theobald, Martin and Das Sarma, Anish},
Booktitle = {ICDE '08: Proceedings of the 24th International Conference on Data Engineering},
Month = {April},
Pages = {1023-1032},
Title = {{Exploiting Lineage for Confidence Computation in Uncertain and Probabilistic Databases}},
Year = {2008}}
@inproceedings{DT10,
Address = {Heidelberg, Germany},
Author = {Das Sarma, Anish and Theobald, Martin and Widom, Jennifer},
Booktitle = {22nd International Conference on Scientific and Statistical Database Management (SSDBM)},
Publisher = {Springer},
Series = {LLNCS},
Title = {LIVE: A Lineage-Supported Versioned DBMS},
Year = {2010}}
@inproceedings{WC15a,
Author = {Wang, Jianwu and Crawl, Daniel and Purawat, Shweta and Nguyen, Mai and Altintas, Ilkay},
Booktitle = {Proceedings of the 2015 IEEE International Conference on Big Data},
Organization = {IEEE},
Pages = {2509--2516},
Title = {Big data provenance: Challenges, state of the art and opportunities},
Year = {2015}}
@misc{DB12a,
Author = {Divyakant Agrawal and Elisa Bertino and Susan Davidson and Michael Franklin and Alon Halevy and Jiawei Han and H. V. Jagadish and Sam Madden and Yannis Papakonstantinou and Raghu Ramakrishnan and Kenneth Ross and Cyrus Shahabi and Shiv Vaithyanathan and Jennifer Widom},
Title = {Challenges and Opportunities with Big Data},
Year = {2012}}
@article{SP05,
Author = {Yogesh L. Simmhan and Beth Plale and Dennis Gannon},
Journal = {SIGMOD Record},
Number = {3},
Pages = {31--36},
Title = {{A Survey of Data Provenance in e-science}},
Volume = {34},
Year = {2005}}
@article{DC07,
Author = {Davidson, Susan B. and Cohen-Boulakia, Sarah and Eyal, Anat and Lud\"{a}scher, Bertram and McPhillips, Timothy and Bowers, Shawn and Freire, Juliana},
Journal = {IEEE Data Engineering Bulletin},
Number = {4},
Pages = {44--50},
Title = {{Provenance in Scientific Workflow Systems}},
Volume = {30},
Year = {2007}}
@inproceedings{DF08,
Author = {Davidson, Susan B and Freire, Juliana},
Booktitle = {Proceedings of the 2008 ACM SIGMOD international conference on Management of data},
Organization = {ACM},
Pages = {1345--1350},
Title = {Provenance and scientific workflows: challenges and opportunities},
Year = {2008}}
@article{FK08a,
Author = {Freire, Juliana and Koop, David and Santos, Emanuele and Silva, Claudio T.},
Journal = {Computing in Science and Engineering},
Number = {3},
Pages = {11--21},
Title = {{Provenance for Computational Tasks: A Survey}},
Volume = {10},
Year = {2008}}
@inproceedings{HF06a,
Author = {Halevy, A. and Franklin, M. and Maier, D.},
Booktitle = {Database Systems for Advanced Applications},
Organization = {Springer},
Pages = {1--2},
Title = {Dataspaces: A new abstraction for information management},
Year = {2006}}
@inproceedings{JC07,
Author = {Jagadish, HV and Chapman, A. and Elkiss, A. and Jayapandian, M. and Li, Y. and Nandi, A. and Yu, C.},
Booktitle = {Proceedings of the 2007 ACM SIGMOD international conference on Management of data},
Pages = {13--24},
Title = {{Making database systems usable}},
Year = {2007}}
@inproceedings{LJ09,
Author = {Liu, Bin and Jagadish, HV},
Booktitle = {Proceedings of the 25th IEEE International Conference on Data Engineering (ICDE)},
Organization = {IEEE},
Pages = {417--428},
Title = {A spreadsheet algebra for a direct data manipulation query interface},
Year = {2009}}
@book{SG00,
Author = {Snodgrass, Richard T.},
Publisher = {Morgan Kaufmann Publishers San Francisco},
Title = {Developing time-oriented database applications in SQL},
Volume = {42},
Year = {2000}}
@article{MG13b,
Author = {Moreau, Luc and Groth, Paul},
Journal = {Synthesis Lectures on the Semantic Web: Theory and Technology},
Number = {4},
Pages = {1--129},
Title = {Provenance: An introduction to {PROV}},
Volume = {3},
Year = {2013}}
@incollection{A12a,
Author = {Asuncion, Hazeline U},
Booktitle = {Provenance and Annotation of Data and Processes},
Pages = {1--10},
Publisher = {Springer},
Title = {SourceTrac: tracing data sources within spreadsheets},
Year = 2012}
@inproceedings{DBLP:conf/icde/BeskalesSIBK10,
author = {George Beskales and
Mohamed A. Soliman and
Ihab F. Ilyas and
Shai Ben{-}David and
Yubin Kim},
title = {ProbClean: {A} probabilistic duplicate detection system},
booktitle = {Proceedings of the 26th International Conference on Data Engineering,
{ICDE} 2010, March 1-6, 2010, Long Beach, California, {USA}},
pages = {1193--1196},
year = {2010},
crossref = {DBLP:conf/icde/2010},
url = {http://dx.doi.org/10.1109/ICDE.2010.5447744},
doi = {10.1109/ICDE.2010.5447744},
timestamp = {Sat, 09 Aug 2014 14:37:14 +0200},
biburl = {http://dblp.uni-trier.de/rec/bib/conf/icde/BeskalesSIBK10},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@inproceedings{DBLP:conf/sigmod/ElmagarmidIOQ0Y14,
author = {Ahmed K. Elmagarmid and
Ihab F. Ilyas and
Mourad Ouzzani and
Jorge{-}Arnulfo Quian{\'{e}}{-}Ruiz and
Nan Tang and
Si Yin},
title = {{NADEEF/ER:} generic and interactive entity resolution},
booktitle = {International Conference on Management of Data, {SIGMOD} 2014, Snowbird,
UT, USA, June 22-27, 2014},
pages = {1071--1074},
year = {2014},
crossref = {DBLP:conf/sigmod/2014},
url = {http://doi.acm.org/10.1145/2588555.2594511},
doi = {10.1145/2588555.2594511},
timestamp = {Sun, 22 Jun 2014 11:31:11 +0200},
biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigmod/ElmagarmidIOQ0Y14},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@proceedings{DBLP:conf/sigmod/2014,
editor = {Curtis E. Dyreson and
Feifei Li and
M. Tamer {\"{O}}zsu},
title = {International Conference on Management of Data, {SIGMOD} 2014, Snowbird,
UT, USA, June 22-27, 2014},
publisher = {{ACM}},
year = {2014},
url = {http://dl.acm.org/citation.cfm?id=2588555},
isbn = {978-1-4503-2376-5},
timestamp = {Sun, 22 Jun 2014 11:31:11 +0200},
biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigmod/2014},
bibsource = {dblp computer science bibliography, http://dblp.org}
}