Expanding on the interface section a little

2016-04-09 14:26:16 -04:00 · 2016-04-09 14:26:16 -04:00 · 3b20969d4e
parent 230f55104f
commit 3b20969d4e
15 changed files with 5322 additions and 15 deletions
--- a/freire.bib
+++ b/freire.bib
--- a/graphics/vizir-ui-menu.graffle/data.plist
+++ b/graphics/vizir-ui-menu.graffle/data.plist
--- a/graphics/vizir-ui-menu.graffle/image1.pdf
+++ b/graphics/vizir-ui-menu.graffle/image1.pdf
--- a/graphics/vizir-ui-menu.pdf
+++ b/graphics/vizir-ui-menu.pdf
--- a/main.out
+++ b/main.out
@ -0,0 +1,6 @@
+\BOOKMARK [1][-]{section.1}{Introduction}{}% 1
+\BOOKMARK [1][-]{section.2}{Interface}{}% 2
+\BOOKMARK [1][-]{section.3}{Language}{}% 3
+\BOOKMARK [1][-]{section.4}{Generalizing Singletons}{}% 4
+\BOOKMARK [1][-]{section.5}{Related Work}{}% 5
+\BOOKMARK [1][-]{section.6}{References}{}% 6
--- a/main.synctex.gz
+++ b/main.synctex.gz
--- a/main.tex
+++ b/main.tex
@ -1,11 +1,33 @@
 \documentclass{sig-alternate-05-2015}
-\usepackage[utf8]{inputenc}
-\usepackage{natbib}
-\usepackage{graphicx}
-\usepackage{url}
+\input{preamble}
+
+\newcommand{\oksays}[1]{\todo[inline]{\textbf{Oliver says:} #1}}
+\newcommand{\bgsays}[1]{\todo[inline,color=green!40]{\textbf{Boris says:} #1}}
+\newcommand{\jfsays}[1]{\todo[inline,color=blue!40]{\textbf{Juliana says:} #1}}
+
+
+\setcopyright{acmcopyright}
+\toappear{}

 \title{The Exception that Improves the Rule}

+\numberofauthors{3}
+\author{
+% Alphabetical
+\alignauthor
+Juliana Freire\\
+       \affaddr{New York University}\\
+       \email{juliana.freire@nyu.edu}
+\alignauthor
+Boris Glavic\\
+       \affaddr{Illinois Institute of Tech.}\\
+       \email{bglavic@iit.edu}
+\alignauthor
+Oliver Kennedy\\
+       \affaddr{University at Buffalo}\\
+       \email{okennedy@buffalo.edu}
+}
+

 \begin{document}

@ -17,7 +39,7 @@

 \section{Interface}
 \label{sec:interface}
-What is a spreadsheet?
+\input{sections/interface}

 \section{Language}
 \label{sec:language}
@ -30,5 +52,5 @@ What is a spreadsheet?
 \input{sections/related}

 \bibliographystyle{plain}
-\bibliography{references}
+\bibliography{freire,okennedy,urban,vistrails,vizier}
 \end{document}
--- a/okennedy.bib
+++ b/okennedy.bib
@ -0,0 +1,96 @@
+%% This BibTeX bibliography file was created using BibDesk.
+%% http://bibdesk.sourceforge.net/
+
+%% Created for Oliver Kennedy at 2016-04-04 12:02:57 -0400 
+
+
+%% Saved with string encoding Unicode (UTF-8) 
+
+
+
+@inproceedings{ives2015looking,
+	Author = {Ives, Zachary G. and Yan, Zhepeng and Zheng, Nan and Litt, Brian and Wagenaar, Joost B.},
+	Booktitle = {Seventh Biennial Conference on Innovative Data Systems Research ({CIDR})},
+	Date-Added = {2016-04-04 15:50:57 +0000},
+	Date-Modified = {2016-04-04 15:50:57 +0000},
+	Title = {Looking at Everything in Context},
+	Year = {2015}}
+
+@article{Wang:2016aa,
+	Abstract = {Data-driven applications rely on the correctness of their data to function properly and effectively. Errors in data can be incredibly costly and disruptive, leading to loss of revenue, incorrect conclusions, and misguided policy decisions. While data cleaning tools can purge datasets of many errors before the data is used, applications and users interacting with the data can introduce new errors. Subsequent valid updates can obscure these errors and propagate them through the dataset causing more discrepancies. Even when some of these discrepancies are discovered, they are often corrected superficially, on a case-by-case basis, further obscuring the true underlying cause, and making detection of the remaining errors harder. In this paper, we propose QFix, a framework that derives explanations and repairs for discrepancies in relational data, by analyzing the effect of queries that operated on the data and identifying potential mistakes in those queries. QFix is flexible, handling scenarios where only a subset of the true discrepancies is known, and robust to different types of update workloads. We make four important contributions: (a) we formalize the problem of diagnosing the causes of data errors based on the queries that operated on and introduced errors to a dataset; (b) we develop exact methods for deriving diagnoses and fixes for identified errors using state-of-the-art tools; (c) we present several optimization techniques that improve our basic approach without compromising accuracy, and (d) we leverage a tradeoff between accuracy and performance to scale diagnosis to large datasets and query logs, while achieving near-optimal results. We demonstrate the effectiveness of QFix through extensive evaluation over benchmark and synthetic data.},
+	Archiveprefix = {arXiv},
+	Author = {Xiaolan Wang and Alexandra Meliou and Eugene Wu},
+	Date-Added = {2016-04-04 15:31:10 +0000},
+	Date-Modified = {2016-04-04 15:31:10 +0000},
+	Eprint = {1601.07539},
+	Journal = {CoRR},
+	Month = jan,
+	Title = {{QFix}: Diagnosing Errors Through Query Histories},
+	Url = {http://arxiv.org/abs/1601.07539},
+	Volume = {abs/1601.07539},
+	Year = {2016},
+	Bdsk-Url-1 = {http://arxiv.org/abs/1601.07539}}
+
+@article{Krishnan:2016aa,
+	Abstract = {Data cleaning is often an important step to ensure that predictive models, such as regression and classification, are not affected by systematic errors such as inconsistent, out-of-date, or outlier data. Identifying dirty data is often a manual and iterative process, and can be challenging on large datasets. However, many data cleaning workflows can introduce subtle biases into the training processes due to violation of independence assumptions. We propose ActiveClean, a progressive cleaning approach where the model is updated incrementally instead of re-training and can guarantee accuracy on partially cleaned data. ActiveClean supports a popular class of models called convex loss models (e.g., linear regression and SVMs). ActiveClean also leverages the structure of a user's model to prioritize cleaning those records likely to affect the results. We evaluate ActiveClean on five real-world datasets UCI Adult, UCI EEG, MNIST, Dollars For Docs, and WorldBank with both real and synthetic errors. Our results suggest that our proposed optimizations can improve model accuracy by up-to 2.5x for the same amount of data cleaned. Furthermore for a fixed cleaning budget and on all real dirty datasets, ActiveClean returns more accurate models than uniform sampling and Active Learning.},
+	Archiveprefix = {arXiv},
+	Author = {Sanjay Krishnan and Jiannan Wang and Eugene Wu and Michael J. Franklin and Ken Goldberg},
+	Date-Added = {2016-04-04 15:31:02 +0000},
+	Date-Modified = {2016-04-04 15:31:02 +0000},
+	Eprint = {1601.03797},
+	Journal = {CoRR},
+	Month = jan,
+	Title = {{ActiveClean}: Interactive Data Cleaning While Learning Convex Loss Models},
+	Url = {http://arxiv.org/abs/1601.03797},
+	Volume = {abs/1601.03797},
+	Year = {2016},
+	Bdsk-Url-1 = {http://arxiv.org/abs/1601.03797}}
+
+@article{Haas:2015:WNS:2824032.2824122,
+	Acmid = {2824122},
+	Author = {Haas, Daniel and Krishnan, Sanjay and Wang, Jiannan and Franklin, Michael J. and Wu, Eugene},
+	Date-Added = {2016-04-04 15:29:23 +0000},
+	Date-Modified = {2016-04-04 15:29:23 +0000},
+	Doi = {10.14778/2824032.2824122},
+	Issn = {2150-8097},
+	Issue_Date = {August 2015},
+	Journal = {Proc. VLDB Endow.},
+	Month = aug,
+	Number = {12},
+	Numpages = {4},
+	Pages = {2004--2007},
+	Publisher = {VLDB Endowment},
+	Title = {Wisteria: Nurturing Scalable Data Cleaning Infrastructure},
+	Url = {http://dx.doi.org/10.14778/2824032.2824122},
+	Volume = {8},
+	Year = {2015},
+	Bdsk-Url-1 = {http://dx.doi.org/10.14778/2824032.2824122}}
+
+@inproceedings{Wang:2014:SFF:2588555.2610505,
+	Acmid = {2610505},
+	Address = {New York, NY, USA},
+	Author = {Wang, Jiannan and Krishnan, Sanjay and Franklin, Michael J. and Goldberg, Ken and Kraska, Tim and Milo, Tova},
+	Booktitle = {Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data},
+	Date-Added = {2016-04-04 15:28:48 +0000},
+	Date-Modified = {2016-04-04 15:28:48 +0000},
+	Doi = {10.1145/2588555.2610505},
+	Isbn = {978-1-4503-2376-5},
+	Keywords = {aggregate query, data cleaning, dirty data, sampling},
+	Location = {Snowbird, Utah, USA},
+	Numpages = {12},
+	Pages = {469--480},
+	Publisher = {ACM},
+	Series = {SIGMOD '14},
+	Title = {A Sample-and-clean Framework for Fast and Accurate Query Processing on Dirty Data},
+	Url = {http://doi.acm.org/10.1145/2588555.2610505},
+	Year = {2014},
+	Bdsk-Url-1 = {http://doi.acm.org/10.1145/2588555.2610505},
+	Bdsk-Url-2 = {http://dx.doi.org/10.1145/2588555.2610505}}
+
+@book{suciu2011probabilistic,
+	Author = {Suciu, Dan and Olteanu, Dan and R{\'e}, Christopher and Koch, Christoph},
+	Date-Added = {2016-04-03 20:39:53 +0000},
+	Date-Modified = {2016-04-03 20:40:12 +0000},
+	Publisher = {Morgan \& Claypool},
+	Series = {Synthesis Lectures on Data Management},
+	Title = {Probabilistic Databases},
+	Year = {2011}}
+
+@book{norman2013design,
+	Author = {Norman, Donald A.},
+	Date-Added = {2016-04-03 17:47:41 +0000},
+	Date-Modified = {2016-04-03 17:47:41 +0000},
+	Edition = {Revised and Expanded},
+	Publisher = {Basic Books},
+	Title = {The Design of Everyday Things},
+	Year = {2013}}
--- a/preamble.tex
+++ b/preamble.tex
@ -0,0 +1,47 @@
+%%%%%% Standard Packages %%%%%%
+\usepackage{graphicx}
+\usepackage{amssymb}
+%\usepackage[noend]{algorithmic}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+\usepackage{textcomp}
+\usepackage{listings}
+\usepackage[usenames,dvipsnames]{xcolor}
+\usepackage{subcaption}
+\usepackage{cite}
+\usepackage{hyperref}
+\usepackage{lipsum}
+\usepackage[normalem]{ulem}
+\usepackage{listings}
+\usepackage{xspace}
+\usepackage{color}
+\usepackage{wrapfig}
+\usepackage[textsize=tiny]{todonotes}
+\usepackage{cleveref}
+
+%%%%%% Package Configuration %%%%%%
+%%% Listings
+\lstset{language=sql,morekeywords={LENS,SCHEMA_MATCHING,string},basicstyle=\small\upshape\ttfamily,keywordstyle=\color{blue}}
+%%% Algorithmic
+\renewcommand{\algorithmicrequire}{\textbf{In:}}
+\renewcommand{\algorithmicensure}{\textbf{Out:}}
+
+%%%%%% Standard Theorem Environments %%%%%%
+\newtheorem{example}{Example}
+\newtheorem{scenario}{Scenario}
+\newtheorem{definition}{Definition}
+
+%%%%%% Common Math-Mode Aliases %%%%%%
+\newcommand{\comprehension}[2]{\left\{\;{#1}\;|\;{#2}\;\right\}}
+\newcommand{\tuple}[1]{\left<\;{#1}\;\right>}
+\newcommand{\ordefn}{\;|\;}
+\newcommand{\sch}[1]{\texttt{schema}({#1})}
+\newcommand{\projection}{\pi}
+\newcommand{\selection}{\sigma}
+
+%%%%%% TODOs %%%%%%
+
+%%%%%% Other Aliases %%%%%%
+\newcommand{\ccomment}[1]{{\small\texttt{/*} #1 \texttt{*/}}}
+\newcommand{\tinysection}[1]{\smallskip\noindent \textbf{#1.}$\,$}
+\newcommand{\keyword}[1]{\textcolor{blue}{\texttt{#1}}}
--- a/references.bib
+++ b/references.bib
@ -1,8 +0,0 @@
-@book{adams1995hitchhiker,
-  title={The Hitchhiker's Guide to the Galaxy},
-  author={Adams, D.},
-  isbn={9781417642595},
-  url={http://books.google.com/books?id=W-xMPgAACAAJ},
-  year={1995},
-  publisher={San Val}
-}
--- a/sections/interface.tex
+++ b/sections/interface.tex
@ -0,0 +1,51 @@
+%!TEX root = ../main.tex
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{figure}
+  \centering
+  \includegraphics[width=\columnwidth]{graphics/vizir-ui-menu}
+  \caption{An example of Vizier's UI}
+    \label{fig:hybridinterface}
+\end{figure}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+Vizier's interface (illustrated in Figure~\ref{fig:hybridinterface}) combines elements of both notebooks and spreadsheets.  Notebook interfaces like Jupyter use an analogy of pages in a notebook that consist of a block of code, as well as an output for the block like a table, visualization, or documentation text.  Blocks are part of a continuous program, allowing a user to quickly probe intermediate state by creating new visualizations or views of the data, or to safely insert hypothetical, exploratory modifications by adding or disabling pages.
+
+Spreadsheets give users an infinite 2-dimensional grid of cells that can hold either constant values or computed values derived from other cells through \textit{formulas}.
+Collections, as they exist, are defined implicitly as any 1- or 2-dimensional region of cells that has meaning to the user.
+Thus, instead of classical programmatic specification of bulk, set-at-a-time operations as operations over named collection objects, spreadsheets use the metaphor of copying code to a (user-specified) range of cells, combined with relative, positional data dependencies to quite literally ``map'' singleton operations over entire collections.
+In addition to making it easy to perform bulk operations over the entire collection, this approach provides a clear affordance for declaring exceptions: Even after being copied, each cell's formula is (and is presented as) a singleton, logically independent of other cells' formulas.
+
+\oksays{Need to discuss how creating figures is easier in spreadsheets}
+
+Between the simplicity of creating singleton operations and the simplicity of creating visualizations, spreadsheets are a powerful tool for data curation and exploration.  Indeed, spreadsheet users often ``do not appear inclined to use other software packages for their tasks, even if these packages might be more suitable''~\cite{Chan1996119}.  Our goal in Vizier is to empower users with a similar level of flexibility for transforming, visualizing, and exploring relational data.
+
+\begin{itemize}
+\item Edit any cell (overwrite, cast, etc...)
+\item Add/Delete rows, columns, etc...
+\item Define target regions for ``bulk operations'' by selection
+\item Enable singletons
+\end{itemize}
+
+
+
+
+Vizier's users can edit tables and visualizations directly, and have those edits reflected in the notebook's code block through database-style table updates that are propagated to all subsequent code blocks.
+As a result, the user's edits, however they are applied, 
+are recorded in the notebook as a part of the workflow.
+Although we will not reproduce
+the full spreadsheet interface entirely, our goal is to replicate as many of the
+flexible data and schema manipulation features of spreadsheets as possible.  Concretely, Vizier's UI allows users to:
+\begin{itemize}
+\item Overwrite arbitrary values with constants or formulas
+\item Cast cells to a new type
+\item Cut/Copy/Paste cells, tiled over target regions
+\item Add/Delete/Reorder columns or rows
+\item Sort data
+\item Filter data
+\end{itemize}
+Operations that affect multiple cells are applied to an area currently selected by the user, rather than to an entire column or row.  
+
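+% DRAFT FRAGMENT (alternate wording for the sentence above): are applied to an area currently selected by the user, as opposed to the entire
+
+% DRAFT FRAGMENT (alternate wording for the sentence above): target regions, as opposed to entire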
--- a/sections/related.tex
+++ b/sections/related.tex
@ -8,4 +8,7 @@
 \item View maintenance?
 \item Workflow systems (VisTrails)
 \item Reenactment
-\end{itemize}
+\end{itemize}
+
+The simplicity of spreadsheets, combined with spreadsheet users who ``do not appear inclined to use other software packages for their tasks, even if these packages might be more suitable''~\cite{Chan1996119},
+has encouraged many database-driven efforts to resolve the impedance mismatch between positional and set-at-a-time query semantics~\cite{LJ09,JC07}, make spreadsheets more structured~\cite{Bakke:2011:SUI:1978942.1979313,bakke2011schema} or make databases more spreadsheet-like~\cite{DBLP:journals/ijcse/JagadishQN15}.
--- a/urban.bib
+++ b/urban.bib
@ -0,0 +1,189 @@
+@misc{furmandata,
+  key = {furman},                  
+  title = {Furman Center: Data Services},
+  howpublished = {\url{http://furmancenter.org/data}},
+  OPTyear = {2013}, 
+  note = "[Online; accessed 9-Dec-2014]"
+}
+
+
+@misc{green-taxis,
+  key = {nycgreentaxis},                  
+  title = {{NYC} Green Taxis},
+  howpublished = {\url{http://www.nyc.gov/html/tlc/html/passenger/shl_passenger.shtml}},
+  OPTyear = {2013}, 
+  note = "[Online; accessed 9-Dec-2014]"
+}
+
+@misc{socrata,
+  author = {Socrata},
+  howpublished = "\url{http://www.socrata.com}",
+  OPTyear = {2013}, 
+  note = "[Online; accessed 28-May-2014]"
+}
+
+@misc{ckan,
+  author = {CKAN},
+  howpublished = "\url{http://ckan.org}",
+  OPTyear = {2013}, 
+  note = "[Online; accessed 28-May-2014]"
+}
+
+@Misc{nycopendata,
+  key = 	 {opendata},
+  OPTauthor = 	 {},
+  title = 	 {{NYC OpenData}},
+  howpublished = {\url{https://nycopendata.socrata.com}},
+  OPTmonth = 	 {},
+  OPTyear = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+
+@Misc{chicagoopendata,
+  key = 	 {opendata},
+  OPTauthor = 	 {},
+  title = 	 {{City of Chicago Data Portal}},
+  howpublished = {\url{https://data.cityofchicago.org}},
+  OPTmonth = 	 {},
+  OPTyear = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+@Misc{sfoopendata,
+  key = 	 {opendata},
+  OPTauthor = 	 {},
+  title = 	 {{San Francisco Data}},
+  howpublished = {\url{https://data.sfgov.org}},
+  OPTmonth = 	 {},
+  OPTyear = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+@article{Horn2014109,
+title = "Do Housing Choice Voucher Holders Live Near Good Schools?",
+journal = "Journal of Housing Economics",
+volume = "24",
+OPTnumber = "0",
+pages = "109--121",
+year = "2014",
+OPTnote = "Housing Policy in the United States ",
+issn = "1051-1377",
+OPTdoi = "http://dx.doi.org/10.1016/j.jhe.2014.04.004",
+OPTurl = "http://www.sciencedirect.com/science/article/pii/S1051137714000205",
+author = "Keren Mertens Horn and Ingrid Gould Ellen and Amy Ellen Schwartz",
+OPTkeywords = "Housing Choice Vouchers",
+OPTkeywords = "Federal housing assistance",
+OPTkeywords = "Schools ",
+}
+
+@Misc{police-success@economis2014,
+  key = 	 {Economist},
+  OPTauthor = 	 {},
+  title = 	 {The secret of success},
+  howpublished = {\url{http://www.economist.com/news/united-states/21633878-americas-great-crime-wave-receding-some-cities-faster-others-secret}},
+  month = 	 nov,
+  year = 	 {2014},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+@TechReport{furman-crime-foreclosure-2013,
+  author = 	 {Ingrid Gould Ellen and Johanna Ruth Lacoe},
+  title = 	 {Do foreclosures cause crime?},
+  institution =  {New York University},
+  year = 	 {2013},
+  OPTkey = 	 {},
+  OPTtype = 	 {},
+  OPTnumber = 	 {},
+  OPTaddress = 	 {},
+  OPTmonth = 	 {},
+  OPTnote = 	 {\url{http://furmancenter.org/files/publications/DoForeclosuresCauseCrime.pdf}},
+  OPTannote = 	 {Furman Center Policy Brief}
+}
+
+@Article{ellen@jecon2013,
+  author = 	 {Ellen, I. G. and Lacoe, J. and Sharygin, C. A.},
+  title = 	 {Do foreclosures cause crime?},
+  journal = 	 {Journal of Urban Economics},
+  year = 	 {2013},
+  OPTkey = 	 {},
+  volume = 	 {74},
+  OPTnumber = 	 {},
+  pages = 	 {59--70},
+  OPTmonth = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+@article{Schwartz2006679,
+title = "The external effects of place-based subsidized housing",
+journal = "Regional Science and Urban Economics",
+volume = "36",
+number = "6",
+pages = "679--707",
+year = "2006",
+issn = "0166-0462",
+doi = "10.1016/j.regsciurbeco.2006.04.002",
+url = "http://www.sciencedirect.com/science/article/pii/S0166046206000548",
+author = "Amy Ellen Schwartz and Ingrid Gould Ellen and Ioan Voicu and Michael H. Schill",
+keywords = "Development/revitalization, Externalities, Housing, Neighborhood"
+}
+
+@Misc{furmanreport2011,
+  OPTkey = 	 {furman},
+  author = 	 {Vicki Been and Sam Dastrup and Ingrid Gould Ellen and Ben Gross and Andrew Hayashi and Susan Latham and  Meghan Lewit and Josiah Madar and Vincent Reina and Mary Weselcouch and Michael Williams},
+  title = 	 {State of New York City's Housing and Neighborhoods},
+  OPThowpublished = {},
+  OPTmonth = 	 {},
+  year = 	 {2011},
+  note = 	 {\url{http://furmancenter.org/files/sotc/SOC\_2011.pdf}},
+  OPTnote =      {\url{http://furmancenter.org/files/Furman\_Center\_Current\_Projects.pdf}},
+  OPTannote = 	 {}
+}
+
+@Misc{furmanreport2012,
+  OPTkey = 	 {furman},
+  author = 	 {Vicki Been and Sam Dastrup and Ingrid Gould Ellen and Ben Gross and Andrew Hayashi and Susan Latham and  Meghan Lewit and Josiah Madar and Vincent Reina and Mary Weselcouch and Michael Williams},
+  title = 	 {State of New York City's Housing and Neighborhoods},
+  OPThowpublished = {},
+  OPTmonth = 	 {},
+  year = 	 {2012},
+  note = 	 {\url{http://furmancenter.org/research/sonychan/2012-report}},
+  OPTnote =      {\url{http://furmancenter.org/files/Furman\_Center\_Current\_Projects.pdf}},
+  OPTannote = 	 {}
+}
+
+@Misc{furmanreport2013,
+  OPTkey = 	 {furman},
+  author = 	 {Sean Capperis and Jorge De la Roca and Kevin Findlan and Ingrid Gould Ellen and Josiah Madar and Shannon Moriarti and Justin Steil and Mary Weselcouch and Mark Williams},
+  title = 	 {State of New York City's Housing and Neighborhoods},
+  OPThowpublished = {},
+  OPTmonth = 	 {},
+  year = 	 {2013},
+  note = 	 {\url{http://furmancenter.org/research/sonychan/2013-state-of-new-york-citys-housing-and-neighborhoods-report}},
+  OPTnote =      {\url{http://furmancenter.org/files/Furman\_Center\_Current\_Projects.pdf}},
+  OPTannote = 	 {}
+}
+
+@Misc{furmanreport2014,
+  OPTkey = 	 {furman},
+  author = 	 {Sean Capperis and Jorge De la Roca and Ingrid Gould Ellen and Brian Karfunkel and Yiwen (Xavier) Kuai and Shannon Moriarty and Justin Steil and Eric Stern and
+Michael Suher and Max Weselcouch and Mark Willis and Jessica Yager},
+  title = 	 {State of New York City's Housing and Neighborhoods},
+  OPThowpublished = {},
+  OPTmonth = 	 {},
+  year = 	 {2014},
+  note = 	 {\url{http://furmancenter.org/research/sonychan}},
+  OPTnote =      {\url{http://furmancenter.org/files/Furman\_Center\_Current\_Projects.pdf}},
+  OPTannote = 	 {}
+}
+
+
--- a/vistrails.bib
+++ b/vistrails.bib
@ -0,0 +1,316 @@
+@Book{norman@book1994,
+  author = 	 {Donald A. Norman},
+  title = 	 {Things That Make Us Smart: Defending Human Attributes in the Age of the Machine},
+  publisher = 	 {Addison Wesley},
+  year = 	 {1994},
+}
+
+@article{avs,
+  OPTauthor =    {Craig Upson and Thomas Faulhaber, Jr. and David
+                  Kamins and David H. Laidlaw and David Schlegel and
+                  Jefrey Vroom and Robert Gurwitz and Andries van Dam},
+  author =   {Upson, Craig and others},
+  title =    {The Application Visualization System: A
+                  Computational Environment for Scientific
+                  Visualization},
+  journal =  cganda,
+  volume =   9,
+  number =   4,
+  year =     1989,
+  OPTissn =  {0272-1716},
+  pages =    {30--42},
+  OPTpublisher =     {IEEE Computer Society Press},
+}
+
+@Misc{dx,
+  author =   {IBM},
+  title =    {{OpenDX}},
+  howpublished = {\url{http://www.research.ibm.com/dx}},
+}
+
+@Misc{visit,
+  OPTkey =   {},
+  author =   {{Lawrence Livermore National Laboratory}},
+  title =    {{VisIt: Visualize It in Parallel Visualization Application} },
+  howpublished = {\url{https://wci.llnl.gov/codes/visit} [29 March 2008]},
+  OPTmonth =     {},
+  OPTyear =      {},
+  OPTnote =      {},
+  OPTannote =    {}
+}
+
+@Misc{pegasus,
+  key =      {Pegasus},
+  title =    {{The Pegasus Project}},
+  OPTyear =      {},
+  note =     {\url{http://pegasus.isi.edu/}},
+}
+                  
+@Misc{kepler,
+  key =      {Kepler},
+  title =    {{The Kepler Project}},
+  OPTyear =      {},
+  note =     {\url{http://kepler-project.org}},
+}
+
+@Misc{taverna,
+  key =      {Taverna},
+  title =    {{The Taverna Project}},
+  OPTyear =      {},
+  note =     {\url{http://taverna.sourceforge.net}},
+}
+
+
+@Misc{vds,
+  key =      {VDS},
+  title =    {{VDS - The GriPhyN Virtual Data System}},
+  OPTyear =      {},
+  note =     {\\ \url{http://www.ci.uchicago.edu/wiki/bin/view/VDS/VDSWeb/WebMain}},
+}
+
+
+@Misc{yahoo-pipes,
+  key =      {Yahoo},
+  title =    {{Yahoo! Pipes}},
+  OPTyear =      {},
+  note =     {\url{http://pipes.yahoo.com}},
+}
+
+@Misc{euprovenance,
+  key =      {Provenance},
+  title =    {{The EU Provenance  Project}},
+  OPTyear =      {},
+  note =     {\url{http://twiki.gridprovenance.org/bin/view/Provenance}},
+}
+
+
+@Misc{gtb,
+  author =   {Wagner Correa and Louis Bavoil and Shachar Fleishman
+                  and Walter Jimenez and James T. Klosowski and
+                  Gilberto C. A. G. Martins and Dirce Uesu and Sinesio
+                  Pesco and Lourena Rocha and Carlos Eduardo
+                  Scheidegger and Claudio T. Silva},
+  title =    {Graphics Tool Box (GTB)},
+  howpublished = {\url{http://gtb.sourceforge.net}},
+}
+
+@Misc{activizcom,
+  author =   {Kitware},
+  title =    {ActiViz/COM},
+  howpublished = {\url{http://www.kitware.com/products/activiz/activizCOM.html}},
+}
+
+@Misc{paraview,
+  author =   {Kitware},
+  title =    {Paraview},
+  howpublished = {\url{http://www.paraview.org}},
+}
+
+@Misc{dataone,
+  key =      {dataone},
+  title =    {{The Data Observation Network for Earth (DataONE)}},
+  note =     {\url{https://dataone.org/}},
+}
+                
+@misc{rdav,
+key = {RDAV},
+title = {Remote Data Analysis and Visualization (RDAV)},
+year = {2009},
+note = {\url{http://rdav.nics.tennessee.edu/node/5}}
+} 
+
+@Misc{cleo,
+  key =      {CLEO},
+  title =    {{CLEO Experiment}},
+  note =     {\url{http://www.lepp.cornell.edu/Research/EPP/CLEO}},
+}
+
+@misc{alps,
+key = {ALPS},
+title = {The ALPS project},
+note = {\url{http://alps.comp-phys.org}}
+}                  
+
+@Misc{vistrails-nsf-discovery,
+  key = 	 {NSFDiscovery},
+  OPTauthor = 	 {NSF},
+  title = 	 {NSF Discovery: A New Vision for Scientific Visualizations},
+  howpublished = {http://www.nsf.gov/discoveries/disc\_summ.jsp?cntn\_id=114322\&org=OLPA\&preview=false},
+  month = 	 mar,
+  year = 	 {2009},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+
+@TechReport{vanzyl@tr2011,
+  author = 	 {Van Zyl, T. L. and McFerren, G. and Vahed, A.},
+  title = 	 {Earth observation scientific workflows in a distributed computing environment},
+  institution =  {CSIR},
+  year = 	 {2011},
+  OPTkey = 	 {},
+  OPTtype = 	 {},
+  number = 	 {7727},
+  OPTaddress = 	 {},
+  OPTmonth = 	 {},
+  note = 	 {\url{http://hdl.handle.net/10204/5435}},
+  OPTannote = 	 {}
+}
+
+@Misc{csir,
+  key = 	 {csir},
+  OPTauthor = 	 {},
+  title = 	 {{Council for Scientific and Industrial Research (CSIR) in South Africa}},
+  OPThowpublished = {},
+  note = 	 {\url{http://www.csir.co.za}}
+}
+
+
+@Misc{dv3d,
+  key = 	 {vtdv3d},
+  OPTauthor = 	 {},
+  title = 	 {{vtDV3D VisTrails Package}},
+  OPThowpublished = {},
+  note = 	 {\url{http://portal.nccs.nasa.gov/DV3D/vtDV3D/\_build/html/index.html}},
+}
+
+@Misc{vt-guide,
+  key = 	 {vistrails},
+  OPTauthor = 	 {},
+  title = 	 {{VisTrails User's Guide}},
+  OPThowpublished = {},
+  note = 	 {\url{http://www.vistrails.org/usersguide/dev/html}},
+}
+
+@Misc{uvcdat,
+  key = 	 {uvcdat},
+  OPTauthor = 	 {},
+  title = 	 {{Ultrascale Visualization - Climate Data Analysis Tools (UV-CDAT)}},
+  OPThowpublished = {},
+  note = 	 {\url{http://uvcdat.llnl.gov}},
+}
+
+@Misc{sahm,
+  key = 	 {sahm},
+  OPTauthor = 	 {},
+  title = 	 {{Software for Assisted Habitat Modeling Package for VisTrails
+(SAHM: VisTrails)}},
+  OPThowpublished = {},
+  note = 	 {\url{http://www.fort.usgs.gov/products/software/sahm}}
+}
+
+@article{morisette:2012:sahm,
+  author = {Morisette, J. and Jarnevich, C. and Holcombe, T. and Talbert, C. and Ignizio, D. and Talbert, M. and Silva, C. T. and Koop, D. and Swanson, A. and Young, N.},
+  title = "Vis{T}rails {SAHM}: {V}isualization and workflow management for ecological niche modeling",
+  journal = {Ecography},
+  year = {2012},
+  note = "To appear",
+}
+
+@article{freedman@physrev2012,
+  author = {Freedman, M. H. and Gukelberger, J. and Hastings, M. B. and Trebst, S. and Troyer, M. and Wang, Z.},
+  title = {{Galois} conjugates of topological phases},
+  journal = {Phys. Rev. B},
+  volume = {85},
+  number = {4},
+  pages = {045414},
+  numpages = {15},
+  month = jan,
+  year = {2012},
+  publisher = {American Physical Society},
+  doi = {10.1103/PhysRevB.85.045414},
+  url = {http://link.aps.org/doi/10.1103/PhysRevB.85.045414}
+}
+
+
+
+@article{uv-cdat@cise2013,
+author = {E. Santos and J. Poco and  Yaxing Wei and  Shishi Liu and B. Cook and D. N. Williams and C. T. Silva},
+title = {{UV-CDAT}: Analyzing Climate Datasets from a User's Perspective},
+journal ={Computing in Science and Engineering},
+volume = {15},
+number = {1},
+issn = {1521-9615},
+year = {2013},
+pages = {94--103},
+doi = {10.1109/MCSE.2013.15},
+publisher = {IEEE Computer Society},
+address = {Los Alamitos, CA, USA},
+}
+
+@inproceedings{vanLangeveld@fdg2009,
+ author = {van Langeveld, Mark and Kessler, Robert},
+ title = {Educational impact of digital visualization and auditing tools on a digital character production course},
+ booktitle = {FDG '09: Proceedings of the 4th International Conference on Foundations of Digital Games},
+ year = {2009},
+ isbn = {978-1-60558-437-9},
+ pages = {316--323},
+ location = {Orlando, Florida},
+ doi = {10.1145/1536513.1536567},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ }
+
+@article{tohline@cise2009,
+author = {Joel E. Tohline and Jinghya Ge and Wesley Even and Erik Anderson},
+title = {A Customized {Python} Module for {CFD} Flow Analysis within {VisTrails}},
+journal ={Computing in Science and Engineering},
+volume = {11},
+number = {3},
+issn = {1521-9615},
+year = {2009},
+pages = {68--73},
+doi = {10.1109/MCSE.2009.44},
+publisher = {IEEE Computer Society},
+address = {Los Alamitos, CA, USA},
+}
+
+@InProceedings{heiland@hpc2010,
+  author = 	 {Randy Heiland and Maciek Swat and Benjamin Zaitlen and James Glazier and Andrew Lumsdale},
+  title = 	 {Workflows for Parameter Studies of Multi-Cell Modeling (HPC)},
+  OPTcrossref =  {},
+  OPTkey = 	 {},
+  booktitle = {Proceedings of the ACM High Performance Computing Symposium},
+  OPTpages = 	 {},
+  year = 	 {2010},
+  OPTeditor = 	 {},
+  OPTvolume = 	 {},
+  OPTnumber = 	 {},
+  OPTseries = 	 {},
+  OPTaddress = 	 {},
+  OPTmonth = 	 {},
+  OPTorganization = {},
+  OPTpublisher = {},
+  note = 	 {To appear},
+  OPTannote = 	 {http://hosting.cs.vt.edu/hpc2010/}
+}
+
+@Misc{cdat,
+  key =      {cdat},
+  OPTauthor =    {},
+  title =    {{Climate Data Analysis Tools (CDAT)}},
+  OPThowpublished = {},
+  note =     {\url{http://www-pcmdi.llnl.gov/software-portal/cdat}},
+  OPTanote =     {CDAT makes use of an open-source, object-oriented, easy-to-learn scripting language (Python) to link together separate software subsystems and packages to form an integrated environment for data analysis. Outside collaborators work independently and contribute on an equal basis with PCMDI.CDAT Climate Data Analysis Tools (CDAT) is a software system designed to provide access to and management of gridded climate data. It uses an object-oriented scripting language to link together separate software subsystems and packages thus forming an integrated environment for solving model diagnosis problems. The power of the system comes from Python and its ability to seamlessly interconnect software. Python provides a general purpose and full-featured scripting language with a variety of user interfaces including command-line interaction, stand-alone scripts (applications) and graphical user interfaces (GUI). The modular CDAT subsystems provide access to the data, to large-array numerical operations (via Numerical Python), and visualization.},
+}
+
+@article{dolgert@cise2008,
+author = {Andrew Dolgert and Lawrence Gibbons and Christopher D. Jones and Valentin Kuznetsov and Mirek Riedewald and Daniel Riley and Gregory J. Sharp and Peter Wittich},
+title = {Provenance in High-Energy Physics Workflows},
+journal ={Computing in Science and Engineering},
+volume = {10},
+number = {3},
+issn = {1521-9615},
+year = {2008},
+pages = {22--29},
+OPTdoi = {http://doi.ieeecomputersociety.org/10.1109/MCSE.2008.81},
+OPTpublisher = {IEEE Computer Society},
+OPTaddress = {Los Alamitos, CA, USA},
+}
+                  
+@Misc{cmop@web,
+  key   = {CMOP},
+  title = {{NSF Center for Coastal Margin Observation and Prediction (CMOP)}},
+  note  = {\url{http://www.stccmop.org}},
+}
+
+     
--- a/vizier.bib
+++ b/vizier.bib