pull/1/head
Oliver Kennedy 2016-06-06 10:47:16 -04:00
parent 72b54d7d4f
commit 0a1346ef2c
7 changed files with 121 additions and 12 deletions

View File

@ -46,12 +46,14 @@
"commitment" : { "summer" : 0.5 }
},
{ "title" : "Curating Uncertainty and Reliable Exploitation (CURE)",
"agency" : "US Naval Postgraduate School",
"agency" : "The US Naval Postgraduate School",
"role" : "Co-I",
"amount" : 263215,
"effort" : "50%",
"status" : "submitted",
"start" : "05/2016", "end" : "04/2018",
"status" : "accepted",
"copis" : ["Moises Sudit"],
"start" : "05/2016", "end" : "04/2017",
"optional_end" : "04/2017",
"type" : "grant" },
{ "title" : "III: Small: Just in Time Datastructures",
"agency" : "NSF: CISE: IIS: III",

View File

@ -33,7 +33,7 @@
"venue": "TaPP",
"year": 2016,
"length":6,
"projects" : ["mimir"],
"projects" : ["mimir", "vizier"],
"urls" : {
"paper" : "http://odin.cse.buffalo.edu/papers/2016/TAPP-PVVDW-submitted.pdf"
}

View File

@ -149,15 +149,19 @@ module GemSmith
}
end
# NOTE(review): this hunk interleaves pre-change and post-change lines of the
# diff. The next two lines are the previous signature and its single eagerly
# loaded template; the definition that follows them is the current one.
def apply_template(template)
erb = ERB.new(File.open(template) { |fp| fp.read })
# Builds a rendering step that runs a file record's body through an ERB template.
#
# default_template - path of the template to use when the record passed to the
#                    returned lambda has no :template entry of its own.
#
# Returns a lambda taking one file record `f`, which is read below through the
# keys :template, :dependencies, :stream, :rel_dir and :out_path.
def apply_template(default_template)
# Cache of compiled templates keyed by path: the default block reads and
# compiles a template file the first time its path is looked up and stores
# the ERB object for later lookups.
erb = Hash.new { |h,k|
h[k] = ERB.new(File.open(k) { |fp| fp.read })
}
lambda { |f|
# A record may override the template; otherwise fall back to the default.
template = f.fetch(:template, default_template)
# Record the template path in the record's dependency list.
f[:dependencies].push(template)
# NOTE(review): transform_all is defined outside this view — presumably it
# replaces the stream content with the block's return value; confirm.
f[:stream].transform_all { |body|
# The binding captures this block's scope (so `body` is visible to the
# template); every key/value pair of the record is then added to it as a
# local variable.
b = binding()
f.each { |k,v| b.local_variable_set(k, v) }
# Publish the relative path of the file being rendered in the global
# $gemsmith state.
$gemsmith[:now_rendering] = File.join(f[:rel_dir], File.basename(f[:out_path]))
# NOTE(review): the next line is the pre-change call from the diff; it is
# superseded by the cached-template lookup on the line after it.
erb.result(b)
erb[template].result(b)
}
}
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

52
src/vizier/index.erb Normal file
View File

@ -0,0 +1,52 @@
---
title: Vizier
template: templates/empty.erb
---
<h1 style="text-align: center">Vizier</h1>
<p>Data curation (also called preparation, wrangling, or cleaning) is a critical stage in data science in which raw data is structured, validated, and repaired. Data validation and repair establish trust in analytical results, while appropriate structuring streamlines analytics. A new collaboration between the University at Buffalo, New York University, and the Illinois Institute of Technology is looking to build software to streamline this process, making it easier and faster to explore and analyze raw data.</p>
<p>Our goal is to make curation more...</p>
<ul>
<li>efficient, by modeling your workflow and using the model to offer suggestions.</li>
<li>simple, by using educated guesses to set default parameters.</li>
<li>reliable, by tracking changes and guesses, and communicating their effects on analytical results.</li>
<li>exploratory, by making it easier to quickly "try on" some changes.</li>
<li>speedy, by immediately updating visualizations to reflect small changes to your data or workflow.</li>
</ul>
<p>Our tool, Vizier, will combine a simple "notebook-style" interface based on Jupyter with powerful back-end tools that track changes, edits, and the effects of automation. These forms of "provenance" capture both the exploratory curation process---how the cleaning workflows evolve---and how data changes over time. By connecting these different types of provenance, Vizier will not only support the auditing of curation processes, but also explain the context in which they were applied, making it faster and easier to curate data.</p>
<hr>
<h3>Provenance for Curation and Exploration</h3>
<img src="graphics/Overview.png" style="height: 1123; width: 404;" class="img-thumbnail"/>
<p>Vizier enables worry-free exploration. A simple notebook interface mirrors a spreadsheet view of your data, tracking the provenance of your edits. Provenance is at the heart of Vizier, making it easy to undo and redo actions and allowing Vizier to suggest new curation steps, visualizations, or to make guesses about your data. Finally, provenance allows you to develop curation workflows on small datasets and then seamlessly deploy them to larger datasets (e.g., via Spark or Hadoop).</p>
<hr/>
<h2>The Team</h2>
<ul>
<li>Heiko Mueller (NYU)</li>
<li>Jacob Varghese (UB)</li>
<li>Juliana Freire (NYU)</li>
<li>Oliver Kennedy (UB)</li>
<li>Boris Glavic (IIT)</li>
</ul>
<hr/>
<h2>Publications</h2>
<ul><%
# Emit one "pub" entry for every publication whose "projects" list includes
# "vizier"; a publication without a "projects" key is treated as having none.
# Each entry shows the title, the venue, and one link per key/url pair in
# pub["urls"], joined by " | ".
# NOTE(review): `where` is not defined in this view — presumably a filtering
# helper provided on the $db collection; confirm against the site generator.
$db["publications"].
where { |pub| pub.fetch("projects", []).include? "vizier" }.
each do |pub|
%>
<div class="pub"><strong><%=pub["title"]%></strong>&nbsp;&nbsp;&nbsp;<i><%=pub["venue"]%></i>&nbsp;&nbsp;&nbsp;( <span class="resources"><%= pub["urls"].map { |r, url| "<a href=\"#{url}\">#{r}</a>"}.join(" | ") %></span> )</div>
<% end %>
</ul>
<hr/>

View File

@ -1,5 +0,0 @@
<h1 style="text-align: center">Vizier</h1>
<p>Data curation (aka wrangling and cleaning) is a critical stage in data science in which raw data is structured, validated, and repaired. Data validation and repair establish trust in analytical results, while appropriate structuring streamlines analytics. Unfortunately, even with advances in automated tools (e.g., Oracle's Data Guide and Trifacta's Wrangler), wrangling is still a major bottleneck in data exploration.</p>
<p>

56
templates/empty.erb Executable file
View File

@ -0,0 +1,56 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<meta name="description" content="">
<meta name="author" content="">
<link rel="icon" href="../../favicon.ico">
<title>The ODIn Lab - <%= title %></title>
<!-- Bootstrap core CSS -->
<link href="<%= asset_path("bootstrap")%>/css/bootstrap.min.css" rel="stylesheet" />
<!-- Custom styles for this template -->
<link href="<%= asset_path("odin.css")%>" rel="stylesheet" />
<% if defined? extraCSS then extraCSS.each do |data| %>
<% if data.has_key? "asset" %>
<link href="<%= asset_path(data["asset"]) %>" rel="stylesheet"/>
<% elsif data.has_key? "file" %>
<link href="<%= data["file"] %>" rel="stylesheet"/>
<% else raise "Invalid extraCSS: #{data}" end %>
<% end end %>
<!-- MathJax Configuration -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}});
</script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container">
<%= body %>
</div><!-- /.container -->
<!-- Bootstrap core JavaScript
================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="<%=asset_path("jquery.js")%>"></script>
<script src="<%= asset_path("bootstrap")%>/js/bootstrap.min.js"></script>
<script src="<%= asset_path("mathjax")%>/MathJax.js"></script>
<script src="<%= asset_path("mathjax")%>/config/TeX-AMS_HTML-full.js"></script>
</body>
</html>