<!--
Website/slides/talks/2018-5-UpBeat/index.html

519 lines
19 KiB
HTML
-->

<!doctype html>
<html lang="en">
<head>
<!-- Document metadata -->
<meta charset="utf-8">
<title>Embracing Uncertainty</title>
<meta name="description" content="Mimir">
<meta name="author" content="Oliver Kennedy">

<!-- Apple mobile web-app hints and viewport scaling -->
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">

<!-- reveal.js core stylesheet, then the stylesheet carrying id="theme" -->
<link rel="stylesheet" href="../../reveal.js-3.7.0/css/reveal.css">
<link id="theme" rel="stylesheet" href="ubodin.css">

<!-- Stylesheet for syntax-highlighted code -->
<link rel="stylesheet" href="../../reveal.js-3.7.0/lib/css/zenburn.css">
<!-- Print support: pick the PDF or the paper stylesheet from the query string -->
<script>
  // Build a stylesheet <link>; its href depends on whether the URL's query
  // string contains "print-pdf" (matched case-insensitively).
  var link = document.createElement( 'link' );
  link.type = 'text/css';
  link.rel = 'stylesheet';
  if ( window.location.search.match( /print-pdf/gi ) ) {
    link.href = '../../reveal.js-3.7.0/css/print/pdf.css';
  } else {
    link.href = '../../reveal.js-3.7.0/css/print/paper.css';
  }
  // Attach the link to the document's <head> so the stylesheet is loaded.
  document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<!--
  LiveReload development hook.
  The injected script is parser-blocking and is requested over plain http on
  port 35729, so it is only written into the page when the deck is opened
  locally: host "localhost", host "127.0.0.1", or no host at all (file URLs).
  On any other host nothing is injected.
-->
<script>
(function () {
  // Host name without the port; falls back to 'localhost' when there is no host.
  var host = (location.host || 'localhost').split(':')[0];
  if (host !== 'localhost' && host !== '127.0.0.1') { return; }
  // The closing tag is split so the HTML parser does not end this script early.
  document.write('<script src="http://' + host + ':35729/livereload.js?snipver=1"></' + 'script>');
})();
</script>
<!-- Conditional comment: html5shiv.js is included only for Internet Explorer versions below 9 -->
<!--[if lt IE 9]>
<script src="../../reveal.js-3.7.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
  <!-- Talk-specific banner text; this element sits outside the slides container -->
  Don't Wrangle, Debug
</div>
<div class="footer">
  <!-- Talk-specific footer: tagline floated left, image floated right -->
  <div style="float: left; margin-top: 15px; ">
    Exploring <u><b>O</b></u>nline <u><b>D</b></u>ata <u><b>In</b></u>teractions
  </div>
  <img src="graphics/FullText-white.png" height="40" style="float: right;">
</div>
<div class="slides">
<!-- Title slide: project logo above the author's name -->
<section>
  <img src="graphics/mimir_logo_final.png" alt="Mimir">
  <h4>Oliver Kennedy</h4>
</section>
<!--
  Nested slide stack: "A Big Data Fairy Tale".
  Every image carries alt text derived from its slide caption so the story
  still reads without the artwork.
-->
<section>
  <section>
    <h3>A Big Data Fairy Tale</h3>
  </section>
  <section>
    <img src="graphics/dagobert83-female-user-icon-800px.png" height="300" alt="Alice">
    <h4>Meet Alice</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <img src="graphics/dagobert83-female-user-icon-800px.png" height="300" alt="Alice">
    <img src="graphics/littlestorefront-800px.png" height="300" alt="Alice's store">
    <h4>Alice has a Store</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <!-- NOTE(review): the span between the two images is empty; a connector glyph may have been lost — confirm -->
    <img src="graphics/littlestorefront-800px.png" height="300" style=" vertical-align: middle;" alt="Alice's store">
    <span style="font-size: 3em; vertical-align: middle;"></span>
    <img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" alt="Sales data log">
    <h4>Alice's store collects sales data</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <!-- NOTE(review): alt text for saco-800px.png is inferred from the slide caption — confirm it suits the artwork -->
    <img src="graphics/dagobert83-female-user-icon-800px.png" height="300" style=" vertical-align: middle;" alt="Alice">
    <span style="font-size: 3em; vertical-align: middle;">+</span>
    <img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" alt="Sales data log">
    <span style="font-size: 3em; vertical-align: middle;">=</span>
    <img src="graphics/saco-800px.png" height="300" style=" vertical-align: middle;" alt="Promotion">
    <h4>Alice wants to use her sales data to run a promotion</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <!-- NOTE(review): the span between the two images is empty; a connector glyph may have been lost — confirm -->
    <img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" alt="Sales data log">
    <span style="font-size: 3em; vertical-align: middle;"></span>
    <img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" alt="Database server">
    <h4>So Alice loads up her sales data in her trusty database/hadoop/spark/etc... server.</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" alt="Database server">
    <span style="font-size: 3em; vertical-align: middle;">+&nbsp;?</span>
    <h4>... asks her question ...</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
  <section>
    <img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" alt="Database server">
    <span style="font-size: 3em; vertical-align: middle;">+&nbsp;?&nbsp;</span>
    <img src="graphics/crystalball-800px.png" height="300" style=" vertical-align: middle;" alt="Crystal ball">
    <h4>... and basks in the limitless possibilities of big data.</h4>
    <imagecredits>(OpenClipArt.org)</imagecredits>
  </section>
</section>
<!--
  Nested slide stack: "Why is this a fairy tale?".
  One illustrated slide followed by three bulleted slides on file formats,
  missing data and documentation.
-->
<section>
  <section>
    <h2>Why is this a fairy tale?</h2>
  </section>
  <section>
    <!-- NOTE(review): both spans between the images are empty; connector glyphs may have been lost — confirm -->
    <img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" alt="Sales data log">
    <span style="font-size: 3em; vertical-align: middle;"></span>
    <img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" alt="Database server">
    <span style="font-size: 3em; vertical-align: middle;"></span>
    <img src="graphics/crystalball-800px.png" height="300" style=" vertical-align: middle;" alt="Crystal ball">
    <h4>It's never this easy...</h4>
  </section>
  <section>
    <h3>File Formats</h3>
    <h4>(JSON/XML, CSV, 1000s of Files in Directories)</h4>
    <ul style="width:800px">
      <li>Manually Explore Examples</li>
      <li>Automatic Summarization (Oracle Data Guides)</li>
      <li>Manual Segmentation (Log Data, Files)</li>
    </ul>
  </section>
  <section>
    <h3>Missing Data</h3>
    <h4>(Sensor errors, Survey Data)</h4>
    <ul style="width:800px">
      <li>Discover Outliers</li>
      <!-- The red warning is a fragment, so it appears on a separate step -->
      <li>Fix (pick one): <span class="fragment" style="color:red">Don't guess wrong!</span><ul>
        <li>Impute Missing Values</li>
        <li>Interpolate Missing Values</li>
        <li>Drop Rows with Missing Data</li>
      </ul></li>
    </ul>
  </section>
  <section>
    <h3>Documentation</h3>
    <h4>("The CSV is in GIT")</h4>
    <ul style="width:800px">
      <li>Rediscover Column/Variable Meanings</li>
      <li>Units / Measurement Techniques</li>
      <li>Caveats on Data Usage / Cleaning Techniques</li>
      <li>Make &amp; Rediscover Assumptions About Data</li>
    </ul>
  </section>
</section>
<!-- Nested slide stack: overview of the Mimir family of projects -->
<section>
  <section>
    <h3>Mimir &amp; Family</h3>
    <!-- Each name/description pair is wrapped in a div so the pair is revealed as one fragment -->
    <dl>
      <div class="fragment">
        <dt>SchemaDrill</dt>
        <dd>JSON/Filesystem Schemas</dd>
      </div>
      <div class="fragment">
        <dt>UADBs</dt>
        <dd>Data-Associated Documentation</dd>
      </div>
      <div class="fragment">
        <dt>LOKI</dt>
        <dd>Automatic Reverse-Engineered Documentation</dd>
      </div>
      <div class="fragment">
        <dt>Vizier</dt>
        <dd>Multi-Modal, Interactive Data Exploration</dd>
      </div>
    </dl>
  </section>
</section>
<!--
  Nested slide stack: SchemaDrill.
  Three slides pose the same question over different JSON samples; the last
  slide lists two questions with their answers as ordered fragments.
  The <pre><code> contents stay unindented because their whitespace is significant.
-->
<section>
  <section>
    <h3>SchemaDrill</h3>
    <pre><code class="json">
{ "foo" : 1, "bar" : 2 }
{ "foo" : 3, "bar" : 4, "baz" : 5 }
{ "baz" : 6, "frob" : 7 }
</code></pre>
    <p class="fragment">What's the schema of these objects?</p>
  </section>
  <section>
    <h3>SchemaDrill</h3>
    <pre><code class="json">
{ "name" : "Alice", "address" : "123 A Street" }
{ "name" : "Bob", "address" : "456 B Street", "city" : "Buffalo" }
{ "city" : "Buffalo", "state" : "New York" }
</code></pre>
    <p>What's the schema of these objects?</p>
  </section>
  <section>
    <h3>SchemaDrill</h3>
    <pre><code class="json">
{ "restaurant" : "10-21", "menu" : ["Wings", "Beer"] }
{ "restaurant" : "11-21", "menu" : ["Gnocchi"], "bar" : "17-00" }
{ "bar" : "18-02", "patio" : true }
</code></pre>
    <p>What's the schema of these objects?</p>
  </section>
  <section>
    <h3>SchemaDrill</h3>
    <!-- data-fragment-index sets the reveal order; both entropy answers share index 4 -->
    <dl>
      <dt class="fragment" data-fragment-index="1">Does a collection of objects encode one type of entity or multiple?</dt>
      <dd class="fragment" data-fragment-index="2">Non-Negative Matrix Factorization</dd>
      <dt class="fragment" data-fragment-index="3">Does a nested array / object represent a tuple or a collection?</dt>
      <dd class="fragment" data-fragment-index="4">Key Entropy (higher → more collection-like)</dd>
      <dd class="fragment" data-fragment-index="4">Type Entropy (higher → more tuple-like)</dd>
    </dl>
  </section>
</section>
<!--
  Nested slide stack: UADBs.
  The $...$ spans are inline math; math.js is listed in the Reveal dependencies.
-->
<section>
  <section>
    <h3>UADBs</h3>
    <p>Classical Ways to Query Uncertain Data</p>
    <ul>
      <li><b>Certain Answers</b>: Answers we're $100\%$ confident in.</li>
      <li><b>Possible Answers</b>: Answers we're $>0\%$ confident in.</li>
      <li class="fragment"><b>Best-Guess Answer</b>: <i>Some</i> internally self-consistent answer that <i>looks right</i>.</li>
    </ul>
    <p class="fragment">Certain is principled; Best-Guess is fast.</p>
  </section>
  <section>
    <h3>UADBs</h3>
    <ol>
      <li>Mark possibly erroneous inputs</li>
      <li>Trace marks through queries</li>
      <li>Automatically mark outputs</li>
    </ol>
  </section>
  <section>
    <h3>UADBs</h3>
    <!-- Remote image; alt text is taken from the URL path (mimir/screenshots/cli_plot.png) -->
    <img src="https://odin.cse.buffalo.edu/research/mimir/screenshots/cli_plot.png" alt="Screenshot of a plot in the Mimir command-line interface">
  </section>
  <section>
    <h3>UADBs</h3>
    <ul>
      <li>How do we efficiently compute which results are based on marked inputs?</li>
      <li>Can we guarantee that all marked outputs depend on marked inputs?</li>
      <li>How do we tie these results to existing encodings of uncertain data?</li>
    </ul>
  </section>
</section>
<!-- Nested slide stack: LOKI -->
<section>
  <section>
    <h3>LOKI</h3>
    <h4>Label Once and Keep It</h4>
    <!--
      Placeholder for an external SVG named by data-src. It is presumably
      filled in by the svginline/data-src-svg.js dependency (confirm).
      The element is closed explicitly and its height value is quoted.
    -->
    <svg data-src="graphics/LOKI.svg" height="500px"></svg>
  </section>
</section>
<!--
  Nested slide stack: four Vizier slides.
  The logo in each heading sits next to the word "Vizier", so it gets an empty
  alt; the main images get a neutral, numbered description.
-->
<section>
  <section>
    <h3><img src="graphics/vizier.svg" height="80px" style="vertical-align: bottom; margin-right: 10px" alt="">Vizier</h3>
    <img src="graphics/Vizier-1.png" height="400px" alt="Vizier, image 1 of 4">
  </section>
  <section>
    <h3><img src="graphics/vizier.svg" height="80px" style="vertical-align: bottom; margin-right: 10px" alt="">Vizier</h3>
    <img src="graphics/Vizier-2.png" height="400px" alt="Vizier, image 2 of 4">
  </section>
  <section>
    <h3><img src="graphics/vizier.svg" height="80px" style="vertical-align: bottom; margin-right: 10px" alt="">Vizier</h3>
    <img src="graphics/Vizier-3.png" height="400px" alt="Vizier, image 3 of 4">
  </section>
  <section>
    <h3><img src="graphics/vizier.svg" height="80px" style="vertical-align: bottom; margin-right: 10px" alt="">Vizier</h3>
    <img src="graphics/Vizier-4.png" height="400px" alt="Vizier, image 4 of 4">
  </section>
</section>
<!--
  Acknowledgements slide: students, developer, alumni, external collaborators
  and funding.
  Each portrait has its name in the caption directly below it, so the images
  carry an empty alt instead of repeating the name.
-->
<section>
  <table style="display: inline-block;">
    <tr>
      <th colspan="4" style="font-size: 12pt">Students</th>
    </tr>
    <tr height="80px">
      <td width="100px">
        <img src="people/poonam.jpg" width="70px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Poonam<br>(PhD-3Y)</p>
      </td>
      <td width="100px">
        <img src="people/will.png" width="61px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Will<br>(PhD-2Y)</p>
      </td>
      <td width="100px">
        <img src="people/aaron.jpg" width="64px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt; font-weight: bold;">Aaron<br>(PhD-3Y)</p>
      </td>
      <td width="100px">
        <img src="people/mercy.png" width="50px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Mercy<br>(BS)</p>
      </td>
    </tr>
  </table>
  <table style="display: inline-block; margin-left: 100px">
    <tr>
      <th colspan="1" style="font-size: 12pt">Dev</th>
    </tr>
    <tr>
      <td width="100px">
        <img src="people/mike.jpg" width="80px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Mike<br>(Sr. Rsrch. Dev.)</p>
      </td>
    </tr>
  </table>
  <table>
    <tr>
      <!-- The header spans the six alumni cells in the row below -->
      <th colspan="6" style="font-size: 12pt">Alumni</th>
    </tr>
    <tr height="80px">
      <td width="100px">
        <img src="people/ying.jpg" width="60px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Ying<br>(PhD 2017)</p>
      </td>
      <td width="100px">
        <img src="people/niccolo.png" width="50px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Niccolò<br>(PhD 2016)</p>
      </td>
      <td width="100px">
        <img src="people/arindam.jpg" width="80px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Arindam<br>(MS 2016)</p>
      </td>
      <td width="100px">
        <img src="people/shivang.jpg" width="55px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Shivang<br>(MS 2018)</p>
      </td>
      <td width="100px">
        <img src="people/olivia.png" width="50px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Olivia<br>(BS 2017)</p>
      </td>
      <td width="100px">
        <img src="people/gourab.jpg" width="80px" height="80px" style="margin-bottom: 0px" alt="">
        <p style="margin-top: 0px; font-size: 10pt;">Gourab<br>(MS 2018)</p>
      </td>
    </tr>
  </table>
  <table>
    <tr>
      <th colspan="6" style="font-size: 12pt">External Collaborators</th>
    </tr>
    <tr>
      <td width="130px" style="font-size: 10pt;">
        Dieter Gawlick<br>(Oracle)
      </td>
      <td width="130px" style="font-size: 10pt;">
        Zhen Hua Liu<br>(Oracle)
      </td>
      <td width="130px" style="font-size: 10pt;">
        Ronny Fehling<br>(Airbus)
      </td>
      <td width="130px" style="font-size: 10pt;">
        Beda Hammerschmidt<br>(Oracle)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Boris Glavic<br>(IIT)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Su Feng<br>(IIT)
      </td>
    </tr>
  </table>
  <!-- Second row of external collaborators, kept as its own table -->
  <table style="margin-top: 5px">
    <tr>
      <td width="140px" style="font-size: 10pt;">
        Juliana Freire<br>(NYU)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Wolfgang Gatterbauer<br>(NEU)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Heiko Mueller<br>(NYU)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Remi Rampin<br>(NYU)
      </td>
      <td width="140px" style="font-size: 10pt;">
        Sonia Castelo Quispe<br>(NYU)
      </td>
    </tr>
  </table>
  <p style="font-size: 10pt; text-decoration: underline;">Mimir is supported by NSF Awards ACI-1640864, IIS-1750460, and gifts from Oracle. Prior support from NPS Award N00244-16-1-0022.</p>
</section>
</div></div>
<!-- Load head.min.js and reveal.js, then configure the deck in the inline script below -->
<script src="../../reveal.js-3.7.0/lib/js/head.min.js"></script>
<script src="../../reveal.js-3.7.0/js/reveal.js"></script>
<script>
// Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
// Presentation behaviour: controls off; progress, history, centering and
// slide numbers on.
controls: false,
progress: true,
history: true,
center: true,
slideNumber: true,
transition: 'fade', // none/fade/slide/convex/concave/zoom
// Chart defaults and per-chart-type colours; presumably read by the chart
// plugin listed under `dependencies` below (confirm).
chart: {
defaults: {
global: {
title: { fontColor: "#333", fontSize: 24 },
legend: {
labels: { fontColor: "#333", fontSize: 20 },
},
responsiveness: true
},
scale: {
scaleLabel: { fontColor: "#333", fontSize: 20 },
gridLines: { color: "#333", zeroLineColor: "#333" },
ticks: { fontColor: "#333", fontSize: 16 },
}
},
line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
bar: { backgroundColor: [
"rgba(220,220,220,0.8)",
"rgba(151,187,205,0.8)",
"rgba(205,151,187,0.8)",
"rgba(187,205,151,0.8)"
]
},
pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
},
// Optional reveal.js plugins
dependencies: [
// classList.js is requested only when document.body.classList is missing.
{ src: '../../reveal.js-3.7.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
// Math plugin; its condition always returns true.
// NOTE(review): `mathjax` is set on this dependency entry. reveal.js documents
// the MathJax path under a top-level `math: { mathjax: ... }` option, so
// confirm that this key is actually read.
{ src: '../../reveal.js-3.7.0/plugin/math/math.js',
condition: function() { return true; },
mathjax: '../../reveal.js-3.7.0/js/MathJax.js'
},
// The Markdown scripts are requested only when an element with a
// data-markdown attribute exists.
{ src: '../../reveal.js-3.7.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../../reveal.js-3.7.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
// The highlighter is requested only when a `pre code` element exists; its
// callback calls hljs.initHighlightingOnLoad().
{ src: '../../reveal.js-3.7.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
{ src: '../../reveal.js-3.7.0/plugin/zoom-js/zoom.js', async: true },
{ src: '../../reveal.js-3.7.0/plugin/notes/notes.js', async: true },
// Chart.min.js
{ src: '../../reveal.js-3.7.0/plugin/chart/Chart.min.js'},
// the plugin
{ src: '../../reveal.js-3.7.0/plugin/chart/csv2chart.js'},
// SVG inlining scripts, both marked async: false; es6-promise.auto.js is
// listed before data-src-svg.js.
{ src: '../../reveal.js-3.7.0/plugin/svginline/es6-promise.auto.js', async: false },
{ src: '../../reveal.js-3.7.0/plugin/svginline/data-src-svg.js', async: false }
]
});
</script>
</body>
</html>