274 lines
9.6 KiB
HTML
274 lines
9.6 KiB
HTML
<!doctype html>
|
|
<html lang="en">
|
|
|
|
<head>
|
|
<meta charset="utf-8">
|
|
|
|
<title>Embracing Uncertainty</title>
|
|
|
|
<meta name="description" content="Mimir, an awesome system for embracing uncertainty">
|
|
<meta name="author" content="Oliver Kennedy">
|
|
|
|
<meta name="apple-mobile-web-app-capable" content="yes" />
|
|
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
|
|
|
<link rel="stylesheet" href="../reveal.js-3.1.0/css/reveal.css">
|
|
<link rel="stylesheet" href="ubodin.css" id="theme">
|
|
|
|
<!-- Code syntax highlighting -->
|
|
<link rel="stylesheet" href="../reveal.js-3.1.0/lib/css/zenburn.css">
|
|
|
|
<!-- Printing and PDF exports -->
|
|
<script>
  // Choose the print stylesheet at load time and attach it to <head>:
  // the PDF-export sheet when the query string contains "print-pdf"
  // (matched case-insensitively), the paper sheet otherwise.
  var link = document.createElement( 'link' );
  link.rel = 'stylesheet';
  link.type = 'text/css';
  if ( window.location.search.match( /print-pdf/gi ) ) {
    link.href = '../reveal.js-3.1.0/css/print/pdf.css';
  } else {
    link.href = '../reveal.js-3.1.0/css/print/paper.css';
  }
  document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
|
|
|
|
<!--[if lt IE 9]>
|
|
<script src="../reveal.js-3.1.0/lib/js/html5shiv.js"></script>
|
|
<![endif]-->
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<div class="reveal">
|
|
<!-- Any section element inside of this container is displayed as a slide -->
|
|
|
|
<div class="header">
|
|
<!-- Any Talk-Specific Header Content Goes Here -->
|
|
Embracing Uncertainty with Mimir
|
|
</div>
|
|
<div class="footer">
|
|
<!-- Any Talk-Specific Footer Content Goes Here -->
|
|
<div style="float: left; margin-top: 15px; ">
|
|
Exploring <u><b>O</b></u>nline <u><b>D</b></u>ata <u><b>In</b></u>teractions
|
|
</div>
|
|
<!-- Footer logo link. The image is the link's only content, so its alt text
     names the destination; rel="noopener" keeps the page opened in the new
     tab from reaching back through window.opener. -->
<a href="http://odin.cse.buffalo.edu" target="_blank" rel="noopener">
  <img src="graphics/FullText-white.png" height="40" style="float: right;" alt="ODIN website (opens in a new tab)"/>
</a>
|
|
</div>
|
|
|
|
<div class="slides">
|
|
|
|
<section>
|
|
|
|
<section>
|
|
<img src="graphics/FullText-black.png" height="100"/>
|
|
<h5><a href="http://odin.cse.buffalo.edu">http://odin.cse.buffalo.edu</a></h5>
|
|
<img src="graphics/qrcode.31361737.png" />
|
|
</section>
|
|
|
|
<section>
|
|
<h2>Embracing Uncertainty</h2>
|
|
<div class="headertext" style="float: left; color: #041a9b; height: 3em;">U.B.</div>
|
|
<div class="headertext" style="color: #041a9b">
|
|
Ying Yang, Niccolo Meneghetti, <br/>
|
|
<u>Arindam Nandi</u>, Vinayak Karuppasamy, <br/>
|
|
<u>Oliver Kennedy</u>, Jan Chomicki</div>
|
|
<div class="headertext" style="float: left; color: red;">Oracle</div>
|
|
<div class="headertext" style="color: red;">Ronny Fehling, Zhen-Hua Liu, Dieter Gawlick</div>
|
|
</section>
|
|
</section>
|
|
|
|
|
|
<section>
|
|
|
|
<section>
|
|
<h3>A Big Data Fairy Tale</h3>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/dagobert83-female-user-icon-800px.png" height="300" />
|
|
<h4>Meet Alice</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/dagobert83-female-user-icon-800px.png" height="300" />
|
|
<img src="graphics/littlestorefront-800px.png" height="300" />
|
|
<h4>Alice has a Store</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/littlestorefront-800px.png" height="300" style=" vertical-align: middle;"/>
|
|
<span style="font-size: 3em; vertical-align: middle;">→</span>
|
|
<img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" />
|
|
<h4>Alice's store collects sales data</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/dagobert83-female-user-icon-800px.png" height="300" style=" vertical-align: middle;"/>
|
|
<span style="font-size: 3em; vertical-align: middle;">+</span>
|
|
<img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;" />
|
|
<span style="font-size: 3em; vertical-align: middle;">=</span>
|
|
<img src="graphics/saco-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<h4>Alice wants to use her sales data to run a promotion</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;"/>
|
|
<span style="font-size: 3em; vertical-align: middle;">→</span>
|
|
<img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<h4>So Alice loads up her sales data in her trusty database/hadoop/spark/etc... server.</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<span style="font-size: 3em; vertical-align: middle;">+ ?</span>
|
|
<h4>... asks her question ...</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<span style="font-size: 3em; vertical-align: middle;">+ ? →</span>
|
|
<img src="graphics/crystalball-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<h4>... and basks in the limitless possibilities of big data.</h4>
|
|
|
|
<attribution>(OpenClipArt.org)</attribution>
|
|
</section>
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>Why is this a fairy tale?</h2>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/matt-icons_text-x-log-300px.png" height="300" style=" vertical-align: middle;"/>
|
|
<span style="font-size: 3em; vertical-align: middle;">→</span>
|
|
<img src="graphics/database-server-800px.png" height="300" style=" vertical-align: middle;" />
|
|
<h4>It's never this easy...</h4>
|
|
</section>
|
|
|
|
<section>
|
|
<h2>Loading Data</h2>
|
|
<small>
|
|
<ul>
|
|
<li>Validating and Fixing Outliers</li>
|
|
<li>Handling Missing Data</li>
|
|
<li>Matching Schemas</li>
|
|
<li>Fixing Schemas</li>
|
|
<li>Managing Stale Data</li>
|
|
<li>Deduplicating Records</li>
|
|
<li>... and lots more</li>
|
|
</ul>
|
|
</small>
|
|
</section>
|
|
|
|
</section>
|
|
|
|
<section>
|
|
|
|
<section>
|
|
<h3>Data Cleaning is Hard!</h3>
|
|
|
|
<img src="graphics/BI-Analyst.jpg" height="400" />
|
|
<attribution>(skilledup.com)</attribution>
|
|
|
|
<p>Alice spends weeks cleaning her data before using it.</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Newer State of the Art</h3>
|
|
<img src="graphics/azure-data-lake.png" height="500" />
|
|
<attribution>(azure.microsoft.com)</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="graphics/data-lake-to-data-swamp.jpg" height="500" />
|
|
<attribution>(timoelliott.com)</attribution>
|
|
</section>
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>Making Cleaning Easier</h2>
|
|
<!-- Trade-off diagram. The polygon traces a double-headed horizontal arrow
     (tips at x=40 and x=460, shaft along y=50) with "Scalability" labelled
     at the left end and "Reliability" at the right. The three rotated
     labels carry class="fragment", so reveal.js shows them step by step. -->
<svg width="500" height="300">
  <polygon
    points="60,50 60,60 40,50 60,40 60,50 440,50 440,40 460,50 440,60 440,50"
    style="
      stroke: black;
      fill: black;
      stroke-width: 2;
    "
  />
  <text x="0" y="30" style="font-size: 0.75em">Scalability</text>
  <text x="370" y="30" style="font-size: 0.75em">Reliability</text>
  <text class="fragment" x="-220" y="400" style="font-size: 0.75em" transform="rotate(-90 20,20)">Expert Analysis</text>
  <text class="fragment" x="-220" y="250" style="font-size: 0.75em" transform="rotate(-90 20,20)">Crowdsourcing</text>
  <text class="fragment" x="-180" y="100" style="font-size: 0.75em" transform="rotate(-90 20,20)">Automation</text>
</svg>
|
|
<p class="fragment">Can we start with automation and work our way up?</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h2>Mimir!</h2>
|
|
|
|
<p><a href="http://demo.odin.cse.buffalo.edu" target="_blank" rel="noopener"><img src="http://odin.cse.buffalo.edu/wp-content/uploads/2015/08/Mimir_Screenshot.png" height="400" alt="Mimir screenshot; opens the demo site in a new tab"/></a></p>
|
|
</section>
|
|
|
|
<section>
|
|
|
|
<h2>Intuitive Uncertainty</h2>
|
|
|
|
<p><b>UB</b>: Ying Yang, Niccolo Meneghetti, <br/> Arindam Nandi, Vinayak Karuppasamy, <br/>Oliver Kennedy, Jan Chomicki</p>
|
|
<p><b>Oracle</b>: Ronny Fehling, Zhen-Hua Liu, Dieter Gawlick</p>
|
|
|
|
<h4>Thanks to Oracle for multiple gifts that make this research possible</h4>
|
|
</section>
|
|
</section>
|
|
|
|
</div></div>
|
|
|
|
<script src="../reveal.js-3.1.0/lib/js/head.min.js"></script>
|
|
<script src="../reveal.js-3.1.0/js/reveal.js"></script>
|
|
|
|
<script>
|
|
|
|
// Full list of configuration options available at:
|
|
// https://github.com/hakimel/reveal.js#configuration
|
|
Reveal.initialize({
|
|
controls: false,
|
|
progress: true,
|
|
history: true,
|
|
center: true,
|
|
slideNumber: true,
|
|
|
|
transition: 'fade', // none/fade/slide/convex/concave/zoom
|
|
|
|
// Optional reveal.js plugins
|
|
dependencies: [
|
|
{ src: '../reveal.js-3.1.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
|
|
{ src: '../reveal.js-3.1.0/plugin/math/math.js',
|
|
condition: function() { return true; },
|
|
mathjax: '../reveal.js-3.1.0/js/MathJax.js'
|
|
},
|
|
{ src: '../reveal.js-3.1.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
|
|
{ src: '../reveal.js-3.1.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
|
|
{ src: '../reveal.js-3.1.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
|
|
{ src: '../reveal.js-3.1.0/plugin/zoom-js/zoom.js', async: true },
|
|
{ src: '../reveal.js-3.1.0/plugin/notes/notes.js', async: true }
|
|
]
|
|
});
|
|
|
|
</script>
|
|
|
|
</body>
|
|
</html>
|