Website/slides/cse4562sp2018/2018-01-29-Intro.html

558 lines
22 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>CSE 4/562 - Spring 2018</title>
<meta name="description" content="CSE 4/562 - Spring 2018">
<meta name="author" content="Oliver Kennedy">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
<link rel="stylesheet" href="ubodin.css" id="theme">
<!-- Code syntax highlighting -->
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
<!-- Printing and PDF exports -->
<script>
// Build a <link> for the print stylesheet and attach it to <head>.
// When the query string mentions "print-pdf" (case-insensitive) the PDF
// export sheet is used; otherwise the paper sheet is used.
var link = document.createElement( 'link' );
link.setAttribute( 'rel', 'stylesheet' );
link.setAttribute( 'type', 'text/css' );
if ( /print-pdf/i.test( window.location.search ) ) {
  link.setAttribute( 'href', '../reveal.js-3.6.0/css/print/pdf.css' );
} else {
  link.setAttribute( 'href', '../reveal.js-3.6.0/css/print/paper.css' );
}
document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<!--[if lt IE 9]>
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
CSE 4/562 - Database Systems
</div>
<div class="slides">
<section>
<h1>Intro</h1>
<h3>CSE 4/562 Database Systems</h3>
<h5>January 29, 2018</h5>
</section>
<section>
<section>
<h2>Why Are Databases Awesome?</h2>
</section>
<section>
<h2>They're Everywhere</h2>
<img src="graphics/Clipart/SqlitePhone.png">
</section>
<section>
<h2>&dollar;&dollar;&dollar;</h2>
<table class="plainrowheads" style="text-align:center; font-size: small;">
<thead><tr>
<th colspan="2">Rank</th>
<th>Organization</th>
<th>Sales (B$)</th>
<th>FY</th>
<th>Market cap (B$)</th>
<th>Headquarters</th>
</tr></thead><tbody>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>1</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Microsoft</td><td>86.6</td><td>2017</td><td>601</td><td>Redmond, WA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>2</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Oracle</td><td>37.2</td><td>2017</td><td>205</td><td>Redwood City, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>3</td><td><img style="margin: 0px;" alt="Germany" src="graphics/2018-01-29-23px-Flag_of_Germany.svg.png" width="23" height="14"/></td><td>SAP</td><td>23.2</td><td>2017</td><td>117</td><td>Walldorf, Germany</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>4</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Salesforce.com</td><td>8.4</td><td>2017</td><td>69</td><td>San Francisco, CA, US</td>
</tr>
<tr>
<td>5</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>VMware</td><td>6.7</td><td>2017</td><td>48</td><td>Palo Alto, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>6</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Fiserv</td><td>5.3</td><td>2017</td><td>26</td><td>Brookfield, WI, US</td>
</tr>
<tr>
<td>7</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Adobe Systems</td><td>5</td><td>2017</td><td>84</td><td>San Jose, CA, US</td>
</tr>
<tr>
<td>8</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Symantec</td><td>5.4</td><td>2017</td><td>19</td><td>Mountain View, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>9</td><td><img style="margin: 0px;" alt="Spain" src="graphics/2018-01-29-23px-Flag_of_Spain.svg.png" width="23" height="15"/></td><td>Amadeus IT Holdings</td><td>4.3</td><td>2017</td><td>25</td><td>Madrid, Spain</td>
</tr>
</tbody><tfoot></tfoot>
</table>
<p class="fragment" data-fragment-index="1">5 of 9 Forbes Top Software Companies<br/>Have a Focus on Data Management Systems</p>
<imagecredits><a href="https://en.wikipedia.org/wiki/List_of_the_largest_software_companies">(Source wikipedia.org)</a></imagecredits>
</section>
<section>
<h2>Interesting Problems</h2>
<img src="graphics/2018-01-29-db_interesting.svg" />
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h1>What is "Databases"?</h1>
</section>
<section>
<h4 style="margin-bottom: 0px;">How do we ask and answer questions about data?</h4>
<ul style="font-size: smaller" class="tight fragment">
<li>Efficiency</li>
<li>Accuracy</li>
<li>Summarization</li>
<li>Data Curation</li>
</ul>
<h4 style="margin-bottom: 0px; margin-top: 20px;">How do we manipulate and persist data?</h4>
<ul style="font-size: smaller" class="tight fragment">
<li>Consistency</li>
<li>Correctness</li>
<li>Parallelism</li>
</ul>
</section>
<section>
<h3>Databases</h3>
<table>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Techniques</td>
<td>
<p class="fragment">Data Modeling</p>
<p class="fragment">Cost-Based Optimization</p>
</td>
</tr>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Recipes</td>
<td>
<p class="fragment">Join Algorithms</p>
<p class="fragment">Index Data Structures</p>
</td>
</tr>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Knowledge</td>
<td>
<p class="fragment">The Memory Hierarchy</p>
<p class="fragment">Data Consistency</p>
</td>
</tr>
</table>
</section>
<section>
<h2>Which Tools To Use</h2>
<h2 class="fragment">And When?</h2>
</section>
<section>
<h3>Template for 90% of Database Systems</h3>
<p style="font-size: larger;">For <b>X</b>, the best, correct choice is <b>Y</b>, at least when <b>Z</b>.</p>
<ol style="margin-top: 20px;">
<li class="fragment">How do you define <i>Correct</i> and <i>Best</i>?</li>
<li class="fragment">What correct alternatives are available?</li>
<li class="fragment">How do you find the best available alternative?</li>
</ol>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h2>General Course Information</h2>
</section>
<section>
<h3>People</h3>
<ul>
<li>Oliver Kennedy (me)</li>
<li>Gokhan Kul (Practicum Lead)</li>
<li>William Spoth (Project TA)</li>
<li>Saurav Singhi (Concept TA)</li>
<li>Carl Nuessle (Ninja)</li>
<li>[ TBD ] </li>
</ul>
</section>
<section>
<h3>Syllabus and Projects</h3>
<p><a href="https://odin.cse.buffalo.edu/teaching/cse-462/">https://odin.cse.buffalo.edu/teaching/cse-462/</a></p>
<p><a href="https://odin.cse.buffalo.edu/teaching/cse-562/">https://odin.cse.buffalo.edu/teaching/cse-562/</a></p>
<p style="font-size: small">(same link)</p>
<h3 style="margin-top: 50px;">Course Forum</h3>
<p><a href="https://piazza.com/buffalo/spring2018/cse4562/home">https://piazza.com/buffalo/spring2018/cse4562/home</a></p>
</section>
<section>
<h3>Course Structure</h3>
<dl>
<dt>Concepts (50% of Grade; Lectures on Mon/Wed)</dt>
<dd><ul class="tight">
<li>Homework (10%; 12-15 Assignments, Keep Best 10)</li>
<li>Midterm (20% or 15%)</li>
<li>Comprehensive Final (20% or 25%)</li>
</ul></dd>
<dt>Practicum (50% of Grade; Lectures on Fri)</dt>
<dd><ul class="tight">
<li>Build a Relational Query Engine</li>
<li>3-Person Group Project</li>
<li>5 "Checkpoints"</li>
</ul></dd>
</dl>
</section>
<section>
<table>
<tr>
<td>
<img src="graphics/Books/DBSystemsHardcover.jpg" height="200px">
</td>
<td>
<img src="graphics/Books/DBSystemsSoftcover.jpg" height="200px">
</td>
</tr>
<tr class="fragment">
<td>$150</td>
<td>$50</td>
</tr>
<tr class="fragment">
<td>Index<br/>ToC</td>
<td>No Index<br/>ToC Summary</td>
</tr>
</table>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h3>Embedded Databases</h3>
<ul>
<li>SQLite (In your browser, computer, phone, fridge...)</li>
<li>Simple, Easy-To-Use Declarative Data Management</li>
<li>Critical for future tech: Part of Mobile, IoT, Web</li>
</ul>
<p class="fragment"><b>Your Startup:</b> Build the next great <i>Embedded Database</i></p>
</section>
<section>
<h3>We give you...</h3>
<p style="font-size: smaller;">Data (CSV Files)</p>
<p style="font-size: smaller;">Schema Information (CREATE TABLE)</p>
<p style="font-size: smaller;">Questions (SQL Queries)</p>
<div class="fragment">
<hr/>
<h3>You give us...</h3>
<p style="font-size: smaller;">Answers</p>
<p style="font-size: smaller;" class="fragment">(really really fast)</p>
</div>
</section>
<section>
<h3>Real World Challenge</h3>
<p>You get graded on your code's...</p>
<dl>
<dt>Correctness</dt>
<dd>~1/3 credit for getting the right answer.</dd>
<dt>Performance</dt>
<dd>~2/3 credit for getting it reasonably fast.</dd>
</dl>
</section>
<section>
<svg data-src="graphics/2018-01-29-project-overview.svg" class="stretch"/>
</section>
<section>
<h3>Checkpoint 0: "Hello World"</h3>
<p style="font-size: smaller;">5/50 pts</p>
<ul>
<li>Form groups</li>
<li>Submit a simple Java program</li>
<li>Make sure that the submission workflow works for you.</li>
</ul>
</section>
<section>
<h3>Checkpoint 1: "Intro to CSV"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>Parse SQL with JSQLParser</li>
<li>Load CSV Files</li>
<li>Project (Map) Data</li>
<li>Select (Filter) Data</li>
</ul>
</section>
<section>
<h3>Checkpoint 2: "Real SQL"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>Joins</li>
<li>Order By</li>
<li>Limit</li>
<li>Nested Queries</li>
<li>Interactive Prompt</li>
</ul>
</section>
<section>
<h3>Checkpoint 3: "Optimization"</h3>
<p style="font-size: smaller;">15/50 pts</p>
<ul>
<li>Aggregation</li>
<li>Actual Data (no naive algorithms)</li>
</ul>
</section>
<section>
<h3>Checkpoint 4: "The Real World"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>Too much data for memory</li>
<li>Time for precomputation</li>
</ul>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h3>Ways to Fail</h3>
<ul>
<li>Start your project at the last minute</li>
<li>Don't go to office hours</li>
<li>Don't ask questions on Piazza</li>
<li>Wait until the deadline to submit for the first time</li>
<li class="fragment">Cheat</li>
</ul>
</section>
<section>
<img src="graphics/Clipart/Graargh.png">
</section>
<section>
<h3>Academic Integrity</h3>
<p>Cheating is submitting any work that you did not perform by yourself <span class="fragment highlight-red">as if you did</span>.</p>
</section>
<section>
<dl>
<dt>References (when cited)</dt>
<dd>Wikipedia, Wikibooks (or similar): <span style="color: #00882B; font-weight: bold;">OK</span></dd>
<dt>Public Code</dt>
<dd>Stack Exchange (or similar): <span style="color: #C82506; font-weight: bold;">Not OK</span></dd>
<dt><i>Discussing</i> concepts/ideas with classmates</dt>
<dd>“A hash index has O(1) lookups”: <span style="color: #00882B; font-weight: bold;">OK</span> <span style="color: #C82506; font-size: small;">(except during exams 😇 )</span></dd>
<dt><i>Sharing</i> code or answers with anyone</dt>
<dd>“Just have a look at how I implemented it”: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
<dd>For-hire code: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
</dl>
</section>
<section>
<h3>MOSS</h3>
<img src="graphics/2018-01-29-MOSS.png" height="400px"/>
</section>
<section>
<h3>MOSS</h3>
<img src="graphics/2018-01-29-MOSSDetails.png" height="400px"/>
</section>
<section>
<dl>
<dt>Zero Tolerance</dt>
<dd>If I catch you submitting someone else's code, <span style="font-weight: bold; text-decoration: underline;">you will fail the class</span>.</dd>
<dt>Group Responsibility</dt>
<dd>If your teammate cheats on a group project, <span style="font-weight: bold; text-decoration: underline;">the entire group will be penalized.</span></dd>
<dt>Share Code, Share Blame</dt>
<dd>If someone else submits your code as their own, <span style="font-weight: bold; text-decoration: underline;">you will be penalized as well.</span></dd>
</dl>
</section>
<section>
<h3>Questions/Concerns?</h3>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h2>What does a Data Management System Do?</h2>
</section>
<section>
<dl>
<dt>Analysis: Answering user-provided questions about data</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users? <ul class="tight">
<li class="fragment">Declarative Languages</li>
<li class="fragment">Organizational Datastructures (e.g., Indexes)</li>
</ul></dd>
<dt>Manipulation: Safely persisting and sharing data updates</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users?<ul class="tight">
<li class="fragment">Consistency Primitives</li>
<li class="fragment">Data Validation Primitives</li>
</ul></dd>
</dl>
</section>
<section>
<svg data-src="graphics/2018-01-29-fs_vs_db.svg" />
</section>
<section>
<h2>So let's talk structure...</h2>
</section>
<section>
<dl style="font-size: smaller;">
<dt>Primitive</dt>
<dd>Basic building blocks like Int, Float, Char, String</dd>
<dt>Tuple</dt>
<dd>Several fields of different types. (N-Tuple = N fields)</dd>
<dd>A Tuple has a schema defining each field</dd>
<dt>Set</dt>
<dd>A collection of unique records, all of the same type</dd>
<dt>Bag</dt>
<dd>An unordered collection of records, all of the same type</dd>
<dt>List</dt>
<dd>An ordered collection of records, all of the same type</dd>
</dl>
</section>
<section>
<svg data-src="graphics/2018-01-29-rel-schemas.svg" />
</section>
<section>
<p>
Your data is currently an <i>Unordered Set</i> <br/>
of <i>Tuples</i> with 100 fields each.
</p>
<p class="fragment" style="margin-top: 50px;">
Tomorrow, you'll be repeatedly asked for <i>1 specific attribute</i><br/>
of <i>5 specific rows</i> identified by the <i>first attribute</i>
</p>
<h3 class="fragment">Can you do better?</h3>
</section>
<section>
<p><b>Better Idea</b>: Rewrite data into a 99-Tuple of Maps keyed on the 1st attribute</p>
<p class="fragment" style="margin-top: 50px;">This representation is <u>equivalent</u> and <u>better</u> for your needs.</p>
<p class="fragment" style="margin-top: 50px; font-weight: bold;">Declarative specifications make it easier to find equivalences.</p>
</section>
</section>
</div></div>
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
<script>
// Start the slide deck. The object below is the reveal.js configuration.
// Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
// Core presentation behaviour.
controls: true,
progress: true,
history: true,
center: true,
slideNumber: true,
transition: 'fade', // none/fade/slide/convex/concave/zoom
// Chart styling options: shared defaults, then per-chart-type colours.
// Presumably consumed by the chart plugin (csv2chart.js) listed in
// `dependencies` below; verify against that plugin.
chart: {
defaults: {
global: {
title: { fontColor: "#333", fontSize: 24 },
legend: {
labels: { fontColor: "#333", fontSize: 20 },
},
responsiveness: true
},
scale: {
scaleLabel: { fontColor: "#333", fontSize: 20 },
gridLines: { color: "#333", zeroLineColor: "#333" },
ticks: { fontColor: "#333", fontSize: 16 },
}
},
line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
bar: { backgroundColor: [
"rgba(220,220,220,0.8)",
"rgba(151,187,205,0.8)",
"rgba(205,151,187,0.8)",
"rgba(187,205,151,0.8)"
]
},
pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
},
// Optional reveal.js plugins. Entries with a `condition` are only loaded
// when that function returns true.
dependencies: [
// classList polyfill: loaded only when document.body.classList is missing.
{ src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
// Math plugin: always loaded (condition returns true).
// NOTE(review): `mathjax` is set on this dependency entry. The reveal.js
// math plugin documents its MathJax location under a top-level
// `math: { mathjax: ... }` option, so this key may not be read. Confirm.
{ src: '../reveal.js-3.6.0/plugin/math/math.js',
condition: function() { return true; },
mathjax: '../reveal.js-3.6.0/js/MathJax.js'
},
// Markdown support: loaded only when some element has a data-markdown attribute.
{ src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
// Syntax highlighting: loaded only when a `pre code` element exists; the
// callback calls hljs.initHighlightingOnLoad() once the script has loaded.
{ src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
// Zoom and speaker-notes plugins, loaded asynchronously.
{ src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
{ src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
// Chart.min.js is listed before csv2chart.js, the chart plugin itself.
{ src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
// the plugin
{ src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},
// Inline-SVG support, presumably for the <svg data-src="..."> elements in
// the slides (verify). The Promise polyfill is listed first and both are
// marked async: false.
{ src: '../reveal.js-3.6.0/plugin/svginline/es6-promise.auto.js', async: false },
{ src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }
]
});
</script>
</body>
</html>