Website/slides/cse4562sp2019/2019-01-28-Intro.html
2019-01-28 09:19:16 -05:00

571 lines
22 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>CSE 4/562</title>
<meta name="description" content="CSE 4/562">
<meta name="author" content="Oliver Kennedy">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
<link rel="stylesheet" href="ubodin.css" id="theme">
<!-- Code syntax highlighting -->
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
<!-- Printing and PDF exports -->
<script>
// Inject the print stylesheet at load time: the PDF-export sheet when the
// query string contains "print-pdf" (case-insensitive), the paper sheet
// otherwise.
var link = document.createElement( 'link' );
link.rel = 'stylesheet';
link.type = 'text/css';
if ( /print-pdf/i.test( window.location.search ) ) {
  link.href = '../reveal.js-3.6.0/css/print/pdf.css';
} else {
  link.href = '../reveal.js-3.6.0/css/print/paper.css';
}
document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<!--[if lt IE 9]>
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
CSE 4/562 - Database Systems
</div>
<div class="slides">
<section>
<h1>Intro</h1>
<h3>CSE 4/562 Database Systems</h3>
<h5>January 28, 2019</h5>
</section>
<section>
<section>
<h2>Why Are Databases Awesome?</h2>
</section>
<section>
<h2>They're Everywhere</h2>
<img src="graphics/Clipart/SqlitePhone.png">
</section>
<section>
<h2>&dollar;&dollar;&dollar;</h2>
<table class="plainrowheads" style="text-align:center; font-size: small;">
<thead><tr>
<th colspan="2">Rank</th>
<th>Organization</th>
<th>Sales (B$)</th>
<th>FY</th>
<th>Market cap (B$)</th>
<th>Headquarters</th>
</tr></thead><tbody>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>1</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Microsoft</td><td>86.6</td><td>2017</td><td>601</td><td>Redmond, WA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>2</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Oracle</td><td>37.2</td><td>2017</td><td>205</td><td>Redwood City, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>3</td><td><img style="margin: 0px;" alt="Germany" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_Germany.svg.png" width="23" height="14"/></td><td>SAP</td><td>23.2</td><td>2017</td><td>117</td><td>Walldorf, Germany</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>4</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Salesforce.com</td><td>8.4</td><td>2017</td><td>69</td><td>San Francisco, CA, US</td>
</tr>
<tr>
<td>5</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>VMware</td><td>6.7</td><td>2017</td><td>48</td><td>Palo Alto, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>6</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Fiserv</td><td>5.3</td><td>2017</td><td>26</td><td>Brookfield, WI, US</td>
</tr>
<tr>
<td>7</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Adobe Systems</td><td>5</td><td>2017</td><td>84</td><td>San Jose, CA, US</td>
</tr>
<tr>
<td>8</td><td><img style="margin: 0px;" alt="United States" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Symantec</td><td>5.4</td><td>2017</td><td>19</td><td>Mountain View, CA, US</td>
</tr>
<tr class="fragment highlight-blue" data-fragment-index="1">
<td>9</td><td><img style="margin: 0px;" alt="Spain" src="../cse4562sp2018/graphics/2018-01-29-23px-Flag_of_Spain.svg.png" width="23" height="15"/></td><td>Amadeus IT Holdings</td><td>4.3</td><td>2017</td><td>25</td><td>Madrid, Spain</td>
</tr>
</tbody><tfoot></tfoot>
</table>
<p class="fragment" data-fragment-index="1">5 of 9 Forbes Top Software Companies<br/>Have a Focus on Data Management Systems</p>
<imagecredits><a href="https://en.wikipedia.org/wiki/List_of_the_largest_software_companies">(Source wikipedia.org)</a></imagecredits>
</section>
<section>
<h2>Interesting Problems</h2>
<img src="../cse4562sp2018/graphics/2018-01-29-db_interesting.svg" />
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h1>What is "Databases"?</h1>
</section>
<section>
<h3 style="padding-bottom: 60px;">How do we ask and answer questions about data?</h3>
<h3>How do we manipulate and persist data?</h3>
</section>
<section>
<h3>Databases</h3>
<table>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Techniques</td>
<td>
<p class="fragment">Data Modeling</p>
<p class="fragment">Cost-Based Optimization</p>
</td>
</tr>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Recipes</td>
<td>
<p class="fragment">Join Algorithms</p>
<p class="fragment">Index Data Structures</p>
</td>
</tr>
<tr>
<td style="font-weight: bold; vertical-align: middle;">Knowledge</td>
<td>
<p class="fragment">The Memory Hierarchy</p>
<p class="fragment">Data Consistency</p>
</td>
</tr>
</table>
</section>
<section>
<h2>Which Tools To Use</h2>
<h2 class="fragment">And When?</h2>
</section>
<section>
<h3>Template for 90% of this class</h3>
<p style="font-size: larger;">What is the best, correct technique for task <b>X</b>, when <b>Y</b> is true?</p>
<ol style="margin-top: 20px;">
<li class="fragment">How do you define <i>Correct</i> and <i>Best</i>?</li>
<li class="fragment">What correct alternatives are available?</li>
<li class="fragment">How do you find the best available alternative?</li>
</ol>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h2>General Course Information</h2>
</section>
<section>
<h3>Expectations</h3>
<dl>
<div class="fragment">
<dt>Algorithms / Data Structures</dt>
<dd>$O(\cdot)$ Analysis, Sort Algos, Trees, Hash Tables</dd>
</div>
<div class="fragment">
<dt>How to <i>use</i> a database</dt>
<dd>CSE 4/560 (or equivalent)</dd>
</div>
<div class="fragment">
<dt>Java (or Scala)</dt>
<dd>This means actual programming experience</dd>
<dd class="fragment">(C++, C# or similar is usually good enough)</dd>
</div>
</dl>
</section>
<section>
<h3>Me</h3>
Oliver Kennedy
<h3 style="padding-top: 50px;">TAs</h3>
<span style="margin: 20px;">Vicky Zheng</span>
<span style="margin: 20px;">Qiuling Suo</span>
<h3 style="padding-top: 50px;">Ninjas</h3>
<span style="margin: 20px;">William Spoth</span>
<span style="margin: 20px;">Darshana Balakrishnan</span>
<span style="margin: 20px;">Carl Nuessle</span>
</section>
<section>
<h3>Syllabus and Projects</h3>
<p><a href="https://odin.cse.buffalo.edu/teaching/cse-462/">https://odin.cse.buffalo.edu/teaching/cse-462/</a></p>
<p><a href="https://odin.cse.buffalo.edu/teaching/cse-562/">https://odin.cse.buffalo.edu/teaching/cse-562/</a></p>
<p style="font-size: small">(same link)</p>
<h3 style="margin-top: 50px;">Course Forum</h3>
<p><a href="https://piazza.com/buffalo/spring2019/cse4562/home">https://piazza.com/buffalo/spring2019/cse4562/home</a></p>
</section>
<section>
<table>
<tr>
<td>
<img src="graphics/Books/DBSystemsHardcover.jpg" height="200px">
</td>
<td>
<img src="graphics/Books/DBSystemsSoftcover.jpg" height="200px">
</td>
</tr>
<tr class="fragment">
<td>$150</td>
<td>$50</td>
</tr>
<tr class="fragment">
<td>Index<br/>ToC</td>
<td>No Index<br/>ToC Summary</td>
</tr>
</table>
</section>
<section>
<h3>Course Structure</h3>
<dl>
<dt>Concepts (50% of Grade)</dt>
<dd><ul class="tight">
<li>Homework (10%; ~12 Assignments, Drop any 4)</li>
<li><b>March 13</b>: Midterm (20%<span class="fragment" data-fragment-index="1"> or 15%</span>)</li>
<li><b>May 17?</b>: Comprehensive Final (20%<span class="fragment" data-fragment-index="1"> or 25%</span>)</li>
</ul></dd>
<dt>Practicum (50% of Grade)</dt>
<dd><ul class="tight">
<li>Build a Relational Query Engine</li>
<li>3-Person Group Project</li>
<li>4 Checkpoints (+ 5 free points for Checkpoint 0)</li>
</ul></dd>
</dl>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h3>Embedded Databases</h3>
<ul>
<li>SQLite (In your browser, computer, phone, fridge...)</li>
<li>Simple, Easy-To-Use Declarative Data Management</li>
<li>Critical for future tech: Part of Mobile, IoT, Web</li>
</ul>
<p class="fragment"><b>Your Startup:</b> Build the next great <i>Embedded Database</i></p>
</section>
<section>
<h3>We give you...</h3>
<p style="font-size: smaller;">Data (CSV Files)</p>
<p style="font-size: smaller;">Schema Information (CREATE TABLE)</p>
<p style="font-size: smaller;">Questions (SQL Queries)</p>
<div class="fragment">
<hr/>
<h3>You give us...</h3>
<p style="font-size: smaller;">Answers</p>
<p style="font-size: smaller;" class="fragment">(really really fast)</p>
</div>
</section>
<section>
<h3>Real World Challenge</h3>
<p>You get graded on your code's...</p>
<dl>
<dt>Correctness</dt>
<dd>~1/3 credit for getting the right answer.</dd>
<dt>Performance</dt>
<dd>~2/3 credit for getting it reasonably fast.</dd>
</dl>
</section>
<section>
<svg data-src="graphics/2019-01-28-project-overview.svg" class="stretch"/>
</section>
<section>
<h3>Checkpoint 0: "Hello World"</h3>
<p style="font-size: smaller;">5/50 pts</p>
<ul>
<li>Form groups</li>
<li>Submit a simple Java program</li>
<li>Make sure that the submission workflow works for you.</li>
</ul>
</section>
<section>
<h3>Checkpoint 1: "Get it Working"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>Translate SQL to Relational Algebra</li>
<li>Load CSV Files</li>
<li>Run Basic Select, Project, Join Queries</li>
</ul>
</section>
<section>
<h3>Checkpoint 2: "Big Data"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>Order By</li>
<li>Limit</li>
<li>Nested Queries</li>
<li>Aggregation</li>
<li>Too much data for memory</li>
</ul>
</section>
<section>
<h3>Checkpoint 3: "Precomputation"</h3>
<p style="font-size: smaller;">15/50 pts</p>
<ul>
<li>You get a few minutes to pre-compute</li>
<li>Load data</li>
<li>Cache views</li>
<li>Build indexes</li>
</ul>
</section>
<section>
<h3>Checkpoint 4: "The Real World"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<ul>
<li>We give you a Buffer Manager</li>
<li>Dynamic, skewed workload</li>
</ul>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h3>Ways to Fail</h3>
<ul>
<li>Start your project at the last minute</li>
<li>Don't go to office hours</li>
<li>Don't ask questions on Piazza</li>
<li>Wait until the deadline to submit for the first time</li>
<li class="fragment">Cheat</li>
</ul>
</section>
<section>
<img src="graphics/Clipart/Graargh.png">
</section>
<section>
<h3>Academic Integrity</h3>
<p>Cheating is submitting any work that you did not perform by yourself as if you did.</p>
</section>
<section>
<dl>
<dt>References (when cited)</dt>
<dd>Wikipedia, Wikibooks (or similar): <span style="color: #00882B; font-weight: bold;">OK</span></dd>
<dt>Public Code</dt>
<dd>Stack Exchange (or similar): <span style="color: #C82506; font-weight: bold;">Not OK</span></dd>
<dt><i>Discussing</i> concepts/ideas with classmates</dt>
<dd>“A hash index has O(1) lookups”: <span style="color: #00882B; font-weight: bold;">OK</span> <span style="color: #C82506; font-size: small;">(except during exams 😇 )</span></dd>
<dt><i>Sharing</i> code or answers with anyone</dt>
<dd>“Just have a look at how I implemented it”: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
<dd>For-hire code: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
</dl>
</section>
<section>
<h3>MOSS</h3>
<img src="../cse4562sp2018/graphics/2018-01-29-MOSS.png" height="400px"/>
</section>
<section>
<h3>MOSS</h3>
<img src="../cse4562sp2018/graphics/2018-01-29-MOSSDetails.png" height="400px"/>
</section>
<section>
<dl>
<dt>Zero Tolerance</dt>
<dd>If I catch you submitting someone else's code (including pay-for-code services), <span style="font-weight: bold; text-decoration: underline;">you will fail the class</span>.</dd>
<dt>Group Responsibility</dt>
<dd>If your teammate cheats on a group project, <span style="font-weight: bold; text-decoration: underline;">the entire group will be penalized.</span></dd>
<dt>Share Code, Share Blame</dt>
<dd>If someone else submits your code as their own, <span style="font-weight: bold; text-decoration: underline;">you will be penalized as well.</span></dd>
</dl>
</section>
<section>
<h3>Questions/Concerns?</h3>
</section>
</section>
<!-- ================================================================ -->
<section>
<section>
<h2>What does a Data Management System Do?</h2>
</section>
<section>
<dl>
<dt>Analysis: Answering user-provided questions about data</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users? <ul class="tight">
<li class="fragment">Declarative Languages</li>
<li class="fragment">Organizational Datastructures (e.g., Indexes)</li>
</ul></dd>
<dt>Manipulation: Safely persisting and sharing data updates</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users?<ul class="tight">
<li class="fragment">Consistency Primitives</li>
<li class="fragment">Data Validation Primitives</li>
</ul></dd>
</dl>
</section>
<section>
<svg data-src="../cse4562sp2018/graphics/2018-01-29-fs_vs_db.svg" />
</section>
<section>
<h2>So let's talk structure...</h2>
</section>
<section>
<dl style="font-size: smaller;">
<dt>Primitive</dt>
<dd>Basic building blocks like Int, Float, Char, String</dd>
<dt>Tuple</dt>
<dd>Several fields of different types. (N-Tuple = N fields)</dd>
<dd>A Tuple has a schema defining each field</dd>
<dt>Set</dt>
<dd>A collection of unique records, all of the same type</dd>
<dt>Bag</dt>
<dd>An unordered collection of records, all of the same type</dd>
<dt>List</dt>
<dd>An ordered collection of records, all of the same type</dd>
</dl>
</section>
<section>
<svg data-src="../cse4562sp2018/graphics/2018-01-29-rel-schemas.svg" />
</section>
<section>
<p>
Your data is currently an <i>Unordered Set</i> <br/>
of <i>Tuples</i> with 100 fields each.
</p>
<p class="fragment" style="margin-top: 50px;">
Tomorrow, you'll be repeatedly asked for <i>1 specific attribute</i><br/>
of <i>5 specific rows</i> identified by the <i>first attribute</i>
</p>
<h3 class="fragment">Can you do better?</h3>
</section>
<section>
<p><b>Better Idea</b>: Rewrite data into a 99-Tuple of Maps keyed on the 1st attribute</p>
<p class="fragment" style="margin-top: 50px;">This representation is <u>equivalent</u> and <u>better</u> for your needs.</p>
<p class="fragment" style="margin-top: 50px; font-weight: bold;">Declarative specifications make it easier to find equivalences.</p>
</section>
</section>
</div></div>
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
<script>
// Presentation setup. The full list of configuration options is documented at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
  // Core presentation behaviour
  controls: true,
  progress: true,
  history: true,
  center: true,
  slideNumber: true,
  transition: 'fade', // none/fade/slide/convex/concave/zoom

  // Chart styling defaults (presumably consumed by the chart plugin that is
  // loaded in the dependency list below -- confirm against the plugin).
  chart: {
    defaults: {
      global: {
        title: { fontColor: "#333", fontSize: 24 },
        legend: {
          labels: { fontColor: "#333", fontSize: 20 }
        },
        responsiveness: true
      },
      scale: {
        scaleLabel: { fontColor: "#333", fontSize: 20 },
        gridLines: { color: "#333", zeroLineColor: "#333" },
        ticks: { fontColor: "#333", fontSize: 16 }
      }
    },
    line: {
      borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ],
      borderDash: [ [5,10], [0,0] ]
    },
    bar: {
      backgroundColor: [
        "rgba(220,220,220,0.8)",
        "rgba(151,187,205,0.8)",
        "rgba(205,151,187,0.8)",
        "rgba(187,205,151,0.8)"
      ]
    },
    pie: {
      backgroundColor: [
        [ "rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)" ]
      ]
    },
    radar: {
      borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]
    }
  },

  // Optional reveal.js plugins, listed in the order they are handed to the loader.
  dependencies: [
    // classList polyfill, only requested when the browser lacks document.body.classList
    {
      src: '../reveal.js-3.6.0/lib/js/classList.js',
      condition: function() { return !document.body.classList; }
    },
    // Math typesetting; always loaded.
    // NOTE(review): the stock reveal.js math plugin takes its MathJax URL from a
    // top-level `math: { mathjax: ... }` option -- confirm that this per-dependency
    // `mathjax` key is actually read by the bundled plugin.
    {
      src: '../reveal.js-3.6.0/plugin/math/math.js',
      condition: function() { return true; },
      mathjax: '../reveal.js-3.6.0/js/MathJax.js'
    },
    // Markdown support, only when some element carries a data-markdown attribute
    {
      src: '../reveal.js-3.6.0/plugin/markdown/marked.js',
      condition: function() { return !!document.querySelector( '[data-markdown]' ); }
    },
    {
      src: '../reveal.js-3.6.0/plugin/markdown/markdown.js',
      condition: function() { return !!document.querySelector( '[data-markdown]' ); }
    },
    // Syntax highlighting, only when the deck contains a <pre><code> block
    {
      src: '../reveal.js-3.6.0/plugin/highlight/highlight.js',
      async: true,
      condition: function() { return !!document.querySelector( 'pre code' ); },
      callback: function() { hljs.initHighlightingOnLoad(); }
    },
    { src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
    { src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
    // Chart.min.js
    { src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js' },
    // the plugin
    { src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js' },
    // svginline scripts (es6-promise.auto.js, then data-src-svg.js), loaded synchronously
    { src: '../reveal.js-3.6.0/plugin/svginline/es6-promise.auto.js', async: false },
    { src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }
  ]
});
</script>
</body>
</html>