
558 lines
22 KiB
Raw Normal View History

2018-01-29 08:38:48 -05:00
<!doctype html>
<html lang="en">
<meta charset="utf-8">
<title>CSE 4/562 - Spring 2018</title>
<meta name="description" content="CSE 4/562 - Spring 2018">
<meta name="author" content="Oliver Kennedy">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
<link rel="stylesheet" href="ubodin.css" id="theme">
<!-- Code syntax highlighting -->
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
<!-- Printing and PDF exports -->
// Printing and PDF exports: append a print stylesheet to <head> at load time.
// When the page URL's query string contains "print-pdf" the PDF export
// stylesheet is used; otherwise the regular paper-print stylesheet is used.
var link = document.createElement( 'link' );
link.rel = 'stylesheet';
link.type = 'text/css';
// match() returns null (falsy) when "print-pdf" is absent from the query string.
link.href = window.location.search.match( /print-pdf/gi ) ? '../reveal.js-3.6.0/css/print/pdf.css' : '../reveal.js-3.6.0/css/print/paper.css';
document.getElementsByTagName( 'head' )[0].appendChild( link );
<!--[if lt IE 9]>
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
CSE 4/562 - Database Systems
<div class="slides">
<h3>CSE 4/562 Database Systems</h3>
<h5>January 29, 2018</h5>
2018-01-29 10:48:33 -05:00
<h2>Why Are Databases Awesome?</h2>
<h2>They're Everywhere</h2>
<img src="graphics/Clipart/SqlitePhone.png">
2018-01-29 08:38:48 -05:00
<table class="plainrowheads" style="text-align:center; font-size: small;">
<th colspan="2">Rank</th>
<th>Sales (B$)</th>
<th>Market cap (B$)</th>
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>1</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Microsoft</td><td>86.6</td><td>2017</td><td>601</td><td>Redmond, WA, US</td>
2018-01-29 08:38:48 -05:00
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>2</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Oracle</td><td>37.2</td><td>2017</td><td>205</td><td>Redwood City, CA, US</td>
2018-01-29 08:38:48 -05:00
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>3</td><td><img style="margin: 0px;" alt="Germany" src="graphics/2018-01-29-23px-Flag_of_Germany.svg.png" width="23" height="14"/></td><td>SAP</td><td>23.2</td><td>2017</td><td>117</td><td>Walldorf, Germany</td>
2018-01-29 08:38:48 -05:00
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>4</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Salesforce.com</td><td>8.4</td><td>2017</td><td>69</td><td>San Francisco, CA, US</td>
2018-01-29 08:38:48 -05:00
2018-01-29 10:48:33 -05:00
<td>5</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>VMware</td><td>6.7</td><td>2017</td><td>48</td><td>Palo Alto, CA, US</td>
2018-01-29 08:38:48 -05:00
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>6</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Fiserv</td><td>5.3</td><td>2017</td><td>26</td><td>Brookfield, WI, US</td>
2018-01-29 08:38:48 -05:00
2018-01-29 10:48:33 -05:00
<td>7</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Adobe Systems</td><td>5</td><td>2017</td><td>84</td><td>San Jose, CA, US</td>
2018-01-29 08:38:48 -05:00
2018-01-29 10:48:33 -05:00
<td>8</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"/></td><td>Symantec</td><td>5.4</td><td>2017</td><td>19</td><td>Mountain View, CA, US</td>
2018-01-29 08:38:48 -05:00
<tr class="fragment highlight-blue" data-fragment-index="1">
2018-01-29 10:48:33 -05:00
<td>9</td><td><img style="margin: 0px;" alt="Spain" src="graphics/2018-01-29-23px-Flag_of_Spain.svg.png" width="23" height="15"/></td><td>Amadeus IT Holdings</td><td>4.3</td><td>2017</td><td>25</td><td>Madrid, Spain</td>
2018-01-29 08:38:48 -05:00
<p class="fragment" data-fragment-index="1">5 of 9 Forbes Top Software Companies<br/>Have a Focus on Data Management Systems</p>
<imagecredits><a href="">(Source)</a></imagecredits>
<h2>Interesting Problems</h2>
<img src="graphics/2018-01-29-db_interesting.svg" />
<!-- ================================================================ -->
2018-01-29 10:48:33 -05:00
<h1>What is "Databases"?</h1>
2018-01-29 08:38:48 -05:00
<h4 style="margin-bottom: 0px;">How do we ask and answer questions about data?</h4>
2018-01-29 10:48:33 -05:00
<ul style="font-size: smaller" class="tight fragment">
<li>Data Curation</li>
2018-01-29 08:38:48 -05:00
<h4 style="margin-bottom: 0px; margin-top: 20px;">How do we manipulate and persist data?</h4>
2018-01-29 10:48:33 -05:00
<ul style="font-size: smaller" class="tight fragment">
2018-01-29 08:38:48 -05:00
<td style="font-weight: bold; vertical-align: middle;">Techniques</td>
<p class="fragment">Data Modeling</p>
<p class="fragment">Cost-Based Optimization</p>
<td style="font-weight: bold; vertical-align: middle;">Recipes</td>
<p class="fragment">Join Algorithms</p>
<p class="fragment">Index Data Structures</p>
<td style="font-weight: bold; vertical-align: middle;">Knowledge</td>
<p class="fragment">The Memory Hierarchy</p>
<p class="fragment">Data Consistency</p>
2018-01-29 10:48:33 -05:00
<h2>Which Tools To Use</h2>
<h2 class="fragment">And When?</h2>
<h3>Template for 90% of Database Systems</h3>
<p style="font-size: larger;">For <b>X</b>, the best, correct choice is <b>Y</b>, at least when <b>Z</b>.</p>
<ol style="margin-top: 20px;">
<li class="fragment">How do you define <i>Correct</i> and <i>Best</i>?</li>
<li class="fragment">What correct alternatives are available?</li>
<li class="fragment">How do you find the best available alternative?</li>
2018-01-29 08:38:48 -05:00
<!-- ================================================================ -->
<h2>General Course Information</h2>
<li>Oliver Kennedy (me)</li>
<li>Gokhan Kul (Practicum Lead)</li>
<li>William Spoth (Project TA)</li>
<li>Saurav Singhi (Concept TA)</li>
2018-01-29 10:48:33 -05:00
<li>Carl Nuessle (Ninja)</li>
<li>[ TBD ] </li>
2018-01-29 08:38:48 -05:00
<h3>Syllabus and Projects</h3>
2018-01-29 10:48:33 -05:00
<p><a href=""></a></p>
2018-01-29 08:38:48 -05:00
<p><a href=""></a></p>
2018-01-29 10:48:33 -05:00
<p style="font-size: small">(same link)</p>
2018-01-29 08:38:48 -05:00
<h3 style="margin-top: 50px;">Course Forum</h3>
<p><a href=""></a></p>
<h3>Course Structure</h3>
2018-01-29 10:48:33 -05:00
<dt>Concepts (50% of Grade; Lectures on Mon/Wed)</dt>
<dd><ul class="tight">
2018-01-29 08:38:48 -05:00
<li>Homework (10%; 12-15 Assignments, Keep Best 10)</li>
<li>Midterm (20% or 15%)</li>
<li>Comprehensive Final (20% or 25%)</li>
2018-01-29 10:48:33 -05:00
<dt>Practicum (50% of Grade; Lectures on Fri)</dt>
<dd><ul class="tight">
2018-01-29 08:38:48 -05:00
<li>Build a Relational Query Engine</li>
<li>3-Person Group Project</li>
<li>5 "Checkpoints"</li>
2018-01-29 16:40:42 -05:00
<img src="graphics/Books/DBSystemsHardcover.jpg" height="200">
<img src="graphics/Books/DBSystemsSoftcover.jpg" height="200">
<tr class="fragment">
<tr class="fragment">
<td>No Index<br/>ToC Summary</td>
2018-01-29 08:38:48 -05:00
<!-- ================================================================ -->
2018-01-29 10:48:33 -05:00
<h3>Embedded Databases</h3>
<li>SQLite (In your browser, computer, phone, fridge...)</li>
<li>Simple, Easy-To-Use Declarative Data Management</li>
<li>Critical for future tech: Part of Mobile, IoT, Web</li>
<p class="fragment"><b>Your Startup:</b> Build the next great <i>Embedded Database</i></p>
<h3>We give you...</h3>
<p style="font-size: smaller;">Data (CSV Files)</p>
<p style="font-size: smaller;">Schema Information (CREATE TABLE)</p>
<p style="font-size: smaller;">Questions (SQL Queries)</p>
<div class="fragment">
<h3>You give us...</h3>
<p style="font-size: smaller;">Answers</p>
<p style="font-size: smaller;" class="fragment">(really really fast)</p>
<h3>Real World Challenge</h3>
<p>You get graded on your code's...</p>
<dd>~1/3 credit for getting the right answer.</dd>
<dd>~2/3 credit for getting it reasonably fast.</dd>
<svg data-src="graphics/2018-01-29-project-overview.svg" class="stretch"/>
2018-01-29 08:38:48 -05:00
<h3>Checkpoint 0: "Hello World"</h3>
<p style="font-size: smaller;">5/50 pts</p>
<li>Form groups</li>
<li>Submit a simple Java program</li>
<li>Make sure that the submission workflow works for you.</li>
<h3>Checkpoint 1: "Intro to CSV"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<li>Parse SQL with JSQLParser</li>
<li>Load CSV Files</li>
<li>Project (Map) Data</li>
<li>Select (Filter) Data</li>
<h3>Checkpoint 2: "Real SQL"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<li>Order By</li>
<li>Nested Queries</li>
<li>Interactive Prompt</li>
<h3>Checkpoint 3: "Optimization"</h3>
<p style="font-size: smaller;">15/50 pts</p>
<li>Actual Data (no naive algorithms)</li>
<h3>Checkpoint 4: "The Real World"</h3>
<p style="font-size: smaller;">10/50 pts</p>
<li>Too much data for memory</li>
<li>Time for precomputation</li>
2018-01-29 10:48:33 -05:00
<!-- ================================================================ -->
<h3>Ways to Fail</h3>
<li>Start your project at the last minute</li>
<li>Don't go to office hours</li>
<li>Don't ask questions on Piazza</li>
<li>Wait until the deadline to submit for the first time</li>
<li class="fragment">Cheat</li>
<img src="graphics/Clipart/Graargh.png">
<h3>Academic Integrity</h3>
<p>Cheating is submitting any work that you did not perform by yourself <span class="fragment highlight-red">as if you did</span>.</p>
<dt>References (when cited)</dt>
<dd>Wikipedia, Wikibooks (or similar): <span style="color: #00882B; font-weight: bold;">OK</span></dd>
<dt>Public Code</dt>
<dd>Stack Exchange (or similar): <span style="color: #C82506; font-weight: bold;">Not OK</span></dd>
<dt><i>Discussing</i> concepts/ideas with classmates</dt>
<dd>“A hash index has O(1) lookups”: <span style="color: #00882B; font-weight: bold;">OK</span> <span style="color: #C82506; font-size: small;">(except during exams 😇 )</span></dd>
<dt><i>Sharing</i> code or answers with anyone</dt>
<dd>“Just have a look at how I implemented it”: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
<dd>For-hire code: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
<img src="graphics/2018-01-29-MOSS.png" height="400"/>
<img src="graphics/2018-01-29-MOSSDetails.png" height="400"/>
<dt>Zero Tolerance</dt>
<dd>If I catch you submitting someone else's code, <span style="font-weight: bold; text-decoration: underline;">you will fail the class</span>.</dd>
<dt>Group Responsibility</dt>
<dd>If your teammate cheats on a group project, <span style="font-weight: bold; text-decoration: underline;">the entire group will be penalized.</span></dd>
<dt>Share Code, Share Blame</dt>
<dd>If someone else submits your code as their own, <span style="font-weight: bold; text-decoration: underline;">you will be penalized as well.</span></dd>
<!-- ================================================================ -->
<h2>What does a Data Management System Do?</h2>
<dt>Analysis: Answering user-provided questions about data</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users? <ul class="tight">
<li class="fragment">Declarative Languages</li>
<li class="fragment">Organizational Data Structures (e.g., Indexes)</li>
<dt>Manipulation: Safely persisting and sharing data updates</dt>
<dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users?<ul class="tight">
<li class="fragment">Consistency Primitives</li>
<li class="fragment">Data Validation Primitives</li>
<svg data-src="graphics/2018-01-29-fs_vs_db.svg" />
<h2>So let's talk structure...</h2>
<dl style="font-size: smaller;">
<dd>Basic building blocks like Int, Float, Char, String</dd>
<dd>Several fields of different types. (N-Tuple = N fields)</dd>
<dd>A Tuple has a schema defining each field</dd>
<dd>A collection of unique records, all of the same type</dd>
<dd>An unordered collection of records, all of the same type</dd>
<dd>An ordered collection of records, all of the same type</dd>
<svg data-src="graphics/2018-01-29-rel-schemas.svg" />
Your data is currently an <i>Unordered Set</i> <br/>
of <i>Tuples</i> with 100 fields each.
<p class="fragment" style="margin-top: 50px;">
Tomorrow, you'll be repeatedly asked for <i>1 specific attribute</i><br/>
of <i>5 specific rows</i> identified by the <i>first attribute</i>
<h3 class="fragment">Can you do better?</h3>
<p><b>Better Idea</b>: Rewrite data into a 99-Tuple of Maps keyed on the 1st attribute</p>
<p class="fragment" style="margin-top: 50px;">This representation is <u>equivalent</u> and <u>better</u> for your needs.</p>
<p class="fragment" style="margin-top: 50px; font-weight: bold;">Declarative specifications make it easier to find equivalences.</p>
2018-01-29 08:38:48 -05:00
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
// Full list of configuration options available at:
2018-02-02 01:24:15 -05:00
controls: true,
2018-01-29 08:38:48 -05:00
progress: true,
history: true,
center: true,
slideNumber: true,
transition: 'fade', // none/fade/slide/convex/concave/zoom
chart: {
defaults: {
global: {
title: { fontColor: "#333", fontSize: 24 },
legend: {
labels: { fontColor: "#333", fontSize: 20 },
responsiveness: true
scale: {
scaleLabel: { fontColor: "#333", fontSize: 20 },
gridLines: { color: "#333", zeroLineColor: "#333" },
ticks: { fontColor: "#333", fontSize: 16 },
line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
bar: { backgroundColor: [
pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
// Optional ../reveal.js plugins
dependencies: [
{ src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
{ src: '../reveal.js-3.6.0/plugin/math/math.js',
condition: function() { return true; },
mathjax: '../reveal.js-3.6.0/js/MathJax.js'
{ src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
{ src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
{ src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
// Chart.min.js
{ src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
// the plugin
{ src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},
{ src: '../reveal.js-3.6.0/plugin/svginline/', async: false },
{ src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }