<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>CSE 4/562 - Spring 2018</title>
  <meta name="description" content="CSE 4/562 - Spring 2018">
  <meta name="author" content="Oliver Kennedy">
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
  <!-- NOTE(review): maximum-scale=1.0 / user-scalable=no block pinch-zoom, which is an
       accessibility concern. Left unchanged here because touch navigation of the deck may
       rely on it; confirm before relaxing. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">

  <link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
  <link rel="stylesheet" href="ubodin.css" id="theme">

  <!-- Code syntax highlighting -->
  <link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">

  <!-- Printing and PDF exports -->
  <script>
    // Append a print stylesheet at load time: pdf.css when the query string
    // contains "print-pdf" (case-insensitive), paper.css otherwise.
    var link = document.createElement( 'link' );
    link.rel = 'stylesheet';
    link.type = 'text/css';
    link.href = window.location.search.match( /print-pdf/gi ) ? '../reveal.js-3.6.0/css/print/pdf.css' : '../reveal.js-3.6.0/css/print/paper.css';
    document.getElementsByTagName( 'head' )[0].appendChild( link );
  </script>

  <!-- Conditional comment: no whitespace is allowed between the comment opener and "[if". -->
  <!--[if lt IE 9]>
  <script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
  <![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
CSE 4/562 - Database Systems
</div>
<div class="slides">
<!-- Title slide -->
<section>
  <h1>Intro</h1>
  <h3>CSE 4/562 – Database Systems</h3>
  <h5>January 29, 2018</h5>
</section>
<!-- Vertical stack: motivation for studying databases -->
<section>
  <section>
    <h2>Why Are Databases Awesome?</h2>
  </section>

  <section>
    <h2>They're Everywhere</h2>
    <!-- TODO(review): alt text inferred from the file name; confirm against the image. -->
    <img src="graphics/Clipart/SqlitePhone.png" alt="SQLite on a phone">
  </section>

  <section>
    <h2>$ $ $</h2>
    <!-- NOTE(review): six rows below carry "highlight-blue", but the caption after the
         table says "5 of 9"; confirm which one is intended. -->
    <table class="plainrowheads" style="text-align:center; font-size: small;">
      <thead>
        <tr>
          <th scope="col" colspan="2">Rank</th>
          <th scope="col">Organization</th>
          <th scope="col">Sales (B$)</th>
          <th scope="col">FY</th>
          <th scope="col">Market cap (B$)</th>
          <th scope="col">Headquarters</th>
        </tr>
      </thead>
      <tbody>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>1</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Microsoft</td><td>86.6</td><td>2017</td><td>601</td><td>Redmond, WA, US</td>
        </tr>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>2</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Oracle</td><td>37.2</td><td>2017</td><td>205</td><td>Redwood City, CA, US</td>
        </tr>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>3</td><td><img style="margin: 0px;" alt="Germany" src="graphics/2018-01-29-23px-Flag_of_Germany.svg.png" width="23" height="14"></td><td>SAP</td><td>23.2</td><td>2017</td><td>117</td><td>Walldorf, Germany</td>
        </tr>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>4</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Salesforce.com</td><td>8.4</td><td>2017</td><td>69</td><td>San Francisco, CA, US</td>
        </tr>
        <tr>
          <td>5</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>VMware</td><td>6.7</td><td>2017</td><td>48</td><td>Palo Alto, CA, US</td>
        </tr>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>6</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Fiserv</td><td>5.3</td><td>2017</td><td>26</td><td>Brookfield, WI, US</td>
        </tr>
        <tr>
          <td>7</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Adobe Systems</td><td>5</td><td>2017</td><td>84</td><td>San Jose, CA, US</td>
        </tr>
        <tr>
          <td>8</td><td><img style="margin: 0px;" alt="United States" src="graphics/2018-01-29-23px-Flag_of_the_United_States.svg.png" width="23" height="12"></td><td>Symantec</td><td>5.4</td><td>2017</td><td>19</td><td>Mountain View, CA, US</td>
        </tr>
        <tr class="fragment highlight-blue" data-fragment-index="1">
          <td>9</td><td><img style="margin: 0px;" alt="Spain" src="graphics/2018-01-29-23px-Flag_of_Spain.svg.png" width="23" height="15"></td><td>Amadeus IT Holdings</td><td>4.3</td><td>2017</td><td>25</td><td>Madrid, Spain</td>
        </tr>
      </tbody>
      <tfoot></tfoot>
    </table>
    <p class="fragment" data-fragment-index="1">5 of 9 Forbes Top Software Companies<br>Have a Focus on Data Management Systems</p>
    <imagecredits><a href="https://en.wikipedia.org/wiki/List_of_the_largest_software_companies">(Source wikipedia.org)</a></imagecredits>
  </section>

  <section>
    <h2>Interesting Problems</h2>
    <!-- TODO(review): alt text inferred from the slide heading; confirm against the image. -->
    <img src="graphics/2018-01-29-db_interesting.svg" alt="Illustration of interesting database problems">
  </section>
</section>
<!-- ================================================================ -->
<!-- Vertical stack: what the field of "Databases" covers -->
<section>
  <section>
    <h1>What is "Databases"?</h1>
  </section>

  <section>
    <h4 style="margin-bottom: 0px;">How do we ask and answer questions about data?</h4>
    <ul style="font-size: smaller" class="tight fragment">
      <li>Efficiency</li>
      <li>Accuracy</li>
      <li>Summarization</li>
      <li>Data Curation</li>
    </ul>
    <h4 style="margin-bottom: 0px; margin-top: 20px;">How do we manipulate and persist data?</h4>
    <ul style="font-size: smaller" class="tight fragment">
      <li>Consistency</li>
      <li>Correctness</li>
      <li>Parallelism</li>
    </ul>
  </section>

  <section>
    <h3>Databases</h3>
    <table>
      <tr>
        <td style="font-weight: bold; vertical-align: middle;">Techniques</td>
        <td>
          <p class="fragment">Data Modeling</p>
          <p class="fragment">Cost-Based Optimization</p>
        </td>
      </tr>
      <tr>
        <td style="font-weight: bold; vertical-align: middle;">Recipes</td>
        <td>
          <p class="fragment">Join Algorithms</p>
          <p class="fragment">Index Data Structures</p>
        </td>
      </tr>
      <tr>
        <td style="font-weight: bold; vertical-align: middle;">Knowledge</td>
        <td>
          <p class="fragment">The Memory Hierarchy</p>
          <p class="fragment">Data Consistency</p>
        </td>
      </tr>
    </table>
  </section>

  <section>
    <h2>Which Tools To Use</h2>
    <h2 class="fragment">And When?</h2>
  </section>

  <section>
    <h3>Template for 90% of Database Systems</h3>
    <p style="font-size: larger;">For <b>X</b>, the best, correct choice is <b>Y</b>, at least when <b>Z</b>.</p>
    <ol style="margin-top: 20px;">
      <li class="fragment">How do you define <i>Correct</i> and <i>Best</i>?</li>
      <li class="fragment">What correct alternatives are available?</li>
      <li class="fragment">How do you find the best available alternative?</li>
    </ol>
  </section>
</section>
<!-- ================================================================ -->
<!-- Vertical stack: course logistics (staff, links, grading, textbook) -->
<section>
  <section>
    <h2>General Course Information</h2>
  </section>

  <section>
    <h3>People</h3>
    <ul>
      <li>Oliver Kennedy (me)</li>
      <li>Gokhan Kul (Practicum Lead)</li>
      <li>William Spoth (Project TA)</li>
      <li>Saurav Singhi (Concept TA)</li>
      <li>Carl Nuessle (Ninja)</li>
      <li>[ TBD ]</li>
    </ul>
  </section>

  <section>
    <h3>Syllabus and Projects</h3>
    <p><a href="https://odin.cse.buffalo.edu/teaching/cse-462/">https://odin.cse.buffalo.edu/teaching/cse-462/</a></p>
    <p><a href="https://odin.cse.buffalo.edu/teaching/cse-562/">https://odin.cse.buffalo.edu/teaching/cse-562/</a></p>
    <p style="font-size: small">(same link)</p>
    <h3 style="margin-top: 50px;">Course Forum</h3>
    <p><a href="https://piazza.com/buffalo/spring2018/cse4562/home">https://piazza.com/buffalo/spring2018/cse4562/home</a></p>
  </section>

  <section>
    <h3>Course Structure</h3>
    <dl>
      <dt>Concepts (50% of Grade; Lectures on Mon/Wed)</dt>
      <dd>
        <ul class="tight">
          <li>Homework (10%; 12-15 Assignments, Keep Best 10)</li>
          <li>Midterm (20% or 15%)</li>
          <li>Comprehensive Final (20% or 25%)</li>
        </ul>
      </dd>
      <dt>Practicum (50% of Grade; Lectures on Fri)</dt>
      <dd>
        <ul class="tight">
          <li>Build a Relational Query Engine</li>
          <li>3-Person Group Project</li>
          <li>5 "Checkpoints"</li>
        </ul>
      </dd>
    </dl>
  </section>

  <!-- Textbook comparison. TODO(review): alt text inferred from the file names; confirm. -->
  <section>
    <table>
      <tr>
        <td>
          <img src="graphics/Books/DBSystemsHardcover.jpg" alt="Database Systems textbook, hardcover edition" height="200">
        </td>
        <td>
          <img src="graphics/Books/DBSystemsSoftcover.jpg" alt="Database Systems textbook, softcover edition" height="200">
        </td>
      </tr>
      <tr class="fragment">
        <td>$150</td>
        <td>$50</td>
      </tr>
      <tr class="fragment">
        <td>Index<br>ToC</td>
        <td>No Index<br>ToC Summary</td>
      </tr>
    </table>
  </section>
</section>
<!-- ================================================================ -->
<!-- Vertical stack: the course project and its checkpoints -->
<section>
  <section>
    <h3>Embedded Databases</h3>
    <ul>
      <li>SQLite (In your browser, computer, phone, fridge...)</li>
      <li>Simple, Easy-To-Use Declarative Data Management</li>
      <li>Critical for future tech: Part of Mobile, IoT, Web</li>
    </ul>
    <p class="fragment"><b>Your Startup:</b> Build the next great <i>Embedded Database</i></p>
  </section>

  <section>
    <h3>We give you...</h3>
    <p style="font-size: smaller;">Data (CSV Files)</p>
    <p style="font-size: smaller;">Schema Information (CREATE TABLE)</p>
    <p style="font-size: smaller;">Questions (SQL Queries)</p>
    <div class="fragment">
      <hr>
      <h3>You give us...</h3>
      <p style="font-size: smaller;">Answers</p>
      <p style="font-size: smaller;" class="fragment">(really really fast)</p>
    </div>
  </section>

  <section>
    <h3>Real World Challenge</h3>
    <p>You get graded on your code's...</p>
    <dl>
      <dt>Correctness</dt>
      <dd>~1/3 credit for getting the right answer.</dd>
      <dt>Performance</dt>
      <dd>~2/3 credit for getting it reasonably fast.</dd>
    </dl>
  </section>

  <section>
    <svg data-src="graphics/2018-01-29-project-overview.svg" class="stretch" />
  </section>

  <section>
    <h3>Checkpoint 0: "Hello World"</h3>
    <p style="font-size: smaller;">5/50 pts</p>
    <ul>
      <li>Form groups</li>
      <li>Submit a simple Java program</li>
      <li>Make sure that the submission workflow works for you.</li>
    </ul>
  </section>

  <section>
    <h3>Checkpoint 1: "Intro to CSV"</h3>
    <p style="font-size: smaller;">10/50 pts</p>
    <ul>
      <li>Parse SQL with JSQLParser</li>
      <li>Load CSV Files</li>
      <li>Project (Map) Data</li>
      <li>Select (Filter) Data</li>
    </ul>
  </section>

  <section>
    <h3>Checkpoint 2: "Real SQL"</h3>
    <p style="font-size: smaller;">10/50 pts</p>
    <ul>
      <li>Joins</li>
      <li>Order By</li>
      <li>Limit</li>
      <li>Nested Queries</li>
      <li>Interactive Prompt</li>
    </ul>
  </section>

  <section>
    <h3>Checkpoint 3: "Optimization"</h3>
    <p style="font-size: smaller;">15/50 pts</p>
    <ul>
      <li>Aggregation</li>
      <li>Actual Data (no naive algorithms)</li>
    </ul>
  </section>

  <section>
    <h3>Checkpoint 4: "The Real World"</h3>
    <p style="font-size: smaller;">10/50 pts</p>
    <ul>
      <li>Too much data for memory</li>
      <li>Time for precomputation</li>
    </ul>
  </section>
</section>
<!-- ================================================================ -->
<!-- Vertical stack: how to fail the course, and the academic integrity policy -->
<section>
  <section>
    <h3>Ways to Fail</h3>
    <ul>
      <li>Start your project at the last minute</li>
      <li>Don’t go to office hours</li>
      <li>Don’t ask questions on Piazza</li>
      <li>Wait until the deadline to submit for the first time</li>
      <li class="fragment">Cheat</li>
    </ul>
  </section>

  <section>
    <!-- TODO(review): treated as a decorative reaction image (empty alt); confirm. -->
    <img src="graphics/Clipart/Graargh.png" alt="">
  </section>

  <section>
    <h3>Academic Integrity</h3>
    <p>Cheating is submitting any work that you did not perform by yourself <span class="fragment highlight-red">as if you did</span>.</p>
  </section>

  <section>
    <dl>
      <dt>References (when cited)</dt>
      <dd>Wikipedia, Wikibooks (or similar): <span style="color: #00882B; font-weight: bold;">OK</span></dd>
      <dt>Public Code</dt>
      <dd>Stack Exchange (or similar): <span style="color: #C82506; font-weight: bold;">Not OK</span></dd>
      <dt><i>Discussing</i> concepts/ideas with classmates</dt>
      <dd>“A hash index has O(1) lookups”: <span style="color: #00882B; font-weight: bold;">OK</span> <span style="color: #C82506; font-size: small;">(except during exams 😇)</span></dd>
      <dt><i>Sharing</i> code or answers with anyone</dt>
      <dd>“Just have a look at how I implemented it”: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
      <dd>For-hire code: <span style="color: #C82506; font-weight: bold;">NOT OK</span></dd>
    </dl>
  </section>

  <!-- TODO(review): alt text for the two MOSS images is inferred from the file names; confirm. -->
  <section>
    <h3>MOSS</h3>
    <img src="graphics/2018-01-29-MOSS.png" alt="MOSS screenshot" height="400">
  </section>

  <section>
    <h3>MOSS</h3>
    <img src="graphics/2018-01-29-MOSSDetails.png" alt="MOSS details screenshot" height="400">
  </section>

  <section>
    <dl>
      <dt>Zero Tolerance</dt>
      <dd>If I catch you submitting someone else’s code, <span style="font-weight: bold; text-decoration: underline;">you will fail the class</span>.</dd>
      <dt>Group Responsibility</dt>
      <dd>If your teammate cheats on a group project, <span style="font-weight: bold; text-decoration: underline;">the entire group will be penalized.</span></dd>
      <dt>Share Code, Share Blame</dt>
      <dd>If someone else submits your code as their own, <span style="font-weight: bold; text-decoration: underline;">you will be penalized as well.</span></dd>
    </dl>
  </section>

  <section>
    <h3>Questions/Concerns?</h3>
  </section>
</section>
<!-- ================================================================ -->
<!-- Vertical stack: what a data management system does, and basic data structure vocabulary -->
<section>
  <section>
    <h2>What does a Data Management System Do?</h2>
  </section>

  <section>
    <dl>
      <dt>Analysis: Answering user-provided questions about data</dt>
      <dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users?
        <ul class="tight">
          <li class="fragment">Declarative Languages</li>
          <li class="fragment">Organizational Datastructures (e.g., Indexes)</li>
        </ul>
      </dd>
      <dt>Manipulation: Safely persisting and sharing data updates</dt>
      <dd class="fragment" data-fragment-index="1">What kind of tools can we give end-users?
        <ul class="tight">
          <li class="fragment">Consistency Primitives</li>
          <li class="fragment">Data Validation Primitives</li>
        </ul>
      </dd>
    </dl>
  </section>

  <section>
    <svg data-src="graphics/2018-01-29-fs_vs_db.svg" />
  </section>

  <section>
    <h2>So let's talk structure...</h2>
  </section>

  <section>
    <dl style="font-size: smaller;">
      <dt>Primitive</dt>
      <dd>Basic building blocks like Int, Float, Char, String</dd>
      <dt>Tuple</dt>
      <dd>Several ‘fields’ of different types. (N-Tuple = N fields)</dd>
      <dd>A Tuple has a ‘schema’ defining each field</dd>
      <dt>Set</dt>
      <dd>A collection of unique records, all of the same type</dd>
      <dt>Bag</dt>
      <dd>An unordered collection of records, all of the same type</dd>
      <dt>List</dt>
      <dd>An ordered collection of records, all of the same type</dd>
    </dl>
  </section>

  <section>
    <svg data-src="graphics/2018-01-29-rel-schemas.svg" />
  </section>

  <section>
    <p>
      Your data is currently an <i>Unordered Set</i><br>
      of <i>Tuples</i> with 100 fields each.
    </p>
    <p class="fragment" style="margin-top: 50px;">
      Tomorrow, you’ll be repeatedly asked for <i>1 specific attribute</i><br>
      of <i>5 specific rows</i> identified by the <i>first attribute</i>
    </p>
    <h3 class="fragment">Can you do better?</h3>
  </section>

  <section>
    <p><b>Better Idea</b>: Rewrite data into a 99-Tuple of Maps keyed on the 1st attribute</p>
    <p class="fragment" style="margin-top: 50px;">This representation is <u>equivalent</u> and <u>better</u> for your needs.</p>
    <p class="fragment" style="margin-top: 50px; font-weight: bold;">Declarative specifications make it easier to find equivalences.</p>
  </section>
</section>
</div></div>
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
<script>
  // Deck bootstrap: configures reveal.js and lists the plugin scripts it should load.
  // Full list of configuration options available at:
  // https://github.com/hakimel/reveal.js#configuration
  Reveal.initialize({
    controls: true,
    progress: true,
    history: true,
    center: true,
    slideNumber: true,
    transition: 'fade', // none/fade/slide/convex/concave/zoom

    // Chart styling defaults; presumably consumed by the chart plugin
    // (Chart.min.js + csv2chart.js) listed under `dependencies` below.
    chart: {
      defaults: {
        global: {
          title: { fontColor: "#333", fontSize: 24 },
          legend: {
            labels: { fontColor: "#333", fontSize: 20 },
          },
          responsiveness: true
        },
        scale: {
          scaleLabel: { fontColor: "#333", fontSize: 20 },
          gridLines: { color: "#333", zeroLineColor: "#333" },
          ticks: { fontColor: "#333", fontSize: 16 },
        }
      },
      line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
      bar: { backgroundColor: [
        "rgba(220,220,220,0.8)",
        "rgba(151,187,205,0.8)",
        "rgba(205,151,187,0.8)",
        "rgba(187,205,151,0.8)"
      ]
      },
      pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
      radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
    },

    // Optional reveal.js plugins
    dependencies: [
      // classList polyfill, loaded only when document.body.classList is missing.
      { src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
      // NOTE(review): the stock reveal.js 3.x math plugin is believed to read its MathJax URL
      // from a top-level `math: { mathjax: ... }` option, not from a key on the dependency
      // entry; confirm that this `mathjax` key is actually honoured.
      { src: '../reveal.js-3.6.0/plugin/math/math.js',
        condition: function() { return true; },
        mathjax: '../reveal.js-3.6.0/js/MathJax.js'
      },
      // Markdown support, loaded only when a [data-markdown] element exists.
      { src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
      { src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
      // Syntax highlighting, loaded only when a <pre><code> block exists.
      { src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
      { src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
      { src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
      // Chart.min.js
      { src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
      // the plugin
      { src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},
      // SVG inlining, loaded synchronously; presumably handles the <svg data-src="..."> slides.
      { src: '../reveal.js-3.6.0/plugin/svginline/es6-promise.auto.js', async: false },
      { src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }
    ]
  });
</script>
</body>
</html>