From e96830bad65485b90622789b185da589882c6381 Mon Sep 17 00:00:00 2001
From: Oliver Kennedy <okennedy@buffalo.edu>
Date: Wed, 15 Mar 2017 23:24:26 -0400
Subject: [PATCH] Checkpoint 2

---
 .../slides/2017-03-16-Checkpoint 2.html       | 231 ++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100755 src/teaching/cse-562/2017sp/slides/2017-03-16-Checkpoint 2.html
diff --git a/src/teaching/cse-562/2017sp/slides/2017-03-16-Checkpoint 2.html b/src/teaching/cse-562/2017sp/slides/2017-03-16-Checkpoint 2.html
new file mode 100755
index 00000000..5e64f231
--- /dev/null
+++ b/src/teaching/cse-562/2017sp/slides/2017-03-16-Checkpoint 2.html	
@@ -0,0 +1,231 @@
+<!doctype html>
+<html lang="en">
+
+<head>
+	<meta charset="utf-8">
+
+	<title>CSE 562 - Database Systems</title>
+
+	<meta name="description" content="CSE-562 Database Systems">
+	<meta name="author" content="Oliver Kennedy">
+
+	<meta name="apple-mobile-web-app-capable" content="yes">
+	<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
+
+	<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
+
+	<link rel="stylesheet" href="reveal/css/reveal.css">
+	<link rel="stylesheet" href="reveal/css/theme/simple.css" id="theme">
+
+	<!-- Theme used for syntax highlighting of code -->
+	<link rel="stylesheet" href="reveal/lib/css/zenburn.css">
+
+	<!-- Printing and PDF exports -->
+	<script>
+		var link = document.createElement( 'link' ); // print stylesheet, chosen at page load
+		link.rel = 'stylesheet';
+		link.type = 'text/css'; // href below: pdf.css when the query string matches "print-pdf" (any case), else paper.css
+		link.href = window.location.search.match( /print-pdf/gi ) ? 'reveal/css/print/pdf.css' : 'reveal/css/print/paper.css';
+		document.getElementsByTagName( 'head' )[0].appendChild( link ); // attach to the first head element
+	</script>
+
+	<!--[if lt IE 9]>
+	<script src="reveal/lib/js/html5shiv.js"></script>
+	<![endif]-->
+</head>
+
+<body><div class="reveal"><div class="slides">
+
+<!-- ############################################################################# -->
+<!-- ########################## BEGIN PRESENTATION BODY ########################## -->
+<!-- ############################################################################# -->
+
+<section>
+	<section>
+		<h1>CSE 562</h1>
+		<h2><b>March 16</b> - Checkpoint 2</h2>
+	</section>
+
+	<section>
+		<h2>Checkpoint 2</h2>
+
+		<svg width="900px" height="300px">
+			<image x="0" y="0" xlink:href="Images/Window.png" width="900px" height="24px" />
+			<rect x="0" y="24" fill="black" width="900px" height="276px" />
+			<g transform="translate(0,44)">
+				<text x="5" y="0" font-size="12pt" font-family="Courier" fill="#00ff00">sif$</text>
+				<g class="fragment">
+					<text x="50" y="0" font-size="12pt" font-family="Courier" fill="#00ff00">javac -cp build:*.jar -d build {all .java files}</text>
+					<text x="5" y="20" font-size="12pt" font-family="Courier" fill="#00ff00" class="fragment">sif$</text>
+				</g>
+			</g>
+			<g transform="translate(0,64)" class="fragment">
+				<text x="50" y="0" font-size="12pt" font-family="Courier" fill="#00ff00">java -cp build:jsqlparser.jar:evallib.jar dubstep.Main  - --preload --in-mem</text>
+				<text x="5" y="20" font-size="12pt" font-family="Courier" fill="#00ff00">$&gt;</text>
+			</g>
+			<g transform="translate(0,84)" class="fragment">
+				<text x="30" y="0" font-size="12pt" font-family="Courier" fill="#00ff00">CREATE TABLE R(</text>
+				<text x="40" y="20" font-size="12pt" font-family="Courier" fill="#00ff00">A int, B int, C int, </text>
+				<text x="40" y="40" font-size="12pt" font-family="Courier" fill="#00ff00">PRIMARY KEY (A), </text>
+				<text x="40" y="60" font-size="12pt" font-family="Courier" fill="#00ff00">INDEX B_INDEX (B)</text>
+				<text x="30" y="80" font-size="12pt" font-family="Courier" fill="#00ff00">);</text>
+				<text x="5" y="100" font-size="12pt" font-family="Courier" fill="#00ff00" class="fragment">$&gt;</text>
+			</g>
+			<g transform="translate(0, 184)" class="fragment">
+				<text x="30" y="0" font-size="12pt" font-family="Courier" fill="#00ff00">SELECT A, SUM(C) FROM R WHERE B &lt; 5 GROUP BY A;</text>
+				<text x="5" y="20" font-size="12pt" font-family="Courier" fill="#00ff00">1|5</text>
+				<text x="5" y="40" font-size="12pt" font-family="Courier" fill="#00ff00">...</text>
+				<text x="5" y="60" font-size="12pt" font-family="Courier" fill="#00ff00">$&gt;</text>
+			</g>
+		</svg>
+	</section>
+
+	<section>
+		<h2>Checkpoint 2</h2>
+		<ul>
+			<li>Two parts, one in-memory, one with more data than memory.</li>
+			<li>Preprocessing Step (You get 2 or 5 minutes for each CREATE TABLE)</li>
+			<li>New SQL features (GROUP-BY, LIMIT, SORT, From-Nesting)</li>
+			<li>Create table now includes "hints" (PRIMARY KEY, INDEX)</li>
+			<li>Tighter Constraints (Bigger data and less time/query)</li>
+		</ul>
+	</section>
+</section>
+
+<section>
+	<section>
+		<dl>
+			<dt><tt>--in-mem</tt></dt>
+			<dd>Java will be allowed to use 1GB of heap space (<tt>-Xmx1g</tt>), and the CSV representation of all input data will be under 50MB (similar size to checkpoint 1's 'Big Data').</dd>
+			<dt><tt>--on-disk</tt></dt>
+			<dd>Java will be allowed to use 150MB of heap space (<tt>-Xmx150m</tt>), and the textual representation of your data will be over 100MB.</dd>
+		</dl>
+		<p>As a general guideline, Java becomes incredibly slow once you hit 50% memory usage as you spend more time in the garbage collector than in your code.</p>
+	</section>
+
+	<section>
+		<dl>
+			<dt><tt>--preload</tt></dt>
+			<dd>Signals that <tt>CREATE TABLE</tt> statements will be allowed to take 2 minutes in in-memory mode, or 5 minutes in on-disk mode.  This is a good opportunity to sort your data, and/or create the index structures suggested in the statement body.</dd>
+		</dl>
+		<p><i>Professor's note</i>: For an illustration of the potential benefits of a longer <tt>CREATE TABLE</tt> step, have a look at the leaderboards for Checkpoint 1.</p>
+	</section>
+</section>
+
+<section>
+	<section>
+		<h2>New SQL Features: Sort</h2>
+		<dl>
+			<dt><tt>ORDER BY col1 asc/desc, col2 asc/desc, ...</tt></dt>
+			<dd>Sort the data on col1 (using col2, col3, ... as tiebreakers) in <b>asc</b>ending or <b>desc</b>ending order.</dd>
+		</dl>
+		<ul>
+			<li>You will likely need 2 implementations: <ul>
+				<li>In-Memory (Reference Impl Uses Java's <tt>Collections.sort</tt>)</li>
+				<li>On-Disk (Reference Impl Uses 2-Pass Sort)</li>
+			</ul></li>
+		</ul>
+	</section>
+
+	<section>
+		<h2>New SQL Features: Limit</h2>
+		<dl>
+			<dt><tt>LIMIT N</tt></dt>
+			<dd>Return only the first <tt>N</tt> rows.  If the data is sorted, return the first rows according to the sort order.  If not, return an arbitrary <tt>N</tt> rows.</dd>
+		</dl>
+		<p>JSQLParser also supports more expressive limit clauses (e.g., including offsets).  You will not be required to support anything more complex than <tt>LIMIT N</tt>.</p>
+	</section>
+
+	<section>
+		<h2>New SQL Features: Group-By</h2>
+		<dl>
+			<dt><tt>SELECT A, B, SUM(C), ... FROM ... GROUP BY A, B</tt></dt>
+			<dd>Group the data by the A and B columns and apply the aggregate functions to each group.</dd>
+		</dl>
+	</section>
+
+	<section>
+		<h2>New SQL Features: From Nesting</h2>
+		<dl>
+			<dt><tt>SELECT * FROM (SELECT A, B FROM R) q</tt></dt>
+			<dd>You're still not going to get more than one FromItem, but now it can be a SELECT query instead of a table.</dd>
+		</dl>
+		<p>If you used iterators for checkpoint 1, you should just be able to replace a Table Scan iterator with the iterator you construct for the nested query.</p>
+	</section>
+</section>
+
+<section>
+	<section>
+		<h2>Handling Tight Constraints</h2>
+		<dl>
+			<dt>Index Scans</dt>
+			<dd>You'll need to detect when opportunities to use them arise.  Remember that your WHERE clause may be an AND of different options, and you may have multiple indices that you can use.</dd>
+			<dt>Selection Pushdown</dt>
+			<dd>Selection Pushdown is ALWAYS good.  This is most likely to arise in the context of nested subqueries, but if you implement it in a general way you'll benefit in Checkpoint 3 as well.</dd>
+		</dl>
+	</section>
+
+	<section>
+		<h2>Pattern Matching</h2>
+		Remember pattern matching on Statement and Expression objects?  Try it on Iterators!
+		<pre><code>
+Iterator optimize(Iterator query) {     // returns the (possibly rewritten) plan
+  if(query instanceof Filter){          // pattern: a Filter ...
+    Filter f = (Filter)query;
+    if(f.input instanceof Project){     // ... directly above a Project
+      Project p = (Project)f.input;
+      // replace query with a new pair of iterators where
+      // the Project uses the Filter as an input.
+    }
+  }
+  // Recur: optimize the plan beneath this node
+  query.input = optimize(query.input);
+  return query;
+}
+		</code></pre>
+	</section>
+</section>
+
+<section>
+<h1>Questions?</h1>
+</section>
+
+
+
+
+<!-- ############################################################################# -->
+<!-- ########################### END PRESENTATION BODY ########################### -->
+<!-- ############################################################################# -->
+
+
+</div></div>
+
+<script src="reveal/lib/js/head.min.js"></script>
+<script src="reveal/js/reveal.js"></script>
+
+<script>
+
+	// Start reveal.js with this deck's settings. More info https://github.com/hakimel/reveal.js#configuration
+	Reveal.initialize({
+		controls: true,
+		progress: true,
+		history: true,
+		center: true,
+		// (the configuration link above documents what each of these flags does)
+		transition: 'slide', // none/fade/slide/convex/concave/zoom
+
+		// Scripts reveal.js loads for us; a "condition" presumably gates loading on its return value (confirm in the docs).
+		// More info https://github.com/hakimel/reveal.js#dependencies
+		dependencies: [
+			{ src: 'reveal/lib/js/classList.js', condition: function() { return !document.body.classList; } },
+			{ src: 'reveal/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
+			{ src: 'reveal/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
+			{ src: 'reveal/plugin/highlight/highlight.js', async: true, callback: function() { hljs.initHighlightingOnLoad(); } },
+			{ src: 'reveal/plugin/zoom-js/zoom.js', async: true },
+			{ src: 'reveal/plugin/notes/notes.js', async: true }
+		]
+	});
+
+</script>
+
+</body>
+</html>