Website/slides/talks/2017-4-Tour-JITDs/index.html

<!doctype html>
<html lang="en">

	<head>
		<meta charset="utf-8">

		<title>Just-in-Time Data Structures</title>

		<meta name="description" content="Small Data">
		<meta name="author" content="Oliver Kennedy">

		<meta name="apple-mobile-web-app-capable" content="yes" />
		<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />

		<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">

		<link rel="stylesheet" href="../reveal.js-3.5.0/css/reveal.css">
		<link rel="stylesheet" href="ubodin.css" id="theme">

		<!-- Code syntax highlighting -->
		<link rel="stylesheet" href="../reveal.js-3.5.0/lib/css/zenburn.css">


		<style>
		/* Panelist table: the name column is right-aligned so it sits flush
		   against the affiliation column beside it. */
		table.panelists td.name {
			text-align: right;
		}
		/* Affiliation column: smaller, italic, left-aligned.
		   Italics are set with font-style; text-decoration has no "italic"
		   value, so that declaration would be ignored by the browser. */
		table.panelists td.affiliation {
			font-size: smaller;
			font-style: italic;
			text-align: left;
		}
		</style>

		<!-- Printing and PDF exports -->
		<script>
			// Build a <link> element for the print stylesheet and attach it to <head>.
			// When the URL query string contains "print-pdf" (matched case-insensitively)
			// the PDF-export sheet is used; otherwise the paper sheet is used.
			var link = document.createElement( 'link' );
			link.type = 'text/css';
			link.rel = 'stylesheet';
			if ( window.location.search.match( /print-pdf/gi ) ) {
				link.href = '../reveal.js-3.5.0/css/print/pdf.css';
			} else {
				link.href = '../reveal.js-3.5.0/css/print/paper.css';
			}
			document.getElementsByTagName( 'head' )[0].appendChild( link );
		</script>

		<!--[if lt IE 9]>
		<script src="../reveal.js-3.5.0/lib/js/html5shiv.js"></script>
		<![endif]-->
	</head>

	<body>

		<div class="reveal">

		<div class="header">
			<!-- Any Talk-Specific Header Content Goes Here -->
			<center>
				<a href="http://www.buffalo.edu" target="_blank">
					<img src="../graphics/logos/ub-1line-ro-white.png" height="20"/>
				</a>
			</center>
		</div>
		<div class="footer">
			<!-- Any Talk-Specific Footer Content Goes Here -->
			<div style="float: left; margin-top: 15px; ">
			Exploring <u><b>O</b></u>nline <u><b>D</b></u>ata <u><b>In</b></u>teractions
			</div>
			<a href="https://odin.cse.buffalo.edu" target="_blank">
				<img src="../graphics/logos/odin-1line-white.png" height="40" style="float: right;"/>
			</a>
		</div>

			<div class="slides">
		<!-- Any section element inside of this container is displayed as a slide -->

			<section>
				<!-- Credits... introduce everyone, etc... -->
				<section>
					<h2>Just-in-Time Data Structures</h2>
					<h4>Oliver Kennedy</h4>
					<h4><a href="mailto:okennedy@buffalo.edu">okennedy@buffalo.edu</a></h4>
				</section>

				<section>
					<table>
						<tr>
							<td width="150px"><img src="graphics/saurav.jpg" height="150px"/></td>
							<td width="150px"><img src="graphics/darshana.jpg" height="150px"/></td>
							<td width="150px"><img src="graphics/hank.jpg" height="150px"/></td>
							<td width="150px"><img src="graphics/ankur.jpg" height="150px"/></td>
							<td width="150px"><img src="graphics/luke.png" height="150px"/></td>
						</tr>
						<tr style="font-size: smaller;">
							<td>Saurav Singhi</td>
							<td>Darshana Balakrishnan<br/></td>
							<td>Hank Lin</td>
							<td>Ankur Upadhyay</td>
							<td>Lukasz Ziarek</td>
						</tr>
						<tr style="font-size: small;">
							<td>(PhD In Progress)</td>
							<td>(MS In Progress)</td>
							<td>(BS 2017)</td>
							<td>(MS 2014)</td>
							<td>(Prof @ UB)</td>
						</tr>
					</table>
					<p style="font-size: small">With support from NSF Awards IIS-1617586 and CNS-1629791</p>
				</section>
			</section>

			<section>
				<!-- Establish the setting:
				  - Data Structures are a game of trade-offs.
				  - Which structure is best?  Well, it depends!
				  - Specific data structures lock you in to a specific set of tradeoffs:
				  		- Read vs Write
				  		- Batch vs Individual Updates
				  		- Scan vs Lookup vs Range Queries
				 -->

				<section>
					<img src="graphics/conan.png" />

					<h3>What is best in life?</h3>
					<p class="fragment">(for organizing your data)</p>

					<attribution>&copy; Universal Pictures</attribution>
				</section>

				<section>
					<svg width="280" height="500" style="float: right">
						<image x="55" y="20" width="180" height="180"
									 xlink:href="figs/ExampleDSes-BinTree.svg"
						/>
						<image x="20" y="185" width="250" height="170"
									 xlink:href="figs/ExampleDSes-LinkedList.svg"
						/>
						<image x="75" y="290" width="145" height="145"
									 xlink:href="figs/ExampleDSes-SortedArray.svg"
						/>
					</svg>
					<h3>API</h3>
					<ul>
						<p>Insert $\lt key, value\gt$</p>
						<p>Query for $key \in [low, high)$</p>
					</ul>
					<hr/>
					<h3>Available Structures</h3>
					<p>
						Binary Tree, Linked List, Sorted Array
					</p>
				</section>

				<section>
					<img src="graphics/vizini.jpg" />
					<h3>You guessed wrong!</h3>
					<p style="font-size: smaller;">(unless you didn't)</p>
				</section>

				<section>
					<svg width="600" height="600">
						<image x="0" y="0" width="600" height="600"
						       xlink:href="figs/DSTradeoffs.svg"
						/>
					</svg>
	     </section>

				<section>
					<h2>Other Tradeoffs</h2>
					<ul>
						<li>Support for Threads</li>
						<li>Lookup vs Full Scan vs Range Scan</li>
						<li>Optimal Update Size</li>
					</ul>
	      </section>

				<section>
					<svg width="400" height="400" style="float: right">
						<image x="0" y="0" width="400" height="400"
						       xlink:href="figs/DSTradeoffs.svg"
						/>
						<g class="fragment" data-fragment-index="2">
							<g class="fragment fade-out" data-fragment-index="4">
								<text x="105" y="97" style="font-weight: bold; fill: red;">[Best]</text>
							</g>
						</g>
						<g class="fragment" data-fragment-index="4">
							<g class="fragment fade-out" data-fragment-index="6">
								<text x="140" y="224" style="font-weight: bold; fill: red;">[Best]</text>
							</g>
						</g>
						<g class="fragment" data-fragment-index="6">
							<g class="fragment fade-out" data-fragment-index="8">
								<text x="285" y="280" style="font-weight: bold; fill: red;">[Best]</text>
							</g>
						</g>
						<g class="fragment" data-fragment-index="8">
							<text x="95" y="97" style="font-weight: bold; fill: red;">[Best?]</text>
							<text x="130" y="224" style="font-weight: bold; fill: red;">[Best?]</text>
						</g>
					</svg>
					<h3>Interactive Analytics</h3>
					<ol style="margin-top: 20px;">
						<li class="fragment" data-fragment-index="1" style="margin-top: 10px;">User Opens CSV File</li>
						<li class="fragment" data-fragment-index="3"  style="margin-top: 10px;">User Poses Query as File Loads</li>
						<li class="fragment" data-fragment-index="5"  style="margin-top: 10px;">Lots More Queries</li>
						<li class="fragment" data-fragment-index="7"  style="margin-top: 10px;">User Adds More Data</li>
					</ol>
					<p class="fragment" data-fragment-index="9" style="clear: right; font-weight: bold;">
						Even in a single session, there may be more than one "optimal" data structure.
					</p>
	      </section>
			</section>

			<section>
				<section>
					<h2>State of the Art</h2>
					<aside class="notes">
						<p>1. Jack-of-all-trades, master of none.  e.g., B+Tree, LSM Tree</p>
						<p>2. a) Expensive, b) Need to be able to predict workload shifts before needed</p>
						<p>3. Mountain of programmer effort</p>
					</aside>
				</section>

				<section>
					<img src="graphics/victorinox.jpg" height="400px" />
					<aside class="notes">Jack of all trades data structure (e.g., B+Tree or LSM Trees).  Classic workhorses, but they have their shortcomings --- B+: slow updates, LSM: write amplification, slow reads</aside>
					<attribution>Victorinox</attribution>
				</section>

				<section>
					<img src="graphics/44922_large.jpg" style="vertical-align: middle;" width="300px"/><span class="fragment"  style="text-align: middle;">➔<img src="graphics/garbage.jpg"  style="vertical-align: middle;" width="300px"/></span>
					<aside class="notes">Keep re-building structures for different workloads</aside>
				</section>

				<section>
					<img src="graphics/bespoke.png" height="400px" />
					<aside class="notes">Bespoke data structures</aside>
				</section>

				<section>
					<ul>
						<li>Jack of All Trades Data Structures <div>(e.g., B+ Tree, LSM Tree)</div></li>
						<li>Keep re-building structures for different workloads<div>(e.g., <span style="font-family: Courier">DROP INDEX</span> ➔ <span style="font-family: Courier">LOAD TABLE</span> ➔ <span style="font-family: Courier">CREATE INDEX</span>)</div></li>
						<li>Bespoke data structures<div>(e.g., KD+R*++#N-Tree; Author et.al. SIGMOD 2023)</div></li>
					</ul>
					<p style="font-size: smaller; margin-top: 60px" class="fragment">No way to gracefully transition between different tradeoffs.</p>
				</section>

				<section>
					<ol>
						<li class="fragment" style="margin-top: 30px">What does it mean for a data structure to be halfway between a Binary Tree and a Linked List?</li>
						<li class="fragment" style="margin-top: 30px">How would we access and manipulate such a data structure?</li>
						<li class="fragment" style="margin-top: 30px">When and how should a data structure transition?</li>
						<li class="fragment" style="margin-top: 30px">How do we automatically generate bespoke data-structures?</li>
					</ol>
				</section>
				<section>
					<h3>Incremental Structure Transitions</h3>
					<ol>
						<li style="color: black;">A Universal Instance Language</li>
						<li style="color: grey;">Realizing Universal Data Structures</li>
						<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
						<li style="color: grey;">Optimization Policy Discovery</li>
					</ol>
				</section>
				<!-- Get deeper in to the problem:
				  - Dynamic workloads: (E.g., Data Loading -> Data access || Regional Differences in Access Patterns)
				  - State of the art:
				      - Option 1: Trash the old data structure and build a new one (and you're left twiddling your thumbs while you wait)
				      - Option 2: Design a data structure *specifically* for your transitional needs (e.g., LSM trees)
				  - What would it take to allow a data structure to incrementally transition from one set of tradeoffs to another one?
				  - Challenge: We have no way to work with, or even to describe such an "intermediate" data structure in the middle of transitioning
				  - Talk outline:
				      - A Universal Data Structure Instance description language
				      - Accessing and Modifying Static Data Structures
				      - Optimizing Static Data Structures
				      - Dynamic Data
				      - Policy Discovery and Optimization
				 -->
			</section>

			<section>
				<section>
					<p style="text-align: center">Logical Content</p>
					<p style="text-align: center" class="fragment" data-fragment-index="1">↑</p>
					<p style="text-align: center" class="fragment" data-fragment-index="1">Physical Structure</p>
				</section>
				<section>
					<p style="text-align: center">A <i>Bag</i> of $\lt Key \rightarrow Value \gt$ Pairs</p>
					<p style="text-align: center">↑</p>
					<p style="text-align: center" class="fragment grow">One Physical Realization of the Bag</p>
				</section>

				<section>
					<b>Core Idea</b>: A grammar of physical realizations.
				</section>

				<section>
					<h3>Primitives</h3>
					<ul>
						<li><span style="width: 250px; display: inline-block">A Key ($\mathbb K$)</span><span style="text-align: right; display: inline-block; width: 400px;">Any ordered set</span></li>
						<li><span style="width: 250px; display: inline-block">A Record ($\mathbb R$)</span><span style="text-align: right; display: inline-block; width: 400px;">A key/value pair</span></li>
						<li><span style="width: 250px; display: inline-block">A Pointer ($\mathbb P$)</span><span style="text-align: right; display: inline-block; width: 400px;">Logically a bag of records</span></li>
					</ul>
				</section>

				<section>
					<h3>Grammar</h3>
					<p style="text-align: left; width: 300px; margin-left: auto; margin-right: auto; margin-top: 50px">
						\begin{align}
						\mathbb P :=\; &|\;Sng(\mathbb R) \\
						 &|\uplus(\mathbb P, \mathbb P) \\
						 &|\;BT_{\mathbb K}(\mathbb P, \mathbb P) \\
						 &|\;Array_N(\mathbb R \ldots \mathbb R) \\
						 &|\;Sorted_N(\mathbb R \ldots \mathbb R)
						 \end{align}
					</p>
				</section>

				<section>
					<h3>Singleton</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Singleton.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>$Sng(x: \mathbb R)$</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$\{ x \}$</td></tr>
					</table>
				</section>

				<section>
					<h3>Union Node</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Union.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>$\uplus(a: \mathbb P, b: \mathbb P)$</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
					</table>
				</section>

				<section>
					<h3>Combining Primitives: Linked List</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/LinkedList.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>\begin{align}LL :=\;&|\;\uplus(Sng(x: \mathbb R), a: LL)\\&|\;Sng(x)\end{align}</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$\{ x \} \uplus a$ or $\{ x \}$</td></tr>
					</table>
					<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
						Many existing data structures can be expressed as syntactic restrictions on this grammar.
					</p>
				</section>

				<section>
					<h3>Extension 1: Semantic Constraints</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/BTreeNode.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>$BT_{k: \mathbb K}(a: \mathbb P, b: \mathbb P)$</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
						<tr><td style="text-align: right;">Constraint:</td><td>$\forall r \in a: r.key \lt k$<br/>$\forall r \in b: r.key \geq k$</td></tr>
					</table>
					<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
						Nodes can define semantic constraints over the logical contents of descendants.
					</p>
				</section>

				<section>
					<h3 style="margin-bottom: 60px;">Combining Primitives: Binary Tree</h3>
					\begin{align}
					  BinTree :=\;&|\;BT_{k: \mathbb K}(a: BinTree, b: BinTree)\\&|\;Sng(x: \mathbb R)
					\end{align}
				</section>

				<section>
					<h3>Extension 2: Repetition</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Array.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>$Array_{N : \mathbb N}(x_1: \mathbb R, \ldots,  x_N: \mathbb R)$</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
					</table>
					<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
						Can repeat structures for efficiency (e.g., B+Tree vs BinTree)
					</p>
				</section>

				<section>
					<h3>Combining Extensions</h3>
					<table>
						<tr><td style="text-align: right;">Visual:</td><td><img src="figs/SortedArray.svg" style="margin: 0px;"/></td></tr>
						<tr><td style="text-align: right;">UIL:</td><td>$Sorted_{N : \mathbb N}(x_1: \mathbb R, \ldots,  x_N: \mathbb R)$</td></tr>
						<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
						<tr><td style="text-align: right;">Constraint:</td><td>$\forall i \lt j: x_i.key \leq x_j.key$</td></tr>
					</table>
				</section>

				<section>
					<img src="figs/Legend.png">
				</section>

				<section>
					<h3>Example</h3>
					<svg width="300" height="300">
						<image  x="0" y="0"
										width="300" height="300"
										xlink:href="figs/Hybrid.svg"/>
						<g>
							<polygon points="0,300 150,300 150,218 240,218 240,300 300,300 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="1"/>
							<polygon points="0,300 150,300 150,218 200,200 235,120 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="2"/>
							<polygon points="0,170 80,170 125,100 150,70 100,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="3"/>
						</g>
					</svg>
					<p style="font-size: smaller;">
						<span class="fragment" data-fragment-index="3">$\uplus(Sng(1), $</span>
							<span class="fragment" data-fragment-index="2">$\uplus(Array_3(2,4,7), $</span>
								<span class="fragment" data-fragment-index="1">$BT_6($</span>
								  $Sorted_2(3, 5)$
								<span class="fragment" data-fragment-index="1">$, Sng(6))$</span>
							<span class="fragment" data-fragment-index="2">$)$</span>
						<span class="fragment" data-fragment-index="3">$)$</span>
					</p>
				</section>

				<!-- A Universal Data Structure Instance description language
					- Initial goal: Describing an instance of a data structure at one specific point in time.
				  - Definitions:
				       - Logical vs Physical state (set of records vs the way that those records are laid out on disk)
					- Approach: Commonalities between different data structures... standard patterns
					     - Records
					     - Pointers (to a address of a physical entity that describes a collection of records)
					     - Semantics (explicit or implicit properties of the physical layout that can improve queries over the ds)
					- Specifics: Specific building blocks
					     - [X] U [Y]
					     - Record
					     - Generalizations:
					         - Repetition (requires array notation)
					         - Semantic Extensions (Binary Tree, Hash Table, Sorted Array)
					- Language (CFG for data structure instances)
					- Examples (borrow from paper?)
				-->
			</section>

			<section>
				<section>
					<h3>Incremental Structure Transitions</h3>
					<ol>
						<li style="color: grey;">A Universal Instance Language</li>
						<li style="color: black;">Realizing Universal Data Structures</li>
						<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
						<li style="color: grey;">Optimization Policy Discovery</li>
					</ol>
				</section>
				<section>
					<h3>Universal Data Structures</h3>
					<ul>
						<li>Physiological Morphisms<ul>
							<li>Queries</li>
							<li>Updates</li>
						</ul></li>
						<li>Purely Physical Morphisms<ul>
							<li>Optimization</li>
						</ul></li>
					</ul>
				</section>

				<section>
					<h3 style="margin-bottom: 60px">Example: Range Queries</h3>
					$Q_{\ell,h} : \mathbb P \mapsto \mathbb P$
					<p>Return tuples in $[\ell,h)$</p>
				</section>

				<section style="font-size: smaller">
					\begin{align}
						Q_{\ell,h}(\uplus(a, b)) \rightarrow &\;\uplus(Q_{\ell,h}(a), Q_{\ell,h}(b))\\[10px]
						Q_{\ell,h}(BT_k(a, b)) \rightarrow &\;
						  \begin{cases}
						    Q_{\ell,h}(a) & \text{if } h \lt k\\
						    Q_{\ell,h}(b) & \text{if } \ell \geq k\\
						    BT_k(Q_{\ell,h}(a), Q_{\ell,h}(b)) & \text{otherwise}
						  \end{cases}\\[10px]
						Q_{\ell,h}(Array_N(x_1,\ldots,x_N)) \rightarrow &\;
							Array_{|Y|}(y_1, \ldots, y_{|Y|}) \\&\;\;\text{ s.t. } Y = \{\;x_i\;|\;\ell \leq x_i \lt h\;\}\\[10px]
						Q_{\ell,h}(Sorted_N(x_1,\ldots,x_N)) \rightarrow &\;
							Sorted_{j-i+1}(x_i, \ldots, x_j) \\&\;\;\text{ s.t. } i = argmin_i(x_i \geq \ell); \\&\;\;\;\;\;\;\;\;j = argmax_j(x_j \lt h);
					\end{align}
				</section>

				<section>
					<h3 style="margin-bottom: 60px">Insert</h3>
					$$Insert_{\mathbb P}: \mathbb P \rightarrow \mathbb P$$
					<p style="font-weight: bold; margin-top: 60px" class="fragment">Do the least work possible (optimize later)</p>
					<div class="fragment" style="margin-top: 60px">
						$$Insert_{a}(old) \rightarrow \uplus(old, a)$$
					</div>
				</section>
				<!-- Accessing Static Data Structures
					- Queries as morphisms on the language
						- Lookups
						- Scans
						- Enumeration via "Pop"
						- Implementing "Pop lowest"
				-->
				<!-- Dynamic Data
				  - Definition: Physical vs logical state transitions: Dynamic data is a physiological transition.
					- Do the minimum work possible: Linked List
				-->
			</section>

			<section>
				<section>
					<h3>Incremental Structure Transitions</h3>
					<ol>
						<li style="color: grey;">A Universal Instance Language</li>
						<li style="color: grey;">Realizing Universal Data Structures</li>
						<li style="color: black;">Just-In-Time Data Structure Optimization</li>
						<li style="color: grey;">Optimization Policy Discovery</li>
					</ol>
				</section>

				<section>
					<p><b>Core Idea:</b> Physical layout as a compiler optimization problem.	</p>
				</section>

				<section>
					<h3>Example: Organize A Hybrid Data Structure</h3>
					<img src="figs/Pushdown-Before.svg" style="vertical-align: middle;"/>
					<span class="fragment" data-fragment-index="1">➔<img src="figs/Pushdown-Step1.svg" style="vertical-align: middle;" /></span>
					<span class="fragment" data-fragment-index="3">➔<img src="figs/Pushdown-Step2.svg" style="vertical-align: middle;" /></span>
					<div class="fragment" data-fragment-index="2" style="font-size: smaller; margin-top: 30px">
						$$\uplus(Sng(x), BT_k(a, b)) \rightarrow \begin{cases} BT_k(\uplus(Sng(x), a), b) & \text{if } x.key \lt k\\ BT_k(a, \uplus(Sng(x), b)) & \text{if } x.key \geq k\end{cases}$$
					</div>
					<div class="fragment" data-fragment-index="3" style="font-size: smaller; margin-top: 30px">
						$$\uplus(Sng(x), Sorted_N(y_1, \ldots, y_N)) \rightarrow Sorted_{N+1}(y_1, \ldots, y_i, x, y_{i+1}, \ldots, y_N)$$
						$$\text{ where }y_i.key \leq x.key \leq y_{i+1}.key$$
					</div>
				</section>

				<section>
					<h3>Rewrites</h3>

					<p>A pattern/replacement pair.</p>

					<ul>
						<li>Crack-Array</li>
						<li>Sort-Array</li>
						<li>Sort-Merge</li>
						<li>Pushdown-Array</li>
						<li>Pushdown-BT</li>
						<li>Pushdown-Sorted</li>
						<li>...</li>
					</ul>
				</section>

				<section>
					<h3>Events</h3>

					<p>A trigger for applying a rewrite.</p>

					<ul>
						<li>Before-Scan</li>
						<li>After-Scan</li>
						<li>Before-Visit</li>
						<li>After-Visit</li>
						<li>Before-Insert</li>
						<li>After-Insert</li>
						<li>Idle-Tick</li>
					</ul>
				</section>

				<section>
					<h3 style="margin-bottom:50px">Policies (Take 1)</h3>
					<p>A set of Rewrite/Event pairs.</p>

					<div style="margin-top:50px; margin-bottom: 50px;">
						<ul>
							<li>Cracker (Implements [Idreos et.al.-CIDR 2007])</li>
							<li>Adaptive Merge (Implements [Graefe/Kuno-EDBT 2010])</li>
							<li>Swap (Heuristic Hybrid: Switch after 2000 events)</li>
							<li>Transition (Heuristic Hybrid: Gradient from 1-3k events)</li>
						</ul>
					</div>

					<attribution><a href="https://odin.cse.buffalo.edu/papers/2015/CIDR-jitd-final.pdf">[Kennedy/Ziarek-CIDR 2015]</a>; <a href="https://github.com/UBOdin/jitd">https://github.com/UBOdin/jitd</a></attribution>
				</section>

				<section>
					<h3>The Entire Transition Policy</h3>
					<pre style="width: 50%"><code class="java" style="font-size: 40%; line-height: 120%;">
package jitd;

import java.util.*;

public class TransitionMode extends Mode {
  int stepsTotal;
  int stepsTaken = 0;
  Random rand = new Random();
  Mode source, target;

  public TransitionMode(Mode source, Mode target, int steps)
  {
    this.stepsTotal = steps;
    this.source = source;
    this.target = target;
  }

  public Mode pick()
  {
    stepsTaken++;
    if(rand.nextInt(stepsTotal) < stepsTaken){
      return target;
    } else {
      return source;
    }
  }

  public KeyValueIterator scan(Driver driver, long low, long high)
  {
    return pick().scan(driver, low, high);
  }
  public void insert(Driver driver, Cog values)
  {
    pick().insert(driver, values);
  }
  public void idle(Driver driver)
  {
    pick().idle(driver);
  }
}
					</code></pre>
					<p>(40 lines of java)</p>
				</section>

				<section>
					<h3>Cracker Policy</h3>
					<img src="results/1g_cracker_1write.png" height="400px" />
					<p class="fragment">(incrementally improving performance)</p>
				</section>

				<section>
					<h3>Adaptive Merge Policy</h3>
					<img src="results/1g_merge_1write.png" height="400px" />
					<p class="fragment">(first read: 33s; bimodal: merge vs already merged)</p>
				</section>

				<section>
					<h3>Swap Policy</h3>
					<img src="results/1g_swap_1write.png" height="400px" />
					<p class="fragment">(can arbitrarily switch to a different policy)</p>
				</section>

				<section>
					<h3>Transition Policy</h3>
					<img src="results/1g_transition_1write.png" height="400px" />
					<p class="fragment">(can have two policies running simultaneously in parallel)</p>
				</section>

				<section>
					<p>Universal data structures allow us to <br/> hybridize policies "for free".</p>
				</section>
				<!-- Optimizing Static Data Structures
				  - Now we're talking about purely physical state transitions
					- Pattern + Replacement Language
					- Events
					- Examples
					   - Cracker Index
					   - Splay Tree
					   - Lazy BTree
				-->
			</section>

			<section>
				<section>
					<h3>Policies (Take 2)</h3>
					<p style="margin-top: 80px;"><b>Core Idea:</b> Physical layout as a <i>just-in-time</i> compiler optimization problem.</p>
				</section>

				<section>
					<h3 style="margin-bottom: 60px">Just-in-Time Data Structures</h3>
   				<img src="graphics/Interface.png" height="200px" style="float: right; padding-top: 20px;">
   				<p style="width: 460px;">A background thread incrementally optimizes the data structure.</p>
   				<p style="width: 460px;">Continuous availability while performance improves.</p>
				</section>

				<section>
					<h3>Optimizer Work Loop</h3>
					<ol>
						<li class="fragment" style="margin-top: 30px">Which rewrite to apply?</li>
						<li class="fragment" style="margin-top: 30px">On what to apply it?</li>
					</ol>
					<p class="fragment" style="margin-top: 50px">A priority queue keeps track of available rewrite patterns</p>
				</section>

				<section>
					<h3>Example: A Load-Time Available Index</h3>
					<p><b>Input:</b> An Unsorted Array</p>
					<dl>
						<dt style="margin-top: 30px">Crack-in-Two (a.k.a. Radix-Partition)</dt>
						<dd>Fast ($O(N)$), but only small improvement</dd>
						<dd class="fragment" style="padding-left: 30px">... but can be recursively improved</dd>
						<dt style="margin-top: 30px">Sort</dt>
						<dd>Slow ($O(N\cdot \log(N))$), but big improvement</dd>
					</dl>
				</section>

				<section>
					<h3>Crack</h3>
						\begin{align}
						Array_N(x_1, \ldots, x_N) \rightarrow BT_{x_j.key}(\;\;&Array_{|Y|}(y_1, \ldots, y_{|Y|}), \\&Array_{|Z|}(z_1, \ldots, z_{|Z|})\;\;)
						\end{align}
						<p style="font-size: smaller">
							where $j \in [1, N]$, $Y = \{x_i | x_i.key \lt x_j.key\}$, $Z = \{x_i | x_i.key \geq x_j.key\}$
						</p>
					<h3 style="margin-top: 60px">Sort</h3>
						$$Array_N(x_1, \ldots, x_N) \rightarrow Sorted_N(x_{f(1)}, \ldots, x_{f(N)})$$
						<p style="font-size: smaller">
							where $f : [N] \rightarrow [N]$ and $x_{f(i)} \leq x_{f(i+1)}$
						</p>
				</section>

				<section>
					<h3>Crack</h3>
					<p><b>Dequeue: </b> 1x Array</p>
					<p><b>Enqueue: </b> 2x Array</p>

					<h3 style="margin-top: 60px">Sort</h3>
					<p><b>Dequeue: </b> 1x Array</p>
					<p><b>Enqueue: </b> 1x Sorted Array</p>
				</section>

				<section>
					<img src="figs/Split-Step1.svg">
					<p class="fragment grow"><b>Option 1:</b> Crack($Array_8(1 \ldots 8)$)</p>
					<p><b>Option 2:</b> Sort($Array_8(1 \ldots 8)$)</p>
				</section>

				<section>
					<img src="figs/Split-Step2.svg">
					<p><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
					<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
					<p><b>Option 3:</b> Crack($Array_4(5 \ldots 8)$)</p>
					<p class="fragment grow"><b>Option 4:</b> Sort($Array_4(5 \ldots 8)$)</p>
				</section>

				<section>
					<img src="figs/Split-Step3.svg">
					<p class="fragment grow"><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
					<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
				</section>
			</section>

			<section>
				<section>
					<h3>Incremental Structure Transitions</h3>
					<ol>
						<li style="color: grey;">A Universal Instance Language</li>
						<li style="color: grey;">Realizing Universal Data Structures</li>
						<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
						<li style="color: black;">Optimization Policy Discovery</li>
					</ol>
				</section>

				<section>
					<p>How to prioritize rewrites?</p>
				</section>

				<section>
					<h3>Cost Model</h3>
					<table>
					<tr>
						<td style="padding-top: 60px; text-align: right; font-weight: bold;">$Array_N$:</td>
						<td style="text-align: left">$(300 \cdot N)$ ns to scan for 1 record</td></tr>
					<tr>
						<td style="padding-top: 60px; text-align: right; font-weight: bold;">$Sorted_N$:</td>
						<td style="text-align: left">$(175 \cdot \log N)$ ns to scan for 1 record</td></tr>
					<tr>
						<td style="padding-top: 60px; text-align: right; font-weight: bold;">$BT$:</td>
						<td style="text-align: left">Negligible</td></tr>


					</table>

					<p style="margin-top: 60px; font-weight: bold" class="fragment">Measure, then compute expected utility of static states.</p>
				</section>

				<section>
					<h3>Utility</h3>
					<ol>
						<li style="margin-top: 40px">Throughput</li>
						<li style="margin-top: 40px">(Negative) Latency</li>
						<li style="margin-top: 40px">Time spent with latency below 300ms</li>
					</ol>
				</section>

				<section>
					<h3>Heuristic: Sort Below Threshold Size</h3>
					<img src="results/Predictions.png" height="500px">
					<!-- <p class="fragment">Short-term value vs long-term performance.</p> -->
				</section>

				<section>
					<h3>Deriving Policies</h3>
					<ol>
						<li style="margin-top: 40px">Start with a heuristic and optimize parameters.<ul style="margin-top: 0px">
							<li>e.g., Pick a threshold to sort at.</li></ul>
						</li>
						<li style="margin-top: 40px">Model the expected cumulative utility of each candidate rewrite<ul style="margin-top: 0px">
							<li>e.g., Priority queue of Array nodes remaining.</li></ul>
						</li>
					</ol>
				</section>
				<!-- Policy Discovery and Optimization
					- Purely Heuristic Design: Cracker Index / Transition Policy / Splay Trees
					- Assisted Discovery: Parameterized Search Space + Cost Model
					- Autonomous Discovery:
						- Simulation-Based
						- (Likely Expensive) Chase-style
				-->
			</section>

			<section>
				<h3>Just-in-Time Data Structures</h3>
				<ul>
					<li style="margin-top: 40px">The Universal Instance Language can describe the intermediate state of a data structure in transition.</li>
					<li style="margin-top: 40px">UIL + localized rewrite rules can emulate the behaviors of existing data structures and be hybridized.</li>
					<li style="margin-top: 40px">Simulation + Cost-Analysis can be used to derive policies to drive direct rewrites.</li>
				</ul>
				<p class="fragment">Questions?</p>
			</section>

		</div></div>

		<script src="../reveal.js-3.5.0/lib/js/head.min.js"></script>
		<script src="../reveal.js-3.5.0/js/reveal.js"></script>

		<script>

			// Boot the reveal.js presentation.
			// Full list of configuration options available at:
			// https://github.com/hakimel/reveal.js#configuration
			Reveal.initialize({
				controls: false,
				progress: true,
				history: true,
				center: true,
				slideNumber: true,

				transition: 'fade', // none/fade/slide/convex/concave/zoom

				// Optional reveal.js plugins.  Each entry is loaded only when its
				// `condition` (if given) returns true.
				dependencies: [
					// classList polyfill, only for browsers lacking document.body.classList
					{ src: '../reveal.js-3.5.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
					// Math typesetting plugin: always loaded, since the slides contain TeX
					{ src: '../reveal.js-3.5.0/plugin/math/math.js',
						condition: function() { return true; },
						mathjax: '../reveal.js-3.5.0/js/MathJax.js'
					 },
					// Markdown support, only when some element carries a data-markdown attribute
					{ src: '../reveal.js-3.5.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
					{ src: '../reveal.js-3.5.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
					// Syntax highlighting, only when the deck contains a <pre><code> listing.
					// The selector must be 'pre code' to match the code sample in this deck;
					// a 'tt code' selector matches nothing here, so the plugin would never load.
					{ src: '../reveal.js-3.5.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
					{ src: '../reveal.js-3.5.0/plugin/zoom-js/zoom.js', async: true },
					{ src: '../reveal.js-3.5.0/plugin/notes/notes.js', async: true }
				]
			});

		</script>

	</body>
</html>