Website/slides/talks/2017-4-Tour-JITDs/index.html

862 lines
32 KiB
HTML

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Just-in-Time Data Structures</title>
<meta name="description" content="Small Data">
<meta name="author" content="Oliver Kennedy">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="../reveal.js-3.5.0/css/reveal.css">
<link rel="stylesheet" href="ubodin.css" id="theme">
<!-- Code syntax highlighting -->
<link rel="stylesheet" href="../reveal.js-3.5.0/lib/css/zenburn.css">
<style>
/* Talk-specific styling for tables carrying the "panelists" class:
   names are right-aligned; affiliations are smaller, italic and left-aligned. */
table.panelists td.name {
  text-align: right;
}
table.panelists td.affiliation {
  font-size: smaller;
  /* "italic" is a font-style value; it is not a valid text-decoration value,
     so the previous declaration was silently discarded by the browser. */
  font-style: italic;
  text-align: left;
}
</style>
<!-- Printing and PDF exports -->
<script>
// Inject the print stylesheet at load time: the PDF-export sheet when the
// query string contains "print-pdf" (case-insensitive), the paper sheet otherwise.
var link = document.createElement( 'link' );
link.rel = 'stylesheet';
link.type = 'text/css';
if ( window.location.search.match( /print-pdf/gi ) ) {
  link.href = '../reveal.js-3.5.0/css/print/pdf.css';
} else {
  link.href = '../reveal.js-3.5.0/css/print/paper.css';
}
document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<!--[if lt IE 9]>
<script src="../reveal.js-3.5.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
<center>
<a href="http://www.buffalo.edu" target="_blank">
<img src="../graphics/logos/ub-1line-ro-white.png" height="20"/>
</a>
</center>
</div>
<div class="footer">
<!-- Any Talk-Specific Footer Content Goes Here -->
<div style="float: left; margin-top: 15px; ">
Exploring <u><b>O</b></u>nline <u><b>D</b></u>ata <u><b>In</b></u>teractions
</div>
<a href="https://odin.cse.buffalo.edu" target="_blank">
<img src="../graphics/logos/odin-1line-white.png" height="40" style="float: right;"/>
</a>
</div>
<div class="slides">
<!-- Any section element inside of this container is displayed as a slide -->
<section>
<!-- Credits... introduce everyone, etc... -->
<section>
<h2>Just-in-Time Data Structures</h2>
<h4>Oliver Kennedy</h4>
<h4><a href="mailto:okennedy@buffalo.edu">okennedy@buffalo.edu</a></h4>
</section>
<section>
<table>
<tr>
<td width="150px"><img src="graphics/saurav.jpg" height="150px"/></td>
<td width="150px"><img src="graphics/darshana.jpg" height="150px"/></td>
<td width="150px"><img src="graphics/hank.jpg" height="150px"/></td>
<td width="150px"><img src="graphics/ankur.jpg" height="150px"/></td>
<td width="150px"><img src="graphics/luke.png" height="150px"/></td>
</tr>
<tr style="font-size: smaller;">
<td>Saurav Singhi</td>
<td>Darshana Balakrishnan</td>
<td>Hank Lin</td>
<td>Ankur Upadhyay</td>
<td>Lukasz Ziarek</td>
</tr>
<tr style="font-size: small;">
<td>(PhD In Progress)</td>
<td>(MS In Progress)</td>
<td>(BS 2017)</td>
<td>(MS 2014)</td>
<td>(Prof @ UB)</td>
</tr>
</table>
<p style="font-size: small">With support from NSF Awards IIS-1617586 and CNS-1629791</p>
</section>
</section>
<section>
<!-- Establish the setting:
- Data Structures are a game of trade-offs.
- Which structure is best? Well, it depends!
- Specific data structures lock you in to a specific set of tradeoffs:
- Read vs Write
- Batch vs Individual Updates
- Scan vs Lookup vs Range Queries
-->
<section>
<img src="graphics/conan.png" />
<h3>What is best in life?</h3>
<p class="fragment">(for organizing your data)</p>
<attribution>&copy; Universal Pictures</attribution>
</section>
<section>
<svg width="280" height="500" style="float: right">
<image x="55" y="20" width="180" height="180"
xlink:href="figs/ExampleDSes-BinTree.svg"
/>
<image x="20" y="185" width="250" height="170"
xlink:href="figs/ExampleDSes-LinkedList.svg"
/>
<image x="75" y="290" width="145" height="145"
xlink:href="figs/ExampleDSes-SortedArray.svg"
/>
</svg>
<h3>API</h3>
<ul>
<p>Insert $\lt key, value\gt$</p>
<p>Query for $key \in [low, high)$</p>
</ul>
<hr/>
<h3>Available Structures</h3>
<p>
Binary Tree, Linked List, Sorted Array
</p>
</section>
<section>
<img src="graphics/vizini.jpg" />
<h3>You guessed wrong!</h3>
<p style="font-size: smaller;">(unless you didn't)</p>
</section>
<section>
<svg width="600" height="600">
<image x="0" y="0" width="600" height="600"
xlink:href="figs/DSTradeoffs.svg"
/>
</svg>
</section>
<section>
<h2>Other Tradeoffs</h2>
<ul>
<li>Support for Threads</li>
<li>Lookup vs Full Scan vs Range Scan</li>
<li>Optimal Update Size</li>
</ul>
</section>
<section>
<svg width="400" height="400" style="float: right">
<image x="0" y="0" width="400" height="400"
xlink:href="figs/DSTradeoffs.svg"
/>
<g class="fragment" data-fragment-index="2">
<g class="fragment fade-out" data-fragment-index="4">
<text x="105" y="97" style="font-weight: bold; fill: red;">[Best]</text>
</g>
</g>
<g class="fragment" data-fragment-index="4">
<g class="fragment fade-out" data-fragment-index="6">
<text x="140" y="224" style="font-weight: bold; fill: red;">[Best]</text>
</g>
</g>
<g class="fragment" data-fragment-index="6">
<g class="fragment fade-out" data-fragment-index="8">
<text x="285" y="280" style="font-weight: bold; fill: red;">[Best]</text>
</g>
</g>
<g class="fragment" data-fragment-index="8">
<text x="95" y="97" style="font-weight: bold; fill: red;">[Best?]</text>
<text x="130" y="224" style="font-weight: bold; fill: red;">[Best?]</text>
</g>
</svg>
<h3>Interactive Analytics</h3>
<ol style="margin-top: 20px;">
<li class="fragment" data-fragment-index="1" style="margin-top: 10px;">User Opens CSV File</li>
<li class="fragment" data-fragment-index="3" style="margin-top: 10px;">User Poses Query as File Loads</li>
<li class="fragment" data-fragment-index="5" style="margin-top: 10px;">Lots More Queries</li>
<li class="fragment" data-fragment-index="7" style="margin-top: 10px;">User Adds More Data</li>
</ol>
<p class="fragment" data-fragment-index="9" style="clear: right; font-weight: bold;">
Even in a single session, there may be more than one "optimal" data structure.
</p>
</section>
</section>
<section>
<section>
<h2>State of the Art</h2>
<aside class="notes">
<p>1. Jack-of-all-trades, master of none. e.g., B+Tree, LSM Tree</p>
<p>2. a) Expensive, b) Need to be able to predict workload shifts before needed</p>
<p>3. Mountain of programmer effort</p>
</aside>
</section>
<section>
<img src="graphics/victorinox.jpg" height="400px" />
<aside class="notes">Jack of all trades data structure (e.g., B+Tree or LSM Trees). Classic workhorses, but they have their shortcomings --- B+: slow updates, LSM: write amplification, slow reads</aside>
<attribution>Victorinox</attribution>
</section>
<section>
<img src="graphics/44922_large.jpg" style="vertical-align: middle;" width="300px"/><span class="fragment" style="text-align: middle;"><img src="graphics/garbage.jpg" style="vertical-align: middle;" width="300px"/></span>
<aside class="notes">Keep re-building structures for different workloads</aside>
</section>
<section>
<img src="graphics/bespoke.png" height="400px" />
<aside class="notes">Bespoke data structures</aside>
</section>
<section>
<ul>
<li>Jack of All Trades Data Structures <div>(e.g., B+ Tree, LSM Tree)</div></li>
<li>Keep re-building structures for different workloads<div>(e.g., <span style="font-family: Courier">DROP INDEX</span><span style="font-family: Courier">LOAD TABLE</span><span style="font-family: Courier">CREATE INDEX</span>)</div></li>
<li>Bespoke data structures<div>(e.g., KD+R*++#N-Tree; Author et.al. SIGMOD 2023)</div></li>
</ul>
<p style="font-size: smaller; margin-top: 60px" class="fragment">No way to gracefully transition between different tradeoffs.</p>
</section>
<section>
<ol>
<li class="fragment" style="margin-top: 30px">What does it mean for a data structure to be halfway between a Binary Tree and a Linked List?</li>
<li class="fragment" style="margin-top: 30px">How would we access and manipulate such a data structure?</li>
<li class="fragment" style="margin-top: 30px">When and how should a data structure transition?</li>
<li class="fragment" style="margin-top: 30px">How do we automatically generate bespoke data-structures?</li>
</ol>
</section>
<section>
<h3>Incremental Structure Transitions</h3>
<ol>
<li style="color: black;">A Universal Instance Language</li>
<li style="color: grey;">Realizing Universal Data Structures</li>
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
<li style="color: grey;">Optimization Policy Discovery</li>
</ol>
</section>
<!-- Get deeper in to the problem:
- Dynamic workloads: (E.g., Data Loading -> Data access || Regional Differences in Access Patterns)
- State of the art:
- Option 1: Trash the old data structure and build a new one (and you're left twiddling your thumbs while you wait)
- Option 2: Design a data structure *specifically* for your transitional needs (e.g., LSM trees)
- What would it take to allow a data structure to incrementally transition from one set of tradeoffs to another one?
- Challenge: We have no way to work with, or even to describe such an "intermediate" data structure in the middle of transitioning
- Talk outline:
- A Universal Data Structure Instance description language
- Accessing and Modifying Static Data Structures
- Optimizing Static Data Structures
- Dynamic Data
- Policy Discovery and Optimization
-->
</section>
<section>
<section>
<p style="text-align: center">Logical Content</p>
<p style="text-align: center" class="fragment" data-fragment-index="1"></p>
<p style="text-align: center" class="fragment" data-fragment-index="1">Physical Structure</p>
</section>
<section>
<p style="text-align: center">A <i>Bag</i> of $\lt Key \rightarrow Value \gt$ Pairs</p>
<p style="text-align: center"></p>
<p style="text-align: center" class="fragment grow">One Physical Realization of the Bag</p>
</section>
<section>
<b>Core Idea</b>: A grammar of physical realizations.
</section>
<section>
<h3>Primitives</h3>
<ul>
<li><span style="width: 250px; display: inline-block">A Key ($\mathbb K$)</span><span style="text-align: right; display: inline-block; width: 400px;">Any ordered set</span></li>
<li><span style="width: 250px; display: inline-block">A Record ($\mathbb R$)</span><span style="text-align: right; display: inline-block; width: 400px;">A key/value pair</span></li>
<li><span style="width: 250px; display: inline-block">A Pointer ($\mathbb P$)</span><span style="text-align: right; display: inline-block; width: 400px;">Logically a bag of records</span></li>
</ul>
</section>
<section>
<h3>Grammar</h3>
<p style="text-align: left; width: 300px; margin-left: auto; margin-right: auto; margin-top: 50px">
\begin{align}
\mathbb P :=\; &|\;Sng(\mathbb R) \\
&|\uplus(\mathbb P, \mathbb P) \\
&|\;BT_{\mathbb K}(\mathbb P, \mathbb P) \\
&|\;Array_N(\mathbb R \ldots \mathbb R) \\
&|\;Sorted_N(\mathbb R \ldots \mathbb R)
\end{align}
</p>
</section>
<section>
<h3>Singleton</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Singleton.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>$Sng(x: \mathbb R)$</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$\{ x \}$</td></tr>
</table>
</section>
<section>
<h3>Union Node</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Union.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>$\uplus(a: \mathbb P, b: \mathbb P)$</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
</table>
</section>
<section>
<h3>Combining Primitives: Linked List</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/LinkedList.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>\begin{align}LL :=\;&|\;U(Sng(x: \mathbb R), a: LL)\\&|\;Sng(x)\end{align}</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$\{ x \} \uplus a$ or $\{ x \}$</td></tr>
</table>
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
Many existing data structures can be expressed as syntactic restrictions on this grammar.
</p>
</section>
<section>
<h3>Extension 1: Semantic Constraints</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/BTreeNode.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>$BT_{k: \mathbb K}(a: \mathbb P, b: \mathbb P)$</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
<tr><td style="text-align: right;">Constraint:</td><td>$\forall r \in a: r.key \lt K$<br/>$\forall r \in b: r.key \geq K$</td></tr>
</table>
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
Nodes can define semantic constraints over the logical contents of descendants.
</p>
</section>
<section>
<h3 style="margin-bottom: 60px;">Combining Primitives: Binary Tree</h3>
\begin{align}
BinTree :=\;&|\;BT_{k: \mathbb K}(a: BinTree, b: BinTree)\\&|\;Sng(x: \mathbb R)
\end{align}
</section>
<section>
<h3>Extension 2: Repetition</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Array.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>$Array_{N : \mathbb N}(x_1: \mathbb R, \ldots, x_N: \mathbb R)$</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
</table>
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
Can repeat structures for efficiency (e.g., B+Tree vs BinTree)
</p>
</section>
<section>
<h3>Combining Extensions</h3>
<table>
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/SortedArray.svg" style="margin: 0px;"/></td></tr>
<tr><td style="text-align: right;">UIL:</td><td>$Sorted_{N : \mathbb N}(x_1: \mathbb R, \ldots, x_N: \mathbb R)$</td></tr>
<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
<tr><td style="text-align: right;">Constraint:</td><td>$\forall i \lt j: x_i.key \leq x_j.key$</td></tr>
</table>
</section>
<section>
<img src="figs/Legend.png">
</section>
<section>
<h3>Example</h3>
<svg width="300" height="300">
<image x="0" y="0"
width="300" height="300"
xlink:href="figs/Hybrid.svg"/>
<g>
<polygon points="0,300 150,300 150,218 240,218 240,300 300,300 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="1"/>
<polygon points="0,300 150,300 150,218 200,200 235,120 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="2"/>
<polygon points="0,170 80,170 125,100 150,70 100,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="3"/>
</g>
</svg>
<p style="font-size: smaller;">
<span class="fragment" data-fragment-index="3">$\uplus(Sng(1), $</span>
<span class="fragment" data-fragment-index="2">$\uplus(Array_3(2,4,7), $</span>
<span class="fragment" data-fragment-index="1">$BT_6($</span>
$Sorted_2(3, 5)$
<span class="fragment" data-fragment-index="1">$, Sng(6))$</span>
<span class="fragment" data-fragment-index="2">$)$</span>
<span class="fragment" data-fragment-index="3">$)$</span>
</p>
</section>
<!-- A Universal Data Structure Instance description language
- Initial goal: Describing an instance of a data structure at one specific point in time.
- Definitions:
- Logical vs Physical state (set of records vs the way that those records are laid out on disk)
- Approach: Commonalities between different data structures... standard patterns
- Records
- Pointers (to a address of a physical entity that describes a collection of records)
- Semantics (explicit or implicit properties of the physical layout that can improve queries over the ds)
- Specifics: Specific building blocks
- [X] U [Y]
- Record
- Generalizations:
- Repetition (requires array notation)
- Semantic Extensions (Binary Tree, Hash Table, Sorted Array)
- Language (CFG for data structure instances)
- Examples (borrow from paper?)
-->
</section>
<section>
<section>
<h3>Incremental Structure Transitions</h3>
<ol>
<li style="color: grey;">A Universal Instance Language</li>
<li style="color: black;">Realizing Universal Data Structures</li>
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
<li style="color: grey;">Optimization Policy Discovery</li>
</ol>
</section>
<section>
<h3>Universal Data Structures</h3>
<ul>
<li>Physiological Morphisms<ul>
<li>Queries</li>
<li>Updates</li>
</ul></li>
<li>Purely Physical Morphisms<ul>
<li>Optimization</li>
</ul></li>
</ul>
</section>
<section>
<h3 style="margin-bottom: 60px">Example: Range Queries</h3>
$Q_{\ell,h} : \mathbb P \mapsto \mathbb P$
<p>Return tuples in $[\ell,h)$</p>
</section>
<section style="font-size: smaller">
\begin{align}
Q_{\ell,h}(\uplus(a, b)) \rightarrow &\;\uplus(Q_{\ell,h}(a), Q_{\ell,h}(b))\\[10px]
Q_{\ell,h}(BT_k(a, b)) \rightarrow &\;
\begin{cases}
Q_{\ell,h}(a) & \text{if } h \lt k\\
Q_{\ell,h}(b) & \text{if } \ell \geq k\\
BT_k(Q_{\ell,h}(a), Q_{\ell,h}(b)) & \text{otherwise}
\end{cases}\\[10px]
Q_{\ell,h}(Array_N(x_1,\ldots,x_N)) \rightarrow &\;
Array_{|Y|}(y_1, \ldots, y_{|Y|}) \\&\;\;\text{ s.t. } Y = \{\;x_i\;|\;\ell \leq x_i \lt h\;\}\\[10px]
Q_{\ell,h}(Sorted_N(x_1,\ldots,x_N)) \rightarrow &\;
Sorted_{j-i+1}(x_i, \ldots, x_j) \\&\;\;\text{ s.t. } i = argmin_i(x_i \geq \ell); \\&\;\;\;\;\;\;\;\;j = argmax_j(x_j \lt h);
\end{align}
</section>
<section>
<h3 style="margin-bottom: 60px">Insert</h3>
$$Insert_{\mathbb P}: \mathbb P \rightarrow \mathbb P$$
<p style="font-weight: bold; margin-top: 60px" class="fragment">Do the least work possible (optimize later)</p>
<div class="fragment" style="margin-top: 60px">
$$Insert_{a}(old) \rightarrow \uplus(old, a)$$
</div>
</section>
<!-- Accessing Static Data Structures
- Queries as morphisms on the language
- Lookups
- Scans
- Enumeration via "Pop"
- Implementing "Pop lowest"
-->
<!-- Dynamic Data
- Definition: Physical vs logical state transitions: Dynamic data is a physiological transition.
- Do the minimum work possible: Linked List
-->
</section>
<section>
<section>
<h3>Incremental Structure Transitions</h3>
<ol>
<li style="color: grey;">A Universal Instance Language</li>
<li style="color: grey;">Realizing Universal Data Structures</li>
<li style="color: black;">Just-In-Time Data Structure Optimization</li>
<li style="color: grey;">Optimization Policy Discovery</li>
</ol>
</section>
<section>
<p><b>Core Idea:</b> Physical layout as a compiler optimization problem. </p>
</section>
<section>
<h3>Example: Organize A Hybrid Data Structure</h3>
<img src="figs/Pushdown-Before.svg" style="vertical-align: middle;"/>
<span class="fragment" data-fragment-index="1"><img src="figs/Pushdown-Step1.svg" style="vertical-align: middle;" /></span>
<span class="fragment" data-fragment-index="3"><img src="figs/Pushdown-Step2.svg" style="vertical-align: middle;" /></span>
<div class="fragment" data-fragment-index="2" style="font-size: smaller; margin-top: 30px">
$$\uplus(Sng(x), BT_k(a, b)) \rightarrow \begin{cases} BT_k(\uplus(Sng(x), a), b) & \text{if } x.key \lt k\\ BT_k(a, \uplus(Sng(x), b)) & \text{if } x.key \geq k\end{cases}$$
</div>
<div class="fragment" data-fragment-index="3" style="font-size: smaller; margin-top: 30px">
$$\uplus(Sng(x), Sorted_N(y_1, \ldots, y_N)) \rightarrow Sorted_{N+1}(y_1, \ldots, y_i, x, y_{i+1}, \ldots, y_N)$$
$$\text{ where }y_i.key \leq x.key \leq y_{i+1}.key$$
</div>
</section>
<section>
<h3>Rewrites</h3>
<p>A pattern/replacement pair.</p>
<ul>
<li>Crack-Array</li>
<li>Sort-Array</li>
<li>Sort-Merge</li>
<li>Pushdown-Array</li>
<li>Pushdown-BT</li>
<li>Pushdown-Sorted</li>
<li>...</li>
</ul>
</section>
<section>
<h3>Events</h3>
<p>A trigger for applying a rewrite.</p>
<ul>
<li>Before-Scan</li>
<li>After-Scan</li>
<li>Before-Visit</li>
<li>After-Visit</li>
<li>Before-Insert</li>
<li>After-Insert</li>
<li>Idle-Tick</li>
</ul>
</section>
<section>
<h3 style="margin-bottom:50px">Policies (Take 1)</h3>
<p>A set of Rewrite/Event pairs.</p>
<div style="margin-top:50px; margin-bottom: 50px;">
<ul>
<li>Cracker (Implements [Idreos et.al.-CIDR 2007])</li>
<li>Adaptive Merge (Implements [Graefe/Kano-EDBT 2010])</li>
<li>Swap (Heuristic Hybrid: Switch after 2000 events)</li>
<li>Transition (Heuristic Hybrid: Gradient from 1-3k events)</li>
</ul>
</div>
<attribution><a href="https://odin.cse.buffalo.edu/papers/2015/CIDR-jitd-final.pdf">[Kennedy/Ziarek-CIDR 2015]</a>; <a href="https://github.com/UBOdin/jitd">https://github.com/UBOdin/jitd</a></attribution>
</section>
<section>
<h3>The Entire Transition Policy</h3>
<pre style="width: 50%"><code class="java" style="font-size: 40%; line-height: 120%;">
package jitd;
import java.util.*;
public class TransitionMode extends Mode {
int stepsTotal;
int stepsTaken = 0;
Random rand = new Random();
Mode source, target;
public TransitionMode(Mode source, Mode target, int steps)
{
this.stepsTotal = steps;
this.source = source;
this.target = target;
}
public Mode pick()
{
stepsTaken++;
if(rand.nextInt(stepsTotal) &lt; stepsTaken){
return target;
} else {
return source;
}
}
public KeyValueIterator scan(Driver driver, long low, long high)
{
return pick().scan(driver, low, high);
}
public void insert(Driver driver, Cog values)
{
pick().insert(driver, values);
}
public void idle(Driver driver)
{
pick().idle(driver);
}
}
</code></pre>
<p>(40 lines of java)</p>
</section>
<section>
<h3>Cracker Policy</h3>
<img src="results/1g_cracker_1write.png" height="400px" />
<p class="fragment">(incrementally improving performance)</p>
</section>
<section>
<h3>Adaptive Merge Policy</h3>
<img src="results/1g_merge_1write.png" height="400px" />
<p class="fragment">(first read: 33s; bimodal: merge vs already merged)</p>
</section>
<section>
<h3>Swap Policy</h3>
<img src="results/1g_swap_1write.png" height="400px" />
<p class="fragment">(can arbitrarily switch to a different policy)</p>
</section>
<section>
<h3>Transition Policy</h3>
<img src="results/1g_transition_1write.png" height="400px" />
<p class="fragment">(can have two policies running simultaneously in parallel)</p>
</section>
<section>
<p>Universal data structures allow us to <br/> hybridize policies "for free".</p>
</section>
<!-- Optimizing Static Data Structures
- Now we're talking about purely physical state transitions
- Pattern + Replacement Language
- Events
- Examples
- Cracker Index
- Splay Tree
- Lazy BTree
-->
</section>
<section>
<section>
<h3>Policies (Take 2)</h3>
<p style="margin-top: 80px;"><b>Core Idea:</b> Physical layout as a <i>just-in-time</i> compiler optimization problem.</p>
</section>
<section>
<h3 style="margin-bottom: 60px">Just-in-Time Data Structures</h3>
<img src="graphics/Interface.png" height="200px" style="float: right; padding-top: 20px;">
<p style="width: 460px;">A background thread incrementally optimizes the data structure.</p>
<p style="width: 460px;">Continuous availability while performance improves.</p>
</section>
<section>
<h3>Optimizer Work Loop</h3>
<ol>
<li class="fragment" style="margin-top: 30px">Which rewrite to apply?</li>
<li class="fragment" style="margin-top: 30px">On what to apply it?</li>
</ol>
<p class="fragment" style="margin-top: 50px">A priority queue keeps track of available rewrite patterns</p>
</section>
<section>
<h3>Example: A Load-Time Available Index</h3>
<p><b>Input:</b> An Unsorted Array</p>
<dl>
<dt style="margin-top: 30px">Crack-in-Two (a.k.a. Radix-Partition)</dt>
<dd>Fast ($O(N)$), but only small improvement</dd>
<dd class="fragment" style="padding-left: 30px">... but can be recursively improved</dd>
<dt style="margin-top: 30px">Sort</dt>
<dd>Slow ($O(N\cdot \log(N))$), but big improvement</dd>
</dl>
</section>
<section>
<h3>Crack</h3>
\begin{align}
Array_N(x_1, \ldots, x_N) \rightarrow BT_{x_j.key}(\;\;&Array_{|Y|}(y_1, \ldots, y_{|Y|}), \\&Array_{|Z|}(z_1, \ldots, z_{|Z|})\;\;)
\end{align}
<p style="font-size: smaller">
where $j \in [1, N]$, $Y = \{x_i | x_i.key \lt x_j.key\}$, $Z = \{x_i | x_i.key \geq x_j.key\}$
</p>
<h3 style="margin-top: 60px">Sort</h3>
$$Array_N(x_1, \ldots, x_N) \rightarrow Sorted_N(x_{f(1)}, \ldots, x_{f(N)})$$
<p style="font-size: smaller">
where $f : [N] \rightarrow [N]$ and $x_{f(i)} \leq x_{f(i+1)}$
</p>
</section>
<section>
<h3>Crack</h3>
<p><b>Dequeue: </b> 1x Array</p>
<p><b>Enqueue: </b> 2x Array</p>
<h3 style="margin-top: 60px">Sort</h3>
<p><b>Dequeue: </b> 1x Array</p>
<p><b>Enqueue: </b> 1x Sorted Array</p>
</section>
<section>
<img src="figs/Split-Step1.svg">
<p class="fragment grow"><b>Option 1:</b> Crack($Array_8(1 \ldots 8)$)</p>
<p><b>Option 2:</b> Sort($Array_8(1 \ldots 8)$)</p>
</section>
<section>
<img src="figs/Split-Step2.svg">
<p><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
<p><b>Option 3:</b> Crack($Array_4(5 \ldots 8)$)</p>
<p class="fragment grow"><b>Option 4:</b> Sort($Array_4(5 \ldots 8)$)</p>
</section>
<section>
<img src="figs/Split-Step3.svg">
<p class="fragment grow"><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
</section>
</section>
<section>
<section>
<h3>Incremental Structure Transitions</h3>
<ol>
<li style="color: grey;">A Universal Instance Language</li>
<li style="color: grey;">Realizing Universal Data Structures</li>
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
<li style="color: black;">Optimization Policy Discovery</li>
</ol>
</section>
<section>
<p>How to prioritize rewrites?</p>
</section>
<section>
<h3>Cost Model</h3>
<table>
<tr>
<td style="padding-top: 60px; text-align: right; font-weight: bold;">$Array_N$:</td>
<td style="text-align: left">$(300 \cdot N)$ ns to scan for 1 record</td></tr>
<tr>
<td style="padding-top: 60px; text-align: right; font-weight: bold;">$Sorted_N$:</td>
<td style="text-align: left">$(175 \cdot \log N)$ ns to scan for 1 record</td></tr>
<tr>
<td style="padding-top: 60px; text-align: right; font-weight: bold;">$BT$:</td>
<td style="text-align: left">Negligible</td></tr>
</table>
<p style="margin-top: 60px; font-weight: bold" class="fragment">Measure, then compute expected utility of static states.</p>
</section>
<section>
<h3>Utility</h3>
<ol>
<li style="margin-top: 40px">Throughput</li>
<li style="margin-top: 40px">(Negative) Latency</li>
<li style="margin-top: 40px">Time spent with latency below 300ms</li>
</ol>
</section>
<section>
<h3>Heuristic: Sort Below Threshold Size</h3>
<img src="results/Predictions.png" height="500px">
<!-- <p class="fragment">Short-term value vs long-term performance.</p> -->
</section>
<section>
<h3>Deriving Policies</h3>
<ol>
<li style="margin-top: 40px">Start with a heuristic and optimize parameters.<ul style="margin-top: 0px">
<li>e.g., Pick a threshold to sort at.</li></ul>
</li>
<li style="margin-top: 40px">Model the expected cumulative utility of each candidate rewrite<ul style="margin-top: 0px">
<li>e.g., Priority queue of Array nodes remaining.</li></ul>
</li>
</ol>
</section>
<!-- Policy Discovery and Optimization
- Purely Heuristic Design: Cracker Index / Transition Policy / Splay Trees
- Assisted Discovery: Parameterized Search Space + Cost Model
- Autonomous Discovery:
- Simulation-Based
- (Likely Expensive) Chase-style
-->
</section>
<section>
<h3>Just-in-Time Data Structures</h3>
<ul>
<li style="margin-top: 40px">The Universal Instance Language can describe the intermediate state of a data structure in transition.</li>
<li style="margin-top: 40px">UIL + localized rewrite rules can emulate the behaviors of existing data structures and be hybridized.</li>
<li style="margin-top: 40px">Simulation + Cost-Analysis can be used to derive policies to drive direct rewrites.</li>
</ul>
<p class="fragment">Questions?</p>
</section>
</div></div>
<script src="../reveal.js-3.5.0/lib/js/head.min.js"></script>
<script src="../reveal.js-3.5.0/js/reveal.js"></script>
<script>
// Start the presentation. Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
  controls: false,
  progress: true,
  history: true,
  center: true,
  slideNumber: true,
  transition: 'fade', // none/fade/slide/convex/concave/zoom
  // Optional reveal.js plugins. An entry with a `condition` is only loaded
  // when that function returns true.
  dependencies: [
    // classList polyfill, only for browsers lacking document.body.classList.
    { src: '../reveal.js-3.5.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
    // Math typesetting for the $...$ and \begin{align} formulas on the slides.
    // NOTE(review): the stock reveal.js math plugin reads its MathJax location
    // from a top-level `math: { mathjax: ... }` option; confirm that the
    // `mathjax` key on this dependency entry is actually honoured.
    { src: '../reveal.js-3.5.0/plugin/math/math.js',
      condition: function() { return true; },
      mathjax: '../reveal.js-3.5.0/js/MathJax.js'
    },
    // Markdown support, loaded only if some element carries data-markdown.
    { src: '../reveal.js-3.5.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
    { src: '../reveal.js-3.5.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
    // Syntax highlighting. The selector must be 'pre code' so that it matches
    // the <pre><code> listing in the deck; the previous 'tt code' selector
    // matched nothing, so the plugin was never loaded.
    { src: '../reveal.js-3.5.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
    { src: '../reveal.js-3.5.0/plugin/zoom-js/zoom.js', async: true },
    { src: '../reveal.js-3.5.0/plugin/notes/notes.js', async: true }
  ]
});
</script>
</body>
</html>