811 lines
29 KiB
HTML
811 lines
29 KiB
HTML
<!doctype html>
|
|
<html lang="en">
|
|
|
|
<head>
|
|
<meta charset="utf-8">
|
|
|
|
<title>Just-in-Time Data Structures</title>
|
|
|
|
<meta name="description" content="Small Data">
|
|
<meta name="author" content="Oliver Kennedy">
|
|
|
|
<meta name="apple-mobile-web-app-capable" content="yes" />
|
|
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
|
|
|
<link rel="stylesheet" href="../reveal.js-3.1.0/css/reveal.css">
|
|
<link rel="stylesheet" href="ubodin.css" id="theme">
|
|
|
|
<!-- Code syntax highlighting -->
|
|
<link rel="stylesheet" href="../reveal.js-3.1.0/lib/css/zenburn.css">
|
|
|
|
|
|
<style>
|
|
/* Name cells of the panelists table are right-aligned. */
table.panelists td.name {
  text-align: right;
}

/*
 * Affiliation cells: smaller, italic, left-aligned.
 * Italics must be requested with font-style; "italic" is not a valid
 * text-decoration value, so that declaration was ignored by browsers.
 */
table.panelists td.affiliation {
  font-size: smaller;
  font-style: italic;
  text-align: left;
}
|
|
</style>
|
|
|
|
<!-- Printing and PDF exports -->
|
|
<script>
|
|
// Inject the print stylesheet at load time: the PDF-export sheet when the
// URL query string contains "print-pdf", the regular paper sheet otherwise.
var link = document.createElement( 'link' );
link.rel = 'stylesheet';
link.type = 'text/css';
if ( window.location.search.match( /print-pdf/gi ) ) {
  link.href = '../reveal.js-3.1.0/css/print/pdf.css';
} else {
  link.href = '../reveal.js-3.1.0/css/print/paper.css';
}
// Append to the first <head> element so the sheet takes effect.
document.getElementsByTagName( 'head' )[0].appendChild( link );
|
|
</script>
|
|
|
|
<!--[if lt IE 9]>
|
|
<script src="../reveal.js-3.1.0/lib/js/html5shiv.js"></script>
|
|
<![endif]-->
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<div class="reveal">
|
|
|
|
<div class="header">
|
|
<!-- Any Talk-Specific Header Content Goes Here -->
|
|
<center>
|
|
<a href="http://www.buffalo.edu" target="_blank">
|
|
<img src="../graphics/logos/ub-1line-ro-white.png" height="20"/>
|
|
</a>
|
|
</center>
|
|
</div>
|
|
<div class="footer">
|
|
<!-- Any Talk-Specific Footer Content Goes Here -->
|
|
<div style="float: left; margin-top: 15px; ">
|
|
Exploring <u><b>O</b></u>nline <u><b>D</b></u>ata <u><b>In</b></u>teractions
|
|
</div>
|
|
<a href="https://odin.cse.buffalo.edu" target="_blank">
|
|
<img src="../graphics/logos/odin-1line-white.png" height="40" style="float: right;"/>
|
|
</a>
|
|
</div>
|
|
|
|
<div class="slides">
|
|
<!-- Any section element inside of this container is displayed as a slide -->
|
|
|
|
<section>
|
|
<!-- Credits... introduce everyone, etc... -->
|
|
<section>
|
|
<h2>Just-in-Time Data Structures</h2>
|
|
<h4>Oliver Kennedy</h4>
|
|
<h4><a href="mailto:okennedy@buffalo.edu">okennedy@buffalo.edu</a></h4>
|
|
</section>
|
|
|
|
<section>
|
|
<table>
|
|
<tr>
|
|
<td width="150px"><img src="graphics/saurav.jpg" height="150px"/></td>
|
|
<td width="150px"><img src="graphics/darshana.jpg" height="150px"/></td>
|
|
<td width="150px"><img src="graphics/hank.jpg" height="150px"/></td>
|
|
<td width="150px"><img src="graphics/ankur.jpg" height="150px"/></td>
|
|
<td width="150px"><img src="graphics/luke.png" height="150px"/></td>
|
|
</tr>
|
|
<tr style="font-size: smaller;">
|
|
<td>Saurav Singhi</td>
|
|
<td>Darshana Balakrishnan<br/></td>
|
|
<td>Hank Lin</td>
|
|
<td>Ankur Upadhyay</td>
|
|
<td>Lukasz Ziarek</td>
|
|
</tr>
|
|
<tr style="font-size: small;">
|
|
<td>(PhD In Progress)</td>
|
|
<td>(MS In Progress)</td>
|
|
<td>(BS 2017)</td>
|
|
<td>(MS 2014)</td>
|
|
<td>(Prof @ UB)</td>
|
|
</tr>
|
|
</table>
|
|
</section>
|
|
</section>
|
|
|
|
<section>
|
|
<!-- Establish the setting:
|
|
- Data Structures are a game of trade-offs.
|
|
- Which structure is best? Well, it depends!
|
|
- Specific data structures lock you in to a specific set of tradeoffs:
|
|
- Read vs Write
|
|
- Batch vs Individual Updates
|
|
- Scan vs Lookup vs Range Queries
|
|
-->
|
|
|
|
<section>
|
|
<img src="graphics/conan.png" />
|
|
|
|
<h3>What is best in life?</h3>
|
|
<p class="fragment">(for organizing your data)</p>
|
|
|
|
<attribution>© Universal Pictures</attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<svg width="280" height="500" style="float: right">
|
|
<image x="55" y="20" width="180" height="180"
|
|
xlink:href="figs/ExampleDSes-BinTree.svg"
|
|
/>
|
|
<image x="20" y="185" width="250" height="170"
|
|
xlink:href="figs/ExampleDSes-LinkedList.svg"
|
|
/>
|
|
<image x="75" y="290" width="145" height="145"
|
|
xlink:href="figs/ExampleDSes-SortedArray.svg"
|
|
/>
|
|
</svg>
|
|
<h3>API</h3>
|
|
<ul>
|
|
<p>Insert $\lt key, value\gt$</p>
|
|
<p>Query for $key \in [low, high)$</p>
|
|
</ul>
|
|
<hr/>
|
|
<h3>Available Structures</h3>
|
|
<p>
|
|
Binary Tree, Linked List, Sorted Array
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<svg width="600" height="600">
|
|
<image x="0" y="0" width="600" height="600"
|
|
xlink:href="figs/DSTradeoffs.svg"
|
|
/>
|
|
</svg>
|
|
</section>
|
|
|
|
<section>
|
|
<h2>Other Tradeoffs</h2>
|
|
<ul>
|
|
<li>Support for Threads</li>
|
|
<li>Lookup vs Full Scan vs Range Scan</li>
|
|
<li>Optimal Update Size</li>
|
|
</ul>
|
|
</section>
|
|
|
|
<section>
|
|
<svg width="400" height="400" style="float: right">
|
|
<image x="0" y="0" width="400" height="400"
|
|
xlink:href="figs/DSTradeoffs.svg"
|
|
/>
|
|
<g class="fragment" data-fragment-index="2">
|
|
<g class="fragment fade-out" data-fragment-index="4">
|
|
<text x="105" y="97" style="font-weight: bold; fill: red;">[Best]</text>
|
|
</g>
|
|
</g>
|
|
<g class="fragment" data-fragment-index="4">
|
|
<g class="fragment fade-out" data-fragment-index="6">
|
|
<text x="140" y="224" style="font-weight: bold; fill: red;">[Best]</text>
|
|
</g>
|
|
</g>
|
|
<g class="fragment" data-fragment-index="6">
|
|
<g class="fragment fade-out" data-fragment-index="8">
|
|
<text x="285" y="280" style="font-weight: bold; fill: red;">[Best]</text>
|
|
</g>
|
|
</g>
|
|
<g class="fragment" data-fragment-index="8">
|
|
<text x="95" y="97" style="font-weight: bold; fill: red;">[Best?]</text>
|
|
<text x="130" y="224" style="font-weight: bold; fill: red;">[Best?]</text>
|
|
</g>
|
|
</svg>
|
|
<h3>Interactive Analytics</h3>
|
|
<ol style="margin-top: 20px;">
|
|
<li class="fragment" data-fragment-index="1" style="margin-top: 10px;">User Opens CSV File</li>
|
|
<li class="fragment" data-fragment-index="3" style="margin-top: 10px;">User Poses Query as File Loads</li>
|
|
<li class="fragment" data-fragment-index="5" style="margin-top: 10px;">Lots More Queries</li>
|
|
<li class="fragment" data-fragment-index="7" style="margin-top: 10px;">User Adds More Data</li>
|
|
</ol>
|
|
<p class="fragment" data-fragment-index="9" style="clear: right; font-weight: bold;">
|
|
Even in a single session, there may be more than one "optimal" data structure.
|
|
</p>
|
|
</section>
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>State of the Art</h2>
|
|
<ul>
|
|
<li>Find a jack-of-all-trades data structure</li>
|
|
<li>Trash and re-build structures for different workloads</li>
|
|
<li>Use a bespoke data structure</li>
|
|
</ul>
|
|
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
|
|
What would it take to enable incremental transitions from one set of tradeoffs to another one?
|
|
</p>
|
|
<aside class="notes">
|
|
<p>1. Jack-of-all-trades, master of none. e.g., B+Tree, LSM Tree</p>
|
|
<p>2. a) Expensive, b) Need to be able to predict workload shifts before needed</p>
|
|
<p>3. Mountain of programmer effort</p>
|
|
</aside>
|
|
</section>
|
|
<section>
|
|
<h2>Incremental Transitions</h2>
|
|
<ol>
|
|
<li class="fragment">What does it mean for a data structure to be halfway between a Binary Tree and a Linked List?</li>
|
|
<li class="fragment">How would we access and manipulate such a data structure?</li>
|
|
<li class="fragment">When and how should a data structure transition?</li>
|
|
<li class="fragment">How do we automatically generate bespoke data-structures?</li>
|
|
</ol>
|
|
</section>
|
|
<section>
|
|
<h2>Incremental Transitions</h2>
|
|
<ol>
|
|
<li style="color: black;">A Universal Instance Language</li>
|
|
<li style="color: grey;">Realizing Universal Data Structures</li>
|
|
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
|
|
<li style="color: grey;">Optimization Policy Discovery</li>
|
|
</ol>
|
|
</section>
|
|
<!-- Get deeper in to the problem:
|
|
- Dynamic workloads: (E.g., Data Loading -> Data access || Regional Differences in Access Patterns)
|
|
- State of the art:
|
|
- Option 1: Trash the old data structure and build a new one (and you're left twiddling your thumbs while you wait)
|
|
- Option 2: Design a data structure *specifically* for your transitional needs (e.g., LSM trees)
|
|
- What would it take to allow a data structure to incrementally transition from one set of tradeoffs to another one?
|
|
- Challenge: We have no way to work with, or even to describe such an "intermediate" data structure in the middle of transitioning
|
|
- Talk outline:
|
|
- A Universal Data Structure Instance description language
|
|
- Accessing and Modifying Static Data Structures
|
|
- Optimizing Static Data Structures
|
|
- Dynamic Data
|
|
- Policy Discovery and Optimization
|
|
-->
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<p style="text-align: center">Logical Content</p>
|
|
<p style="text-align: center" class="fragment">↑</p>
|
|
<p style="text-align: center">Physical Structure</p>
|
|
</section>
|
|
<section>
|
|
<p style="text-align: center">A <i>Bag</i> of $\lt Key \rightarrow Value \gt$ Pairs</p>
|
|
<p style="text-align: center">↑</p>
|
|
<p style="text-align: center" class="fragment grow">One Physical Realization of the Bag</p>
|
|
</section>
|
|
|
|
<section>
|
|
<b>Core Idea</b>: A grammar of physical realizations.
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Primitives</h3>
|
|
<ul>
|
|
<li>A Key ($\mathbb K$)</li>
|
|
<li>A Record ($\mathbb R$)<br/>Logically a single record</li>
|
|
<li>A Pointer ($\mathbb P$)<br/>Logically a bag of records</li>
|
|
</ul>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Grammar</h3>
|
|
<p style="text-align: left; width: 300px; margin-left: auto; margin-right: auto; margin-top: 50px">
|
|
\begin{align}
|
|
\mathbb P :=\; &|\;Sng(\mathbb R) \\
|
|
&|\;\uplus(\mathbb P, \mathbb P) \\
|
|
&|\;BT_{\mathbb K}(\mathbb P, \mathbb P) \\
|
|
&|\;Array_N(\mathbb R \ldots \mathbb R) \\
|
|
&|\;Sorted_N(\mathbb R \ldots \mathbb R)
|
|
\end{align}
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Singleton</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Singleton.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>$Sng(x: \mathbb R)$</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$\{ x \}$</td></tr>
|
|
</table>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Union Node</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Union.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>$\uplus(a: \mathbb P, b: \mathbb P)$</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
|
|
</table>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Combining Primitives: Linked List</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/LinkedList.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>\begin{align}LL :=\;&|\;\uplus(Sng(x: \mathbb R), a: LL)\\&|\;Sng(x: \mathbb R)\end{align}</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$\{ x \} \uplus a$ or $\{ x \}$</td></tr>
|
|
</table>
|
|
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
|
|
Existing data structures can be expressed as syntactic restrictions on this grammar.
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Extension 1: Semantic Constraints</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/BTreeNode.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>$BT_{k: \mathbb K}(a: \mathbb P, b: \mathbb P)$</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$a \uplus b$</td></tr>
|
|
<tr><td style="text-align: right;">Constraint:</td><td>$\forall r \in a: r.key \lt k$<br/>$\forall r \in b: r.key \geq k$</td></tr>
|
|
</table>
|
|
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
|
|
Nodes can define semantic constraints on the contents of descendants.
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3 style="margin-bottom: 60px;">Combining Primitives: Binary Tree</h3>
|
|
\begin{align}
|
|
BinTree :=\;&|\;BT_{k: \mathbb K}(a: BinTree, b: BinTree)\\&|\;Sng(x: \mathbb R)
|
|
\end{align}
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Extension 2: Repetition</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/Array.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>$Array_{N : \mathbb N}(x_1: \mathbb R, \ldots, x_N: \mathbb R)$</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
|
|
</table>
|
|
<p class="fragment" data-fragment-index="1" style="clear: right; font-weight: bold;">
|
|
Can repeat structures for efficiency (e.g., B+Tree vs BinTree)
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Combining Extensions</h3>
|
|
<table>
|
|
<tr><td style="text-align: right;">Visual:</td><td><img src="figs/SortedArray.svg" style="margin: 0px;"/></td></tr>
|
|
<tr><td style="text-align: right;">UIL:</td><td>$Sorted_{N : \mathbb N}(x_1: \mathbb R, \ldots, x_N: \mathbb R)$</td></tr>
|
|
<tr><td style="text-align: right;">Logical:</td><td>$\{ x_1, \ldots, x_N \}$</td></tr>
|
|
<tr><td style="text-align: right;">Constraint:</td><td>$\forall i \lt j: x_i.key \leq x_j.key$</td></tr>
|
|
</table>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Example</h3>
|
|
<svg width="300" height="300">
|
|
<image x="0" y="0"
|
|
width="300" height="300"
|
|
xlink:href="figs/Hybrid.svg"/>
|
|
<g>
|
|
<polygon points="0,300 150,300 150,218 240,218 240,300 300,300 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="1"/>
|
|
<polygon points="0,300 150,300 150,218 200,200 235,120 300,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="2"/>
|
|
<polygon points="0,170 80,170 125,100 150,70 100,0 0,0" style="fill:white; stroke-width:0" class="fragment fade-out" data-fragment-index="3"/>
|
|
</g>
|
|
</svg>
|
|
<p style="font-size: smaller;">
|
|
<span class="fragment" data-fragment-index="3">$\uplus(Sng(1), $</span>
|
|
<span class="fragment" data-fragment-index="2">$\uplus(Array_3(2,4,7), $</span>
|
|
<span class="fragment" data-fragment-index="1">$BT_6($</span>
|
|
$Sorted_2(3, 5)$
|
|
<span class="fragment" data-fragment-index="1">$, Sng(6))$</span>
|
|
<span class="fragment" data-fragment-index="2">$)$</span>
|
|
<span class="fragment" data-fragment-index="3">$)$</span>
|
|
</p>
|
|
</section>
|
|
|
|
<!-- A Universal Data Structure Instance description language
|
|
- Initial goal: Describing an instance of a data structure at one specific point in time.
|
|
- Definitions:
|
|
- Logical vs Physical state (set of records vs the way that those records are laid out on disk)
|
|
- Approach: Commonalities between different data structures... standard patterns
|
|
- Records
|
|
- Pointers (to a address of a physical entity that describes a collection of records)
|
|
- Semantics (explicit or implicit properties of the physical layout that can improve queries over the ds)
|
|
- Specifics: Specific building blocks
|
|
- [X] U [Y]
|
|
- Record
|
|
- Generalizations:
|
|
- Repetition (requires array notation)
|
|
- Semantic Extensions (Binary Tree, Hash Table, Sorted Array)
|
|
- Language (CFG for data structure instances)
|
|
- Examples (borrow from paper?)
|
|
-->
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>Incremental Transitions</h2>
|
|
<ol>
|
|
<li style="color: grey;">A Universal Instance Language</li>
|
|
<li style="color: black;">Realizing Universal Data Structures</li>
|
|
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
|
|
<li style="color: grey;">Optimization Policy Discovery</li>
|
|
</ol>
|
|
</section>
|
|
<section>
|
|
<h3>Universal Data Structures</h3>
|
|
<ul>
|
|
<li>Physiological Morphisms<ul>
|
|
<li>Queries</li>
|
|
<li>Updates</li>
|
|
</ul></li>
|
|
<li>Purely Physical Morphisms<ul>
|
|
<li>Optimization</li>
|
|
</ul></li>
|
|
</ul>
|
|
</section>
|
|
|
|
<section>
|
|
<h3 style="margin-bottom: 60px">Example: Range Queries</h3>
|
|
$Q_{\ell,h} : \mathbb P \mapsto \mathbb P$
|
|
<p>Return tuples in $[\ell,h)$</p>
|
|
</section>
|
|
|
|
<section style="font-size: smaller">
|
|
\begin{align}
|
|
Q_{\ell,h}(\uplus(a, b)) \rightarrow &\;\uplus(Q_{\ell,h}(a), Q_{\ell,h}(b))\\[10px]
|
|
Q_{\ell,h}(BT_k(a, b)) \rightarrow &\;
|
|
\begin{cases}
|
|
Q_{\ell,h}(a) & \text{if } h \lt k\\
|
|
Q_{\ell,h}(b) & \text{if } \ell \geq k\\
|
|
BT_k(Q_{\ell,h}(a), Q_{\ell,h}(b)) & \text{otherwise}
|
|
\end{cases}\\[10px]
|
|
Q_{\ell,h}(Array_N(x_1,\ldots,x_N)) \rightarrow &\;
|
|
Array_{|Y|}(y_1, \ldots, y_{|Y|}) \\&\;\;\text{ s.t. } Y = \{\;x_i\;|\;\ell \leq x_i \lt h\;\}\\[10px]
|
|
Q_{\ell,h}(Sorted_N(x_1,\ldots,x_N)) \rightarrow &\;
|
|
Sorted_{j-i+1}(x_i, \ldots, x_j) \\&\;\;\text{ s.t. } i = argmin_i(x_i \geq \ell); \\&\;\;\;\;\;\;\;\;j = argmax_j(x_j \lt h);
|
|
\end{align}
|
|
</section>
|
|
|
|
<section>
|
|
<h3 style="margin-bottom: 60px">Insert</h3>
|
|
$$Insert: \mathbb P \times \mathbb P \rightarrow \mathbb P$$
|
|
<p style="font-weight: bold; margin-top: 60px" class="fragment">Do the least work possible (optimize later)</p>
|
|
<div class="fragment" style="margin-top: 60px">
|
|
$$Insert(a, b) \rightarrow \uplus(a, b)$$
|
|
</div>
|
|
</section>
|
|
<!-- Accessing Static Data Structures
|
|
- Queries as morphisms on the language
|
|
- Lookups
|
|
- Scans
|
|
- Enumeration via "Pop"
|
|
- Implementing "Pop lowest"
|
|
-->
|
|
<!-- Dynamic Data
|
|
- Definition: Physical vs logical state transitions: Dynamic data is a physiological transition.
|
|
- Do the minimum work possible: Linked List
|
|
-->
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>Incremental Transitions</h2>
|
|
<ol>
|
|
<li style="color: grey;">A Universal Instance Language</li>
|
|
<li style="color: grey;">Realizing Universal Data Structures</li>
|
|
<li style="color: black;">Just-In-Time Data Structure Optimization</li>
|
|
<li style="color: grey;">Optimization Policy Discovery</li>
|
|
</ol>
|
|
</section>
|
|
|
|
<section>
|
|
<p><b>Core Idea:</b> Physical layout as a compiler optimization problem. </p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Example: Organize A Hybrid Data Structure</h3>
|
|
<img src="figs/Pushdown-Before.svg" style="vertical-align: middle;"/>
|
|
<span class="fragment" data-fragment-index="2">➔<img src="figs/Pushdown-Step1.svg" style="vertical-align: middle;" /></span>
|
|
<span class="fragment" data-fragment-index="3">➔<img src="figs/Pushdown-Step2.svg" style="vertical-align: middle;" /></span>
|
|
<div class="fragment" data-fragment-index="1" style="font-size: smaller; margin-top: 30px">
|
|
$$\uplus(Sng(x), BT_k(a, b)) \rightarrow \begin{cases} BT_k(\uplus(Sng(x), a), b) & \text{if } x.key \lt k\\ BT_k(a, \uplus(Sng(x), b)) & \text{if } x.key \geq k\end{cases}$$
|
|
</div>
|
|
<div class="fragment" data-fragment-index="3" style="font-size: smaller; margin-top: 30px">
|
|
$$\uplus(Sng(x), Sorted_N(y_1, \ldots, y_N)) \rightarrow Sorted_{N+1}(y_1, \ldots, y_i, x, y_{i+1}, \ldots, y_N)$$
|
|
$$\text{ where }y_i.key \leq x.key \leq y_{i+1}.key$$
|
|
</div>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Rewrites</h3>
|
|
|
|
<p>A pattern/replacement pair.</p>
|
|
|
|
<ul>
|
|
<li>Crack-Array</li>
|
|
<li>Sort-Array</li>
|
|
<li>Sort-Merge</li>
|
|
<li>Pushdown-Array</li>
|
|
<li>Pushdown-BT</li>
|
|
<li>Pushdown-Sorted</li>
|
|
<li>...</li>
|
|
</ul>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Events</h3>
|
|
|
|
<p>A trigger for applying a rewrite.</p>
|
|
|
|
<ul>
|
|
<li>Before-Scan</li>
|
|
<li>After-Scan</li>
|
|
<li>Before-Visit</li>
|
|
<li>After-Visit</li>
|
|
<li>Before-Insert</li>
|
|
<li>After-Insert</li>
|
|
<li>Idle-Tick</li>
|
|
</ul>
|
|
</section>
|
|
|
|
<section>
|
|
<h3 style="margin-bottom:50px">Policies (Take 1)</h3>
|
|
<p>A set of Rewrite/Event pairs.</p>
|
|
|
|
<div style="margin-top:50px; margin-bottom: 50px;">
|
|
<ul>
|
|
<li>Cracker (Implements [Idreos et al.-CIDR 2007])</li>
|
|
<li>Adaptive Merge (Implements [Graefe/Kano-EDBT 2010])</li>
|
|
<li>Swap (Heuristic Hybrid: Switch after 2000 events)</li>
|
|
<li>Transition (Heuristic Hybrid: Gradient from 1-3k events)</li>
|
|
</ul>
|
|
</div>
|
|
|
|
<attribution><a href="https://odin.cse.buffalo.edu/papers/2015/CIDR-jitd-final.pdf">[Kennedy/Ziarek-CIDR 2015]</a>; <a href="https://github.com/UBOdin/jitd">https://github.com/UBOdin/jitd</a></attribution>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>The Entire Transition Policy</h3>
|
|
<pre style="width: 50%"><code class="java" style="font-size: 40%; line-height: 120%;">
|
|
package jitd;
|
|
|
|
import java.util.*;
|
|
|
|
public class TransitionMode extends Mode {
|
|
int stepsTotal;
|
|
int stepsTaken = 0;
|
|
Random rand = new Random();
|
|
Mode source, target;
|
|
|
|
public TransitionMode(Mode source, Mode target, int steps)
|
|
{
|
|
this.stepsTotal = steps;
|
|
this.source = source;
|
|
this.target = target;
|
|
}
|
|
|
|
public Mode pick()
|
|
{
|
|
stepsTaken++;
|
|
if(rand.nextInt(stepsTotal) < stepsTaken){
|
|
return target;
|
|
} else {
|
|
return source;
|
|
}
|
|
}
|
|
|
|
public KeyValueIterator scan(Driver driver, long low, long high)
|
|
{
|
|
return pick().scan(driver, low, high);
|
|
}
|
|
public void insert(Driver driver, Cog values)
|
|
{
|
|
pick().insert(driver, values);
|
|
}
|
|
public void idle(Driver driver)
|
|
{
|
|
pick().idle(driver);
|
|
}
|
|
}
|
|
</code></pre>
|
|
<p>(40 lines of Java)</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Cracker Policy</h3>
|
|
<img src="results/1g_cracker_1write.png"/>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Adaptive Merge Policy</h3>
|
|
<img src="results/1g_merge_1write.png"/>
|
|
(first read: 33s)
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Swap Policy</h3>
|
|
<img src="results/1g_swap_1write.png"/>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Transition Policy</h3>
|
|
<img src="results/1g_transition_1write.png"/>
|
|
</section>
|
|
|
|
<section>
|
|
<p>Universal data structures allow us to <br/> hybridize policies "for free".</p>
|
|
</section>
|
|
<!-- Optimizing Static Data Structures
|
|
- Now we're talking about purely physical state transitions
|
|
- Pattern + Replacement Language
|
|
- Events
|
|
- Examples
|
|
- Cracker Index
|
|
- Splay Tree
|
|
- Lazy BTree
|
|
-->
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h3>Policies (Take 2)</h3>
|
|
<p style="margin-top: 80px;"><b>Core Idea:</b> Physical layout as a <i>just-in-time</i> compiler optimization problem.</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3 style="margin-bottom: 60px">Just-in-Time Data Structures</h3>
|
|
<img src="graphics/Interface.png" height="200px" style="float: right;">
|
|
<p style="padding-top: 40px; width: 400px;">A background thread incrementally optimizes the data structure.</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Challenges</h3>
|
|
<ol>
|
|
<li class="fragment" style="margin-top: 30px">Which rewrite to apply?</li>
|
|
<li class="fragment" style="margin-top: 30px">Which data to rewrite?</li>
|
|
</ol>
|
|
</section>
|
|
|
|
<section>
|
|
<p>Two simple transforms: Crack or Sort</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Crack</h3>
|
|
\begin{align}
|
|
Array_N(x_1, \ldots, x_N) \rightarrow BT_{x_j.key}(\;\;&Array_{|Y|}(y_1, \ldots, y_{|Y|}), \\&Array_{|Z|}(z_1, \ldots, z_{|Z|})\;\;)
|
|
\end{align}
|
|
<p style="font-size: smaller">
|
|
where $j \in [1, N]$, $Y = \{x_i | x_i.key \lt x_j.key\}$, $Z = \{x_i | x_i.key \geq x_j.key\}$
|
|
</p>
|
|
<h3 style="margin-top: 60px">Sort</h3>
|
|
$$Array_N(x_1, \ldots, x_N) \rightarrow Sorted_N(x_{f(1)}, \ldots, x_{f(N)})$$
|
|
<p style="font-size: smaller">
|
|
where $f : [N] \rightarrow [N]$ and $x_{f(i)} \leq x_{f(i+1)}$
|
|
</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Crack</h3>
|
|
<p><b>Dequeue: </b> 1x Array</p>
|
|
<p><b>Enqueue: </b> 2x Array</p>
|
|
|
|
<h3 style="margin-top: 60px">Sort</h3>
|
|
<p><b>Dequeue: </b> 1x Array</p>
|
|
<p><b>Enqueue: </b> 1x Sorted Array</p>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="figs/Split-Step1.svg">
|
|
<p><b>Option 1:</b> Crack($Array_8(1 \ldots 8)$)</p>
|
|
<p><b>Option 2:</b> Sort($Array_8(1 \ldots 8)$)</p>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="figs/Split-Step2.svg">
|
|
<p><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
|
|
<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
|
|
<p><b>Option 3:</b> Crack($Array_4(5 \ldots 8)$)</p>
|
|
<p><b>Option 4:</b> Sort($Array_4(5 \ldots 8)$)</p>
|
|
</section>
|
|
|
|
<section>
|
|
<img src="figs/Split-Step3.svg">
|
|
<p><b>Option 1:</b> Crack($Array_4(1 \ldots 4)$)</p>
|
|
<p><b>Option 2:</b> Sort($Array_4(1 \ldots 4)$)</p>
|
|
</section>
|
|
</section>
|
|
|
|
<section>
|
|
<section>
|
|
<h2>Incremental Transitions</h2>
|
|
<ol>
|
|
<li style="color: grey;">A Universal Instance Language</li>
|
|
<li style="color: grey;">Realizing Universal Data Structures</li>
|
|
<li style="color: grey;">Just-In-Time Data Structure Optimization</li>
|
|
<li style="color: black;">Optimization Policy Discovery</li>
|
|
</ol>
|
|
</section>
|
|
|
|
<section>
|
|
<p>How to prioritize rewrites?</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Cost Model</h3>
|
|
<p><b>Array_N:</b> $O(N)$</p>
|
|
<p><b>Sorted_N:</b> $O(N\cdot \log(N))$</p>
|
|
<p><b>BT:</b> Negligible</p>
|
|
|
|
<p style="margin-top: 60px; font-weight: bold" class="fragment">Compute expected utility of a static state.</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Utility</h3>
|
|
<ol>
|
|
<li>Throughput</li>
|
|
<li>(Negative) Latency</li>
|
|
<li>Time spent with latency below 300ms</li>
|
|
</ol>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Heuristic: Sort Below Threshold Size</h3>
|
|
<img src="results/Predictions.png" height="400px">
|
|
<p class="fragment">Short-term value vs long-term performance.</p>
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Deriving Policies</h3>
|
|
<ol>
|
|
<li>Start with a heuristic and optimize parameters.<ul>
|
|
<li>e.g., Pick a threshold to sort at.</li></ul>
|
|
</li>
|
|
<li>Model the expected cumulative utility of each candidate rewrite<ul>
|
|
<li>e.g., Priority queue of Array nodes remaining.</li></ul>
|
|
</li>
|
|
</ol>
|
|
</section>
|
|
<!-- Policy Discovery and Optimization
|
|
- Purely Heuristic Design: Cracker Index / Transition Policy / Splay Trees
|
|
- Assisted Discovery: Parameterized Search Space + Cost Model
|
|
- Autonomous Discovery:
|
|
- Simulation-Based
|
|
- (Likely Expensive) Chase-style
|
|
-->
|
|
</section>
|
|
|
|
<section>
|
|
<h3>Just-in-Time Data Structures</h3>
|
|
<ul>
|
|
<li>The Universal Instance Language can describe the intermediate state of a data structure in transition.</li>
|
|
<li>Localized, event-driven rewrites can emulate the behaviors of existing data structures and be hybridized.</li>
|
|
<li>Simulation + Cost-Analysis can be used to derive policies to drive direct rewrites.</li>
|
|
</ul>
|
|
<p class="fragment">Questions?</p>
|
|
</section>
|
|
|
|
</div></div>
|
|
|
|
<script src="../reveal.js-3.1.0/lib/js/head.min.js"></script>
|
|
<script src="../reveal.js-3.1.0/js/reveal.js"></script>
|
|
|
|
<script>
|
|
|
|
// Full list of configuration options available at:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
  // Presentation chrome and navigation options.
  controls: false,
  progress: true,
  history: true,
  center: true,
  slideNumber: true,

  // Slide transition style: none/fade/slide/convex/concave/zoom
  transition: 'fade',

  // Optional reveal.js plugins
  dependencies: [
    // classList script; its condition holds when document.body.classList is missing.
    {
      src: '../reveal.js-3.1.0/lib/js/classList.js',
      condition: function() { return !document.body.classList; }
    },
    // Math plugin, pointed at the MathJax copy bundled next to reveal.js.
    {
      src: '../reveal.js-3.1.0/plugin/math/math.js',
      condition: function() { return true; },
      mathjax: '../reveal.js-3.1.0/js/MathJax.js'
    },
    // Markdown scripts; their condition holds when a [data-markdown] element exists.
    {
      src: '../reveal.js-3.1.0/plugin/markdown/marked.js',
      condition: function() { return !!document.querySelector( '[data-markdown]' ); }
    },
    {
      src: '../reveal.js-3.1.0/plugin/markdown/markdown.js',
      condition: function() { return !!document.querySelector( '[data-markdown]' ); }
    },
    // Syntax highlighting; its condition holds when the page has a <pre><code> block.
    {
      src: '../reveal.js-3.1.0/plugin/highlight/highlight.js',
      async: true,
      condition: function() { return !!document.querySelector( 'pre code' ); },
      callback: function() { hljs.initHighlightingOnLoad(); }
    },
    // Zoom and speaker-notes plugins, loaded asynchronously.
    {
      src: '../reveal.js-3.1.0/plugin/zoom-js/zoom.js',
      async: true
    },
    {
      src: '../reveal.js-3.1.0/plugin/notes/notes.js',
      async: true
    }
  ]
});
|
|
|
|
</script>
|
|
|
|
</body>
|
|
</html>
|