366 lines
12 KiB
HTML
366 lines
12 KiB
HTML
<!doctype html>
|
||
<html lang="en">
|
||
|
||
<head>
|
||
<meta charset="utf-8">
|
||
|
||
<title>CSE 4/562 - Spring 2018</title>
|
||
|
||
<meta name="description" content="CSE 4/562 - Spring 2018">
|
||
<meta name="author" content="Oliver Kennedy">
|
||
|
||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
||
|
||
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
|
||
<link rel="stylesheet" href="ubodin.css" id="theme">
|
||
|
||
<!-- Code syntax highlighting -->
|
||
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
|
||
|
||
<!-- Printing and PDF exports -->
|
||
<script>
	// Build a <link> for the print stylesheet and attach it to <head>.
	// When the query string contains "print-pdf" (matched case-insensitively)
	// the PDF-export sheet is used; otherwise the paper sheet is used.
	var link = document.createElement( 'link' );

	if( window.location.search.match( /print-pdf/gi ) ) {
		link.href = '../reveal.js-3.6.0/css/print/pdf.css';
	}
	else {
		link.href = '../reveal.js-3.6.0/css/print/paper.css';
	}

	link.type = 'text/css';
	link.rel = 'stylesheet';

	document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
|
||
|
||
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
|
||
|
||
<!--[if lt IE 9]>
|
||
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
|
||
<![endif]-->
|
||
</head>
|
||
|
||
<body>
|
||
|
||
<div class="reveal">
|
||
<!-- Any section element inside of this container is displayed as a slide -->
|
||
|
||
<div class="header">
	<!-- Talk-specific header content goes here; this element precedes the .slides container -->
	CSE 4/562 - Database Systems
</div>
|
||
|
||
<div class="slides">
|
||
|
||
<!-- 2018 by OK:
|
||
Indexing really belongs *before* we discuss join algorithms.
|
||
Not only is indexing fundamental to several joins (1-pass, 2-pass, INLJ), but it
|
||
organizes the discussion of algorithms in a more contiguous block.
|
||
-->
|
||
|
||
<!-- Title slide: lecture topic, course name, and lecture date -->
<section>
	<h1>Indexes</h1>
	<h3>CSE 4/562 – Database Systems</h3>
	<h5>February 19, 2018</h5>
</section>
|
||
|
||
<!-- Vertical stack: motivation for indexing and an overview of index kinds -->
<section>
	<!-- Slide: textbook analogy; the two editions are compared by price and by index / table of contents -->
	<section>
		<table>
			<tr>
				<td>
					<img src="graphics/Books/DBSystemsHardcover.jpg" height="200" alt="Hardcover edition of the Database Systems textbook">
				</td>
				<td>
					<img src="graphics/Books/DBSystemsSoftcover.jpg" height="200" alt="Softcover edition of the Database Systems textbook">
				</td>
			</tr>
			<tr class="fragment">
				<td>$150</td>
				<td>$50</td>
			</tr>
			<tr class="fragment">
				<td>Index<br>ToC</td>
				<td>No Index<br>ToC Summary</td>
			</tr>
		</table>
	</section>

	<!-- Slide: the relational operators this lecture focuses on -->
	<section>
		<h3>Today's Focus</h3>

		<p style="margin: 100px;">
			$\sigma_C(R)$ <span style="margin: 50px">and</span> $(\ldots \bowtie_C R)$
		</p>
		<p class="fragment" style="font-size: 70%">(Finding records in a table <span class="fragment">really fast</span>)</p>
	</section>

	<!-- Slide: two strategies, each revealed as one fragment with a nested follow-up fragment -->
	<section>
		<h3>Indexing Strategies</h3>

		<dl>
			<div class="fragment">
				<dt>Rearrange the data.</dt>
				<dd>Put things in a predictable location or a specific order.</dd>
				<dd class="fragment">("clustering" the data)</dd>
			</div>

			<div class="fragment">
				<dt>Wrap the data.</dt>
				<dd>Record where specific data values live.</dd>
				<dd class="fragment">("indexing" the data)</dd>
			</div>
		</dl>
	</section>

	<!-- Slide: the three data organizations, revealed one at a time -->
	<section>
		<h3>Data Organization</h3>

		<dl>
			<div class="fragment">
				<dt>Unordered Heap</dt>
				<dd>No organization at all. $O(N)$ reads.</dd>
			</div>

			<div class="fragment">
				<dt>(Secondary) Index</dt>
				<dd>Index structure over unorganized data. $O(\ll N)$ <b>random</b> reads for <b>some</b> queries.</dd>
			</div>

			<div class="fragment">
				<dt>Clustered (Primary) Index</dt>
				<dd>Index structure over clustered data. $O(\ll N)$ <b>sequential</b> reads for <b>some</b> queries.</dd>
			</div>
		</dl>
	</section>

	<!-- Slide: figure (alt text inferred from the file name; adjust if the figure shows something else) -->
	<section>
		<h3>Data Organization</h3>
		<img src="graphics/2018-02-19-Index-Types.svg" alt="Diagram of index types">
	</section>

	<!-- Slide: figure (alt text inferred from the file name; adjust if the figure shows something else) -->
	<section>
		<h3>Data Organization</h3>
		<img src="graphics/2018-02-19-PrimaryVsSecondary.png" alt="Diagram comparing primary and secondary indexes">
	</section>

	<!-- Slide: index families; fragment 1 reveals "(new)", fragment 2 applies highlight-grey to the hash/CDF entries -->
	<section>
		<h3>Index Types</h3>

		<dl>
			<dt>Tree-Based</dt>
			<dd>A hierarchy of decisions leads to data at the leaves.</dd>

			<div class="fragment highlight-grey" data-fragment-index="2">
				<dt>Hash-Based</dt>
				<dd>A hash function puts data in predictable locations.</dd>

				<dt>CDF-Based <span style="color: red" class="fragment" data-fragment-index="1">(new)</span></dt>
				<dd>A more complex function predicts where data lives.</dd>
			</div>
		</dl>
	</section>
</section>
|
||
|
||
<!-- Vertical stack: tree-based indexes and their challenges -->
<section>
	<!-- Slide: figure inlined from data-src -->
	<section>
		<h3>Tree-Based Indexes</h3>

		<svg data-src="graphics/2018-02-19-Tree-BinSearch.svg"></svg>
	</section>

	<!-- Slide: figure inlined from data-src -->
	<section>
		<h3>Tree-Based Indexes</h3>

		<svg data-src="graphics/2018-02-19-Tree-Motivation.svg"></svg>
	</section>

	<!-- Slide: challenges; the last term and its description share fragment index 1 -->
	<section>
		<h3>Challenges</h3>

		<dl>
			<dt>Balance</dt>
			<dd>Bad question orders lead to poor performance!</dd>

			<dt>IO</dt>
			<dd>Each access to a binary tree node is a random access.</dd>

			<dt class="fragment highlight-grey" data-fragment-index="1">Which Dimension</dt>
			<dd class="fragment highlight-grey" data-fragment-index="1">Why limit ourselves to asking about one dimension?</dd>
		</dl>
	</section>

	<!-- Slide: figure only, carrying the "stretch" class -->
	<section>
		<svg class="stretch" data-src="graphics/2018-02-19-Tree-Unbalanced.svg"></svg>
	</section>

	<!-- Slide: worst case vs. best case, revealed step by step -->
	<section>
		<h3>Worst-Case Tree?</h3>
		<div class="fragment" style="margin: 100px">$O(N)$ with the tree laid out left/right-deep</div>
		<h3 class="fragment">Best-Case Tree?</h3>
		<div class="fragment" style="margin: 100px">$O(\log N)$ with the tree perfectly balanced</div>
	</section>
</section>
|
||
|
||
<!-- Vertical stack: why binary trees are IO-unfriendly, K-ary trees, and ISAM -->
<section>
	<section>
		<h3>Binary Trees are Bad for IO</h3>
		<p style="margin-top: 100px;">Every step of binary search is a random access</p>
		<p style="margin-top: 100px;" class="fragment">Every tree node access is a random access</p>
	</section>

	<section>
		<p>Random access IO is bad.</p>
	</section>

	<section>
		<p><b>Idea: </b> Load a bunch of binary tree nodes together.</p>
	</section>

	<!-- Slide: binary vs. K-ary tree; both K-ary paragraphs share fragment index 1 so they appear together -->
	<section>
		<p><b>Binary Tree: </b> $1$ separator &amp; $2$ pointers</p>
		<p>$\log_2(N)$ Deep</p>
		<p class="fragment" data-fragment-index="1" style="margin-top: 50px;"><b>$K$-ary Tree: </b> $(K-1)$ separators &amp; $K$ pointers</p>
		<p class="fragment" data-fragment-index="1">$\log_K(N)$ Deep</p>
	</section>

	<section>
		<p><b>Important:</b> You still need to do binary search on each node of a $K$-ary tree, but now you're doing random access on memory (or cache) instead of disk (or memory)</p>
	</section>

	<!-- Slide: figure (alt text inferred from the slide title and file name) -->
	<section>
		<h3>ISAM Trees</h3>
		<img src="graphics/2018-02-19-ISAM.png" height="500" alt="Diagram of an ISAM tree">
	</section>
</section>
|
||
|
||
<!-- Vertical stack: handling updates, leading to the B+Tree rules -->
<section>
	<section>
		<p>How do you handle updates?</p>
		<p class="fragment" style="margin-top: 50px;">B+Tree = ISAM + Updates</p>
	</section>

	<!-- Slide: each challenge is its own fragment -->
	<section>
		<h3>Challenges</h3>

		<ul>
			<li class="fragment">Finding space for new records</li>
			<li class="fragment">Keeping the tree balanced as new records are added</li>
		</ul>
	</section>

	<section>
		<p><b>Idea 1:</b> Reserve space for new records</p>
	</section>

	<!-- Slide: figure inlined from data-src -->
	<section>
		<svg data-src="graphics/2018-02-19-BTree-Reserved.svg"></svg>
	</section>

	<section>
		<p>Just maintaining open space won't work forever...</p>
	</section>

	<!-- Slide: split/merge rules, then the fill invariant and its exception as fragments -->
	<section>
		<h3>Rules of B+Trees</h3>

		<dl>
			<dt>Keep space open for insertions in inner/data nodes.</dt>
			<dd>‘Split’ nodes when they’re full</dd>

			<dt>Avoid under-using space</dt>
			<dd>‘Merge’ nodes when they’re under-filled</dd>
		</dl>

		<p class="fragment"><b>Maintain Invariant:</b> All Nodes ≥ 50% Full</p>
		<p class="fragment">(Exception: The Root)</p>
	</section>
</section>
|
||
|
||
<!-- Vertical stack: eight-frame insertion walkthrough, one image per slide.
     Alt text is inferred from the file names and the surrounding B+Tree slides. -->
<section>
	<section><img src="graphics/2018-02-19-InsertExample-1.png" height="300" alt="B+Tree insertion example, step 1 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-2.png" height="300" alt="B+Tree insertion example, step 2 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-3.png" height="300" alt="B+Tree insertion example, step 3 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-4.png" height="300" alt="B+Tree insertion example, step 4 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-5.png" height="300" alt="B+Tree insertion example, step 5 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-6.png" height="300" alt="B+Tree insertion example, step 6 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-7.png" height="300" alt="B+Tree insertion example, step 7 of 8"></section>
	<section><img src="graphics/2018-02-19-InsertExample-8.png" height="300" alt="B+Tree insertion example, step 8 of 8"></section>
	<section><p>Deletions reverse this process (at 50% fill).</p></section>
</section>
|
||
|
||
<!-- Slide: a single large link to an external B+Tree visualization page -->
<section>
	<a
		style="font-size: 200%; font-weight: bold;"
		href="https://www.cs.usfca.edu/~galles/visualization/BPlusTree.html"
	>Demo</a>
</section>
|
||
|
||
<!-- Closing slide: preview of the next lecture -->
<section>
	<p><b>Next Class</b>: Hash- and CDF-Based Indexes</p>
</section>
|
||
|
||
</div></div>
|
||
|
||
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
|
||
|
||
<script>

	// Presentation bootstrap: passes the deck-wide settings, the `chart`
	// defaults block, and the plugin dependency list to Reveal.initialize().
	//
	// Full list of configuration options available at:
	// https://github.com/hakimel/reveal.js#configuration
	Reveal.initialize({
		controls: true,
		progress: true,
		history: true,
		center: true,
		slideNumber: true,

		transition: 'fade', // none/fade/slide/convex/concave/zoom

		// Chart styling defaults; presumably consumed by the chart plugin
		// (Chart.min.js + csv2chart.js) listed under `dependencies`.
		chart: {
			defaults: {
				global: {
					title: { fontColor: "#333", fontSize: 24 },
					legend: {
						labels: { fontColor: "#333", fontSize: 20 }
					},
					// NOTE(review): Chart.js documents this global option as
					// `responsive`; confirm `responsiveness` is the key the
					// chart plugin actually expects.
					responsiveness: true
				},
				scale: {
					scaleLabel: { fontColor: "#333", fontSize: 20 },
					gridLines: { color: "#333", zeroLineColor: "#333" },
					ticks: { fontColor: "#333", fontSize: 16 }
				}
			},
			line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
			bar: { backgroundColor: [
					"rgba(220,220,220,0.8)",
					"rgba(151,187,205,0.8)",
					"rgba(205,151,187,0.8)",
					"rgba(187,205,151,0.8)"
				]
			},
			pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
			radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]}
		},

		// Optional reveal.js plugins
		dependencies: [
			// classList polyfill: loaded only when document.body.classList is missing
			{ src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },

			// Math plugin: always loaded (the deck uses $...$ math on its slides).
			// NOTE(review): `mathjax` is set on this dependency entry; the stock
			// reveal.js math plugin documents the MathJax location under a
			// top-level `math: { mathjax: ... }` option. Confirm this path is
			// actually read (e.g. by a locally modified math.js).
			{ src: '../reveal.js-3.6.0/plugin/math/math.js',
				condition: function() { return true; },
				mathjax: '../reveal.js-3.6.0/js/MathJax.js'
			},

			// Markdown support: loaded only if some element carries [data-markdown]
			{ src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
			{ src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },

			// Syntax highlighting: loaded only if the deck contains a <pre><code> block
			{ src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },

			{ src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
			{ src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },

			// Chart.min.js
			{ src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
			// the plugin
			{ src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},

			// Inline-SVG support for the <svg data-src="..."> elements; both
			// entries are loaded synchronously, in this order.
			{ src: '../reveal.js-3.6.0/plugin/svginline/es6-promise.auto.js', async: false },
			{ src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }
		]
	});

</script>
|
||
|
||
</body>
|
||
</html>
|