341 lines
15 KiB
HTML
341 lines
15 KiB
HTML
<!doctype html>
|
||
<html lang="en">
|
||
|
||
<head>
|
||
<meta charset="utf-8">
|
||
|
||
<title>CSE 4/562 - Spring 2018</title>
|
||
|
||
<meta name="description" content="CSE 4/562 - Spring 2018">
|
||
<meta name="author" content="Oliver Kennedy">
|
||
|
||
<meta name="apple-mobile-web-app-capable" content="yes" />
|
||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
||
|
||
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
|
||
<link rel="stylesheet" href="ubodin.css" id="theme">
|
||
|
||
<!-- Code syntax highlighting -->
|
||
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
|
||
|
||
<!-- Printing and PDF exports -->
|
||
<script>
|
||
var link = document.createElement( 'link' );
|
||
link.rel = 'stylesheet';
|
||
link.type = 'text/css';
|
||
link.href = window.location.search.match( /print-pdf/gi ) ? '../reveal.js-3.6.0/css/print/pdf.css' : '../reveal.js-3.6.0/css/print/paper.css';
|
||
document.getElementsByTagName( 'head' )[0].appendChild( link );
|
||
</script>
|
||
|
||
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
|
||
|
||
<!--[if lt IE 9]>
|
||
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
|
||
<![endif]-->
|
||
</head>
|
||
|
||
<body>
|
||
|
||
<div class="reveal">
|
||
<!-- Any section element inside of this container is displayed as a slide -->
|
||
|
||
<div class="header">
|
||
<!-- Any Talk-Specific Header Content Goes Here -->
|
||
CSE 4/562 - Database Systems
|
||
</div>
|
||
|
||
<div class="slides">
|
||
|
||
<section>
|
||
<h1>Cost Based Optimization</h1>
|
||
<h3>CSE 4/562 – Database Systems</h3>
|
||
<h5>February 28, 2018</h5>
|
||
</section>
|
||
<!-- ============================================ -->
|
||
|
||
<section>
|
||
<section>
|
||
<h3>General Query Optimizers</h3>
|
||
<ol style="font-size: 60%">
|
||
<li>Apply blind heuristics (e.g., push down selections)</li>
|
||
<li>Enumerate all possible <i>execution plans</i> (or a reasonable subset) by varying
|
||
<ul>
|
||
<li>Join/Union Evaluation Order (commutativity, associativity, distributivity)</li>
|
||
<li>Algorithms for Joins, Aggregates, Sort, Distinct, and others</li>
|
||
<li>Data Access Paths</li>
|
||
</ul>
|
||
</li>
|
||
<li class="fragment highlight-blue">Estimate the cost of each execution plan</li>
|
||
<li>Pick the execution plan with the lowest cost</li>
|
||
</ol>
|
||
</section>
|
||
</section>
|
||
|
||
<section>
|
||
<section>
|
||
<p><b>Idea 1: </b> Run each plan</p>
|
||
</section>
|
||
|
||
<section>
|
||
<img src="graphics/Clipart/facepalm.jpg" class="stretch" alt="Facepalm" />
|
||
<attribution>© Paramount Pictures</attribution>
|
||
</section>
|
||
|
||
<section>
|
||
<p>If we can't get the exact cost of a plan, what can we do?</p>
|
||
</section>
|
||
|
||
<section>
|
||
<p class="fragment highlight-grey"><b>Idea 2: </b> Run each plan on a small sample of the data.</p>
|
||
<p style="margin-top: 50px;"><b>Idea 3: </b> Analytically estimate the cost of a plan.</p>
|
||
</section>
|
||
|
||
<section>
|
||
<h3>Plan Cost</h3>
|
||
<dl>
|
||
<div class="fragment" data-fragment-index="1"><div class="fragment highlight-grey" data-fragment-index="4">
|
||
<dt>CPU Time</dt>
|
||
<dd>How much time is spent processing.</dd>
|
||
</div></div>
|
||
|
||
<div class="fragment" data-fragment-index="2">
|
||
<dt># of IOs</dt>
|
||
<dd>How many random reads + writes go to disk.</dd>
|
||
</div>
|
||
|
||
<div class="fragment" data-fragment-index="3">
|
||
<dt>Memory Required</dt>
|
||
<dd>How much memory do you need.</dd>
|
||
</div>
|
||
</dl>
|
||
</section>
|
||
|
||
<section>
|
||
<img src="graphics/Clipart/estimation.png" alt="xkcd comic about estimation">
|
||
<attribution>Randall Munroe (<a href="https://creativecommons.org/licenses/by-nc/2.5/">cc-by-nc</a>)</attribution>
|
||
</section>
|
||
|
||
<section>
|
||
<h3>Remember the Real Goals</h3>
|
||
<ol>
|
||
<li class="fragment">Accurately <b>rank</b> the plans.</li>
|
||
<li class="fragment">Don't spend more time optimizing than you get back.</li>
|
||
<li class="fragment">Don't pick a plan that uses more memory than you have.</li>
|
||
</ol>
|
||
</section>
|
||
</section>
|
||
|
||
<!-- ============================================ -->
|
||
|
||
<section>
|
||
<section>
|
||
<h3>Accounting</h3>
|
||
<p class="fragment" style="margin-top: 50px;">Figure out the cost of each <b>individual</b> operator.</p>
|
||
<p class="fragment" style="margin-top: 50px;">Only count the number of IOs <b>added</b> by each operator.</p>
|
||
</section>
|
||
|
||
<section>
|
||
<table style="font-size: 70%">
|
||
<tr><th>Operation</th><th>RA</th><th>IOs Added (#pages)</th><th>Memory (#tuples)</th></tr>
|
||
<tr class="fragment" data-fragment-index="0">
|
||
<td>Table Scan</td>
|
||
<td>$R$</td>
|
||
<td class="fragment" data-fragment-index="1">$\frac{|R|}{\mathcal P}$</td>
|
||
<td class="fragment" data-fragment-index="2">$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="3">
|
||
<td>Projection</td>
|
||
<td>$\pi(R)$</td>
|
||
<td class="fragment" data-fragment-index="4">$0$</td>
|
||
<td class="fragment" data-fragment-index="4">$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="5">
|
||
<td>Selection</td>
|
||
<td>$\sigma(R)$</td>
|
||
<td>$0$</td>
|
||
<td>$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="6">
|
||
<td>Union</td>
|
||
<td>$R \uplus S$</td>
|
||
<td>$0$</td>
|
||
<td>$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="7">
|
||
<td style="vertical-align: middle;">Sort <span class="fragment" data-fragment-index="8">(In-Mem)</span></td>
|
||
<td style="vertical-align: middle;">$\tau(R)$</td>
|
||
<td class="fragment" data-fragment-index="8">$0$</td>
|
||
<td class="fragment" data-fragment-index="9">$O(|R|)$</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="fragment" data-fragment-index="10">Sort (On-Disk)</td>
|
||
<td class="fragment" data-fragment-index="10">$\tau(R)$</td>
|
||
<td class="fragment" data-fragment-index="11">$\frac{2 \cdot \lfloor \log_{\mathcal B}(|R|) \rfloor}{\mathcal P}$</td>
|
||
<td class="fragment" data-fragment-index="10">$O(\mathcal B)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="12">
|
||
<td><span class="fragment" data-fragment-index="13">(B+Tree)</span> Index Scan</td>
|
||
<td>$Index(R, c)$</td>
|
||
<td class="fragment" data-fragment-index="13">$\log_{\mathcal I}(|R|) + \frac{|\sigma_c(R)|}{\mathcal P}$</td>
|
||
<td class="fragment" data-fragment-index="14">$O(1)$</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="fragment" data-fragment-index="15">(Hash) Index Scan</td>
|
||
<td class="fragment" data-fragment-index="15">$Index(R, c)$</td>
|
||
<td class="fragment" data-fragment-index="15">$1$</td>
|
||
<td class="fragment" data-fragment-index="16">$O(1)$</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<ol style="font-size: 50%; margin-top: 50px;">
|
||
<li class="fragment" data-fragment-index="1">Tuples per Page ($\mathcal P$) <span>– Normally defined per-schema</span></li>
|
||
<li class="fragment" data-fragment-index="1">Size of $R$ ($|R|$)</li>
|
||
<li class="fragment" data-fragment-index="10">Pages of Buffer ($\mathcal B$)</li>
|
||
<li class="fragment" data-fragment-index="13">Keys per Index Page ($\mathcal I$)</li>
|
||
</ol>
|
||
</section>
|
||
<section>
|
||
<table style="font-size: 70%">
|
||
<tr><th width="300px">Operation</th><th>RA</th><th>IOs Added (#pages)</th><th>Memory (#tuples)</th></tr>
|
||
<tr class="fragment" data-fragment-index="1">
|
||
<td style="font-size: 60%">Nested Loop Join <span class="fragment" data-fragment-index="2">(Buffer $S$ in mem)</span></td>
|
||
<td>$R \times S$</td>
|
||
<td class="fragment" data-fragment-index="2">$0$</td>
|
||
<td class="fragment" data-fragment-index="3">$O(|S|)$</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="fragment" data-fragment-index="4" style="font-size: 60%">Nested Loop Join (Buffer $S$ on disk)</td>
|
||
<td class="fragment" data-fragment-index="4">$R \times_{disk} S$</td>
|
||
<td class="fragment" data-fragment-index="5">$(1+ |R|) \cdot \frac{|S|}{\mathcal P}$</td>
|
||
<td class="fragment" data-fragment-index="4">$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="6">
|
||
<td>1-Pass Hash Join</td>
|
||
<td>$R \bowtie_{1PH, c} S$</td>
|
||
<td class="fragment" data-fragment-index="7">$0$</td>
|
||
<td class="fragment" data-fragment-index="7">$O(|S|)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="8">
|
||
<td>2-Pass Hash Join</td>
|
||
<td>$R \bowtie_{2PH, c} S$</td>
|
||
<td class="fragment" data-fragment-index="9">$\frac{2|R| + 2|S|}{\mathcal P}$</td>
|
||
<td class="fragment" data-fragment-index="9">$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="10">
|
||
<td>Sort-Merge Join </td>
|
||
<td>$R \bowtie_{SM, c} S$</td>
|
||
<td class="fragment" data-fragment-index="11">[Sort]</td>
|
||
<td class="fragment" data-fragment-index="11">[Sort]</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="12">
|
||
<td><span class="fragment" data-fragment-index="13">(Tree)</span> Index NLJ</td>
|
||
<td>$R \bowtie_{INL, c} S$</td>
|
||
<td class="fragment" data-fragment-index="13">$|R| \cdot (\log_{\mathcal I}(|S|) + \frac{|\sigma_c(S)|}{\mathcal P})$</td>
|
||
<td class="fragment" data-fragment-index="14">$O(1)$</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="fragment" data-fragment-index="15">(Hash) Index NLJ</td>
|
||
<td class="fragment" data-fragment-index="15">$R \bowtie_{INL, c} S$</td>
|
||
<td class="fragment" data-fragment-index="15">$|R| \cdot 1$</td>
|
||
<td class="fragment" data-fragment-index="16">$O(1)$</td>
|
||
</tr>
|
||
<tr class="fragment" data-fragment-index="17">
|
||
<td><span class="fragment" data-fragment-index="18">(In-Mem)</span> Aggregate</td>
|
||
<td>$\gamma_A(R)$</td>
|
||
<td class="fragment" data-fragment-index="18">$0$</td>
|
||
<td class="fragment" data-fragment-index="19">$adom(A)$</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="fragment" data-fragment-index="20" style="font-size: 90%">(Sort/Merge) Aggregate</td>
|
||
<td class="fragment" data-fragment-index="20">$\gamma_A(R)$</td>
|
||
<td class="fragment" data-fragment-index="20">[Sort]</td>
|
||
<td class="fragment" data-fragment-index="20">[Sort]</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<ol style="font-size: 50%;">
|
||
<li>Tuples per Page ($\mathcal P$) <span>– Normally defined per-schema</span></li>
|
||
<li>Size of $R$ ($|R|$)</li>
|
||
<li>Pages of Buffer ($\mathcal B$)</li>
|
||
<li>Keys per Index Page ($\mathcal I$)</li>
|
||
<li class="fragment" data-fragment-index="19">Number of distinct values of $A$ ($adom(A)$)</li>
|
||
</ol>
|
||
</section>
|
||
|
||
<section>
|
||
<p><b>Next Class: </b> How to estimate $|R|$</p>
|
||
</section>
|
||
</section>
|
||
|
||
</div></div>
|
||
|
||
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
|
||
|
||
<script>
|
||
|
||
// Full list of configuration options available at:
|
||
// https://github.com/hakimel/reveal.js#configuration
|
||
Reveal.initialize({
|
||
controls: true,
|
||
progress: true,
|
||
history: true,
|
||
center: true,
|
||
slideNumber: true,
|
||
|
||
transition: 'fade', // none/fade/slide/convex/concave/zoom
|
||
|
||
chart: {
|
||
defaults: {
|
||
global: {
|
||
title: { fontColor: "#333", fontSize: 24 },
|
||
legend: {
|
||
labels: { fontColor: "#333", fontSize: 20 },
|
||
},
|
||
responsiveness: true
|
||
},
|
||
scale: {
|
||
scaleLabel: { fontColor: "#333", fontSize: 20 },
|
||
gridLines: { color: "#333", zeroLineColor: "#333" },
|
||
ticks: { fontColor: "#333", fontSize: 16 },
|
||
}
|
||
},
|
||
line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
|
||
bar: { backgroundColor: [
|
||
"rgba(220,220,220,0.8)",
|
||
"rgba(151,187,205,0.8)",
|
||
"rgba(205,151,187,0.8)",
|
||
"rgba(187,205,151,0.8)"
|
||
]
|
||
},
|
||
pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
|
||
radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
|
||
},
|
||
|
||
// Optional reveal.js plugins
|
||
dependencies: [
|
||
{ src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
|
||
{ src: '../reveal.js-3.6.0/plugin/math/math.js',
|
||
condition: function() { return true; },
|
||
// NOTE(review): the reveal.js 3.x math plugin appears to read its MathJax path from the top-level `math: { mathjax: ... }` option (Reveal.getConfig().math), not from a key on this dependency entry — confirm this local path is actually used rather than the plugin's CDN default.
mathjax: '../reveal.js-3.6.0/js/MathJax.js'
|
||
},
|
||
{ src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
|
||
{ src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
|
||
{ src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
|
||
{ src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
|
||
{ src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
|
||
// Chart.min.js
|
||
{ src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
|
||
// the plugin
|
||
{ src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},
|
||
{ src: '../reveal.js-3.6.0/plugin/svginline/es6-promise.auto.js', async: false },
|
||
{ src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }
|
||
]
|
||
});
|
||
|
||
</script>
|
||
|
||
</body>
|
||
</html>
|