
524 lines
23 KiB
Raw Normal View History

2018-02-06 22:44:57 -05:00
<!doctype html>
<html lang="en">
<meta charset="utf-8">
<title>CSE 4/562 - Spring 2018</title>
<meta name="description" content="CSE 4/562 - Spring 2018">
<meta name="author" content="Oliver Kennedy">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
<link rel="stylesheet" href="../reveal.js-3.6.0/css/reveal.css">
<link rel="stylesheet" href="ubodin.css" id="theme">
<!-- Code syntax highlighting -->
<link rel="stylesheet" href="../reveal.js-3.6.0/lib/css/zenburn.css">
<!-- Printing and PDF exports -->
<script>
  // Pick the print stylesheet at load time: the PDF-export sheet when the URL
  // query string contains "print-pdf", the plain paper sheet otherwise.
  var link = document.createElement( 'link' );
  link.rel = 'stylesheet';
  link.type = 'text/css';
  // String.prototype.match returns null (falsy) when "print-pdf" is absent.
  link.href = window.location.search.match( /print-pdf/gi ) ? '../reveal.js-3.6.0/css/print/pdf.css' : '../reveal.js-3.6.0/css/print/paper.css';
  document.getElementsByTagName( 'head' )[0].appendChild( link );
</script>
<script src="../reveal.js-3.6.0/lib/js/head.min.js"></script>
<!--[if lt IE 9]>
<script src="../reveal.js-3.6.0/lib/js/html5shiv.js"></script>
<![endif]-->
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="header">
<!-- Any Talk-Specific Header Content Goes Here -->
CSE 4/562 - Database Systems
<div class="slides">
<h1>Relational Algebra Equivalences</h1>
<h3>CSE 4/562 Database Systems</h3>
<h5>February 7, 2018</h5>
<h3>Recap: Relational Algebra</h3>
<table style="font-size: 70%">
<tr><td>Selection</td><td>$\sigma$</td><td>Select a subset of the input rows</td></tr>
<tr><td>Projection</td><td>$\pi$</td><td>Delete unwanted columns</td></tr>
<tr><td>Cross-product</td><td>$\times$</td><td>Combine two relations</td></tr>
<tr><td>Set-difference</td><td>$-$</td><td>Tuples in Rel 1, but not Rel 2</td></tr>
<tr><td>Union</td><td>$\cup$</td><td>Tuples either in Rel 1 or in Rel 2</td></tr>
<tr><td>Intersection</td><td>$\cap$</td><td>Tuples in both Rel 1 and Rel 2</td></tr>
<tr><td>Join</td><td>$\bowtie$</td><td>Pairs of tuples matching a specified condition</td></tr>
<tr style="color: grey;"><td>Division</td><td>$/$</td><td>"Inverse" of cross-product</td></tr>
<h3>Division ($/$)</h3>
<p>Not typically supported as a primitive operator,<br/>but useful for expressing queries like:</p>
<p style="font-size: 70%; font-weight: bold">Find species that appear in all boroughs</p>
<div style="font-size: 70%" class="fragment">
$$\pi_{SPC\_COMMON,\ BORONAME}(\textbf{Trees}) \;\;/\;\;\pi_{BORONAME}(\textbf{Trees})$$
(using set relational algebra)
<p class="fragment">
$$R / S \equiv \{\; \left<\vec t\right> \;|\; \forall \left<\vec s\right> \in S, \left< \vec t \vec s \right> \in R \;\}$$
<h3>Division ($/$)</h3>
<table style="font-size: 60%; margin-top: 30px; display: inline-block; vertical-align: middle;">
<tr><th>BORO</th> <th>SPC_COMMON</th></tr>
<tr class="fragment highlight-blue" data-fragment-index="1"><td>Brooklyn</td> <td>honeylocust</td></tr>
<tr class="fragment highlight-red" data-fragment-index="3"><td>Brooklyn</td> <td>American linden</td></tr>
<tr><td>Brooklyn</td> <td>London planetree</td></tr>
<tr class="fragment highlight-blue" data-fragment-index="1"><td>Manhattan</td> <td>honeylocust</td></tr>
<tr class="fragment highlight-red" data-fragment-index="3"><td>Manhattan</td> <td>American linden</td></tr>
<tr class="fragment highlight-green" data-fragment-index="5"><td>Manhattan</td> <td>pin oak</td></tr>
<tr class="fragment highlight-blue" data-fragment-index="1"><td>Queens</td> <td>honeylocust</td></tr>
<tr class="fragment highlight-red" data-fragment-index="3"><td>Queens</td> <td>American linden</td></tr>
<tr class="fragment highlight-blue" data-fragment-index="1"><td>Bronx</td> <td>honeylocust</td></tr>
<table style="font-size: 40%; margin-left: 30px; display: inline-block; vertical-align: middle;">
<tr class="fragment" data-fragment-index="1"><td>
<span style="font-size: 200%">/</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>SPC_COMMON</th></tr><tr><td class="fragment highlight-current-blue" data-fragment-index="1">honeylocust</td></tr></table>
<span class="fragment" data-fragment-index="2">
<span style="font-size: 200%; margin-left: 10px;">=</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>BORO</th></tr><tr><td>Brooklyn</td></tr><tr><td>Manhattan</td></tr><tr><td>Queens</td></tr><tr><td>Bronx</td></tr></table>
<tr class="fragment" data-fragment-index="3"><td>
<span style="font-size: 200%">/</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>SPC_COMMON</th></tr><tr><td class="fragment highlight-current-blue" data-fragment-index="3">honeylocust</td></tr><tr><td class="fragment highlight-current-red" data-fragment-index="3">American linden</td></tr></table>
<span class="fragment" data-fragment-index="4">
<span style="font-size: 200%; margin-left: 10px;">=</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>BORO</th></tr><tr><td>Brooklyn</td></tr><tr><td>Manhattan</td></tr><tr><td>Queens</td></tr></table>
<tr class="fragment" data-fragment-index="5"><td>
<span style="font-size: 200%">/</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>SPC_COMMON</th></tr><tr><td class="fragment highlight-current-blue" data-fragment-index="5">honeylocust</td></tr><tr><td class="fragment highlight-current-red" data-fragment-index="5">American linden</td></tr><tr><td class="fragment highlight-current-green" data-fragment-index="5">pin oak</td></tr></table>
<span class="fragment" data-fragment-index="6">
<span style="font-size: 200%; margin-left: 10px;">=</span>
<table style="display: inline-block; vertical-align: middle; margin-left: 10px; border: 1px solid black;"><tr><th>BORO</th></tr><tr><td>Manhattan</td></tr></table>
<h3>The running theme</h3>
<span class="fragment highlight-grey" data-fragment-index="1">If X and Y are </span><u>equivalent</u><span class="fragment highlight-grey" data-fragment-index="1"> and Y is <u>better</u>,<br/>
then replace all Xs with Ys</span>
<p class="fragment" data-fragment-index="1" style="font-size: 70%;"><b>Today's focus</b>: Provable Equivalence for RA Expressions</p>
$$Q_1 = \pi_{A}\left( \sigma_{c}( R ) \right)$$
$$Q_2 = \sigma_{c}\left( \pi_{A}( R ) \right)$$
<div class="fragment">
$$Q_1 \stackrel{?}{\equiv} Q_2$$
<h3>Ground Rules</h3>
<dt class="fragment" data-fragment-index="1">Only Relational Values Matter</dt>
<dd class="fragment" data-fragment-index="1">Obviously $Q_1 \neq Q_2$. What we care about is whether $Q_1(R) = Q_2(R)$...</dd>
<dt class="fragment" data-fragment-index="2">Data Independent</dt>
<dd class="fragment" data-fragment-index="2">... for <i>all</i> valid input data $R$.</dd>
<dd class="fragment" data-fragment-index="3" style="font-size: 70%">However, it's fair to talk about equivalence when we know the data has some properties. (more on this later)</dd>
<dt class="fragment" data-fragment-index="4">Data-Model Dependent</dt>
<dd class="fragment" data-fragment-index="4">It's important to be clear whether we're talking about sets, <span class="fragment highlight-blue">bags</span>, or lists.</dd>
<h3>In summary...</h3>
<p style="font-size: 80%;">
We say that $Q_1 \equiv Q_2$ if and only if<br/>
we can guarantee that the <i>bag</i> of tuples produced by $Q_1(R, S, T, \ldots)$ <br/>
is the same as the <i>bag</i> of tuples produced by $Q_2(R, S, T, \ldots)$ <br/>
for any combination of valid inputs $R, S, T, \ldots$.
<p style="font-size: 70%;" class="fragment">
... that satisfy any necessary properties.
<h3>Starting Rules</h3>
<table style="font-size: 80%">
<tr><th colspan="2" style="padding-top: 20px;">Selection</th></tr>
<td>$\sigma_{c_1 \wedge c_2}(R) \equiv \sigma_{c_1}(\sigma_{c_2}(R))$</td>
<tr><th colspan="2" style="padding-top: 20px;">Projection</th></tr>
<td>$\pi_{A}(R) \equiv \pi_{A}(\pi_{A \cup B}(R))$</td>
<tr><th colspan="2" style="padding-top: 20px;">Cross Product</th></tr>
<td>$R \times (S \times T) \equiv (R \times S) \times T$</td>
<td>$R \times S \equiv S \times R$</td>
<tr><th colspan="2" style="padding-top: 20px;">Union</th></tr>
<td>$R \cup (S \cup T) \equiv (R \cup S) \cup T$</td>
<td>$R \cup S \equiv S \cup R$</td>
<h3>Try it!</h3>
<p class="fragment highlight-grey" data-fragment-index="1">
Show that
$$R \times (S \times T) \equiv T \times (S \times R)$$
<div class="fragment highlight-grey" data-fragment-index="2">
<p class="fragment" data-fragment-index="1">
Show that
$$\sigma_{c_1}(\sigma_{c_2}(R)) \equiv \sigma_{c_2}(\sigma_{c_1}(R))$$
<div class="fragment highlight-grey" data-fragment-index="3">
<p class="fragment" data-fragment-index="2">
Show that
$$R \bowtie_{c} S \equiv S \bowtie_{c} R$$
<p class="fragment" data-fragment-index="3">
Show that
$$\sigma_{R.B = S.B \wedge R.A > 3}(R \times S) \equiv \sigma_{R.A > 3}(R \bowtie_{B} S)$$
<h3>Rules for Multiple Operators</h3>
<table style="font-size: 90%; margin-bottom: 50px;">
<tr><th colspan="2" style="padding-top: 20px;">Selection + Projection</th></tr>
<td>$\pi_{A}(\sigma_{c}(R)) \equiv \sigma_{c}(\pi_{A}(R))$</td>
<p style="font-size: 80%;" class="fragment">... but only if $A$ and $c$ are <u>compatible</u></p>
<p style="font-size: 80%;" class="fragment">$A$ must include all columns referenced by $c$ ($cols(c)$)</p>
<div class="fragment" style="margin-top: 50px;">
<h3>Try it!</h3>
Show that
$$\pi_A(\sigma_c(R)) \equiv \pi_A(\sigma_c(\pi_{(A \cup cols(c))}(R)))$$
<table style="font-size: 90%; margin-bottom: 50px;">
<tr><th colspan="2" style="padding-top: 20px;">Selection + Cross Product</th></tr>
<td>$\sigma_c(R \times S) \equiv (\sigma_{c}(R)) \times S$</td>
<p style="font-size: 80%;" class="fragment">... but only if $c$ references only columns of $R$</p>
<p style="font-size: 60%;" class="fragment">$cols(c) \subseteq cols(R)$</p>
<div class="fragment" style="margin-top: 50px;">
<h3>Try it!</h3>
Show that
$$\sigma_{R.B = S.B \wedge R.A > 3}(R \times S) \equiv (\sigma_{R.A > 3}(R)) \bowtie_{B} S$$
<div style="font-size: 70%;" class="fragment">When is this rewrite a good idea?</div>
<table style="font-size: 90%; margin-bottom: 50px;">
<tr><th colspan="2" style="padding-top: 20px;">Projection + Cross Product</th></tr>
<td>$\pi_A(R \times S) \equiv (\pi_{A_R}(R)) \times (\pi_{A_S}(S))$</td>
<p style="font-size: 80%;">... where $A_R$ and $A_S$ are the columns of $A$ from $R$ and $S$ respectively.</p>
<p style="font-size: 60%;">$A_R = A \cap cols(R)$&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;$A_S = A \cap cols(S)$</p>
<div class="fragment" style="margin-top: 50px;">
<h3>Try it!</h3>
Show that
$$\pi_{A}(R \bowtie_c S) \equiv (\pi_{A_R}(R)) \bowtie_c (\pi_{A_S}(S))$$
<div style="font-size: 70%;" class="fragment">When does this condition hold?</div>
<table style="font-size: 90%; margin-bottom: 50px;">
<tr><th colspan="2" style="padding-top: 20px;">Intersection</th></tr>
<td>$R \cap (S \cap T) \equiv (R \cap S) \cap T$</td>
<td>$R \cap S \equiv S \cap R$</td>
<tr><th colspan="2" style="padding-top: 20px;">Selection + ____</th></tr>
<td>$\sigma_c(R \cup S) \equiv (\sigma_c(R)) \cup (\sigma_c(S))$</td>
<td>$\sigma_c(R \cap S) \equiv (\sigma_c(R)) \cap (\sigma_c(S))$</td>
<tr><th colspan="2" style="padding-top: 20px;">Projection + ____</th></tr>
<td>$\pi_A(R \cup S) \equiv (\pi_A(R)) \cup (\pi_A(S))$</td>
<td>$\pi_A(R \cap S) \equiv (\pi_A(R)) \cap (\pi_A(S))$</td>
<tr><th colspan="2" style="padding-top: 20px;">Cross Product + Union</th></tr>
<td>$R \times (S \cup T) \equiv (R \times S) \cup (R \times T)$</td>
<pre style="display: inline-block; width: 300px; vertical-align: middle;"><code class="sql">
AND S.C &lt; 5
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-1.svg" style="vertical-align: middle;"/>
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-1.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-2.svg" style="vertical-align: middle;"/>
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-2.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-3.svg" style="vertical-align: middle;" />
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-3.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-4.svg" style="vertical-align: middle;" />
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-4.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-5.svg" style="vertical-align: middle;" />
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-5.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-6.svg" style="vertical-align: middle;" />
<section data-transition="slide">
<img src="graphics/2018-02-07-RA-Opt-6.svg" style="vertical-align: middle;" />
<span style="vertical-align: middle; margin: 50px; font-size: 300%"></span>
<img src="graphics/2018-02-07-RA-Opt-7.svg" style="vertical-align: middle;" />
<h3>General Query Optimizers</h3>
<p><b>Input:</b> Dumb translation of SQL to RA</p>
<p><b>Output:</b> Better, but equivalent query</p>
<p>Which rewrite rules should we apply?</p>
<dl style="font-size: 75%">
<dt class="fragment highlight-grey" data-fragment-index="1">Selection Pushdown</dt>
<dd class="fragment highlight-grey" data-fragment-index="1"><b>Always</b> commute Selections as close to the leaves as possible.</dd>
<dt class="fragment highlight-grey" data-fragment-index="1">Join Construction</dt>
<dd class="fragment highlight-grey" data-fragment-index="1">Joins are <b>always</b> better than cross-products.</dd>
<dt class="fragment highlight-grey" data-fragment-index="1">(Optional) Projection Pushdown</dt>
<dd class="fragment highlight-grey" data-fragment-index="1">Commuting Projections down to the leaves removes redundant columns, and <b>may</b> be beneficial for some systems.</dd>
<dt>Join Algorithm Selection</dt>
<dd>Joins can be implemented differently, depending on the join predicate.</dd>
<dt>Join/Union Ordering</dt>
<dd>The order in which joins are evaluated <b>may</b> affect query runtimes.</dd>
<dt>Access Paths</dt>
<dd>$(\sigma_c(R))$ and $(Q(\ldots) \bowtie_c R)$ are special cases that we can make fast!</dd>
<p class="fragment" data-fragment-index="1" style="font-size: 60%">Some rewrites are situational... we need more information to decide when to apply them.</p>
<b>Next Class:</b> Extended Relational Algebra and Basic Join Algorithms
<script src="../reveal.js-3.6.0/js/reveal.js"></script>
// Full list of configuration options available at:
controls: false,
progress: true,
history: true,
center: true,
slideNumber: true,
transition: 'fade', // none/fade/slide/convex/concave/zoom
chart: {
defaults: {
global: {
title: { fontColor: "#333", fontSize: 24 },
legend: {
labels: { fontColor: "#333", fontSize: 20 },
responsiveness: true
scale: {
scaleLabel: { fontColor: "#333", fontSize: 20 },
gridLines: { color: "#333", zeroLineColor: "#333" },
ticks: { fontColor: "#333", fontSize: 16 },
line: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ], "borderDash": [ [5,10], [0,0] ]},
bar: { backgroundColor: [
pie: { backgroundColor: [ ["rgba(0,0,0,.8)" , "rgba(220,20,20,.8)", "rgba(20,220,20,.8)", "rgba(220,220,20,.8)", "rgba(20,20,220,.8)"] ]},
radar: { borderColor: [ "rgba(20,220,220,.8)" , "rgba(220,120,120,.8)", "rgba(20,120,220,.8)" ]},
// Optional ../reveal.js plugins
dependencies: [
{ src: '../reveal.js-3.6.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
{ src: '../reveal.js-3.6.0/plugin/math/math.js',
condition: function() { return true; },
mathjax: '../reveal.js-3.6.0/js/MathJax.js'
{ src: '../reveal.js-3.6.0/plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../reveal.js-3.6.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '../reveal.js-3.6.0/plugin/highlight/highlight.js', async: true, condition: function() { return !!document.querySelector( 'pre code' ); }, callback: function() { hljs.initHighlightingOnLoad(); } },
{ src: '../reveal.js-3.6.0/plugin/zoom-js/zoom.js', async: true },
{ src: '../reveal.js-3.6.0/plugin/notes/notes.js', async: true },
// Chart.min.js
{ src: '../reveal.js-3.6.0/plugin/chart/Chart.min.js'},
// the plugin
{ src: '../reveal.js-3.6.0/plugin/chart/csv2chart.js'},
{ src: '../reveal.js-3.6.0/plugin/svginline/', async: false },
{ src: '../reveal.js-3.6.0/plugin/svginline/data-src-svg.js', async: false }