From 28d3d16b0c615b43a7fb44975d05503d8acb6e63 Mon Sep 17 00:00:00 2001 From: Oliver Kennedy Date: Thu, 8 Mar 2018 23:22:09 -0500 Subject: [PATCH] Review slides --- slides/cse4562sp2018/2018-03-09-Review.html | 754 ++++++++++++++++++++ 1 file changed, 754 insertions(+) create mode 100644 slides/cse4562sp2018/2018-03-09-Review.html diff --git a/slides/cse4562sp2018/2018-03-09-Review.html b/slides/cse4562sp2018/2018-03-09-Review.html new file mode 100644 index 00000000..3d3f32c3 --- /dev/null +++ b/slides/cse4562sp2018/2018-03-09-Review.html @@ -0,0 +1,754 @@ + + + + + + + CSE 4/562 - Spring 2018 + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + CSE 4/562 - Database Systems +
+ +
+ +
+

Midterm Review

+

CSE 4/562 – Database Systems

+
March 9, 2018
+
+ + +
+ +
+

What are Databases?

+
+ +
+
+
Analysis: Answering user-provided questions about data
+
What kind of tools can we give end-users?
    +
  • Declarative Languages
  • Organizational Data Structures (e.g., Indexes)
+ +
+
Manipulation: Safely persisting and sharing data updates
+
What kind of tools can we give end-users?
    +
  • Consistency Primitives
  • Data Validation Primitives
+
+
+
+ +
+
+
Primitive
+
Basic building blocks like Int, Float, Char, String
+
Tuple
+
Several ‘fields’ of different types. (N-Tuple = N fields)
+
A Tuple has a ‘schema’ defining each field
+
Set
+
A collection of unique records, all of the same type
+
Bag
+
An unordered collection of records (duplicates allowed), all of the same type
+
List
+
An ordered collection of records, all of the same type
+
+
+ +
+

+            SELECT  [DISTINCT] targetlist
+            FROM    relationlist
+            WHERE   condition
+          
+
    +
  1. Compute all combinations of tuples from the relations appearing in relationlist (i.e., their cross product)
  2. Discard tuples that fail the condition
  3. Delete attributes not in targetlist
  4. If DISTINCT is specified, eliminate duplicate rows
+

+ This is the least efficient strategy to compute a query! + A good optimizer will find more efficient strategies to compute the same answer. +
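As a concrete illustration, here is a minimal Python sketch of my own (not part of the original slides) of this naive strategy; relations are lists of dicts and attribute names are assumed to be disjoint across relations:

    from itertools import product

    def naive_select(relations, condition, targetlist, distinct=False):
        """Evaluate SELECT [DISTINCT] targetlist FROM relations WHERE condition naively."""
        # 1. All combinations of tuples from the FROM-clause relations (their cross product)
        combined = ({k: v for row in rows for k, v in row.items()}
                    for rows in product(*relations))
        # 2. Discard tuples that fail the condition
        filtered = (row for row in combined if condition(row))
        # 3. Delete attributes not in the target list
        projected = [{attr: row[attr] for attr in targetlist} for row in filtered]
        if not distinct:
            return projected
        # 4. If DISTINCT is specified, eliminate duplicate rows
        seen, result = set(), []
        for row in projected:
            key = tuple(sorted(row.items()))
            if key not in seen:
                seen.add(key)
                result.append(row)
        return result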

+
+ +
+ +
+
+

Physical Layout

+
+ +
+

Record Formats

+
+
Fixed
+
Constant-size fields. Field $i$ at byte $\sum_{j < i} |Field_j|$
+
Delimited
+
Special character or string (e.g., ,) between fields
+
Header
+
Fixed-size header points to start of each field
+
 
+
 
+
+
+ +
+

File Formats

+
+
Fixed
+
Constant-size records. Record $i$ at byte $|Record| \times i$
+
Delimited
+
Special character or string (e.g., \r\n) at record end
+
Header
+
Index in file points to start of each record
+
Paged
+
Align records to paging boundaries
+
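To make the offset arithmetic behind the Fixed layouts concrete, here is a small sketch of my own (the INT/FLOAT/CHAR(20) schema is made up for illustration):

    def field_offset(field_widths, i):
        """Fixed record layout: field i starts after the widths of fields 0..i-1."""
        return sum(field_widths[:i])

    def record_offset(field_widths, i):
        """Fixed file layout: record i starts at byte |Record| * i."""
        return sum(field_widths) * i

    widths = [4, 8, 20]                      # hypothetical INT, FLOAT, CHAR(20) schema
    assert field_offset(widths, 2) == 12     # third field starts at byte 4 + 8
    assert record_offset(widths, 5) == 160   # sixth record starts at byte 32 * 5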
+
+ +
+
+
File
+
A collection of pages (or records)
+
Page
+
A fixed-size collection of records
+
Page size is usually dictated by hardware.
Mem Page $\approx$ 4KB   Cache Line $\approx$ 64B
+
Record
+
One or more fields (for now)
+
Field
+
A primitive value (for now)
+
+
+
+ +
+
+

Relational Algebra

+
+ +
+

Relational Algebra

+ + + + + + + + + + + + + + +
Operation | Sym | Meaning
Selection | $\sigma$ | Select a subset of the input rows
Projection | $\pi$ | Delete unwanted columns
Cross-product | $\times$ | Combine two relations
Set-difference | $-$ | Tuples in Rel 1, but not Rel 2
Union | $\cup$ | Tuples either in Rel 1 or in Rel 2
Intersection | $\cap$ | Tuples in both Rel 1 and Rel 2
Join | $\bowtie$ | Pairs of tuples matching a specified condition
Division | $/$ | "Inverse" of cross-product
Sort | $\tau$ | Order the output rows
Limit | $\texttt{LIMIT}_N$ | Return only the first $N$ rows
+
+ +
+

Equivalence

+ $$Q_1 = \pi_{A}\left( \sigma_{c}( R ) \right)$$ + $$Q_2 = \sigma_{c}\left( \pi_{A}( R ) \right)$$ + +
+ $$Q_1 \stackrel{?}{\equiv} Q_2$$ +
+
+ +
+ + + + + + + + + + + + + + + + +
Rule | Notes
$\sigma_{C_1\wedge C_2}(R) \equiv \sigma_{C_1}(\sigma_{C_2}(R))$ |
$\sigma_{C_1\vee C_2}(R) \equiv \sigma_{C_1}(R) \cup \sigma_{C_2}(R)$ | Only true for set, not bag union
$\sigma_C(R \times S) \equiv R \bowtie_C S$ |
$\sigma_C(R \times S) \equiv \sigma_C(R) \times S$ | If $C$ references only $R$'s attributes, also works for joins
$\pi_{A}(\pi_{A \cup B}(R)) \equiv \pi_{A}(R)$ |
$\sigma_C(\pi_{A}(R)) \equiv \pi_A(\sigma_C(R))$ | If $A$ contains all of the attributes referenced by $C$
$\pi_{A\cup B}(R\times S) \equiv \pi_A(R) \times \pi_B(S)$ | Where $A$ (resp., $B$) contains attributes in $R$ (resp., $S$)
$R \times (S \times T) \equiv (R \times S) \times T$ | Also works for joins
$R \times S \equiv S \times R$ | Also works for joins
$R \cup (S \cup T) \equiv (R \cup S) \cup T$ | Also works for intersection and bag-union
$R \cup S \equiv S \cup R$ | Also works for intersection and bag-union
$\sigma_{C}(R \cup S) \equiv \sigma_{C}(R) \cup \sigma_{C}(S)$ | Also works for intersection and bag-union
$\pi_{A}(R \cup S) \equiv \pi_{A}(R) \cup \pi_{A}(S)$ | Also works for intersection and bag-union
$\sigma_{C}(\gamma_{A, AGG}(R)) \equiv \gamma_{A, AGG}(\sigma_{C}(R))$ | If $A$ contains all of the attributes referenced by $C$
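A worked example (added here, not on the original slide) of composing these rules: split the conjunction, push the $R$-only predicate below the cross product, then fold the remaining condition into a join.

$$\sigma_{R.A = S.A \wedge R.B > 3}(R \times S) \equiv \sigma_{R.A = S.A}(\sigma_{R.B > 3}(R \times S)) \equiv \sigma_{R.A = S.A}(\sigma_{R.B > 3}(R) \times S) \equiv \sigma_{R.B > 3}(R) \bowtie_{R.A = S.A} S$$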
+
+ +
+

Algorithms

+
+
"Volcano" Operators (Iterators)
+
Operators "pull" tuples, one-at-a-time, from their children.
+ +
2-Pass (External) Sort
+
Create sorted runs, then repeatedly merge runs
+ +
Join Algorithms
+
Quickly picking out specific pairs of tuples.
+ +
Aggregation Algorithms
+
In-Memory vs 2-Pass, Normal vs Group-By
+
+
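A minimal sketch of the Volcano/iterator interface (class and method names are my own; real engines also pass schemas, manage buffers, and so on):

    class Select:
        """Volcano-style selection: pulls tuples one at a time from its child."""
        def __init__(self, child, predicate):
            self.child, self.predicate = child, predicate

        def open(self):
            self.child.open()

        def next(self):
            # Keep pulling from the child until a tuple passes the predicate.
            row = self.child.next()
            while row is not None and not self.predicate(row):
                row = self.child.next()
            return row        # None signals end of stream

        def close(self):
            self.child.close()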
+ +
+

Nested-Loop Join

+ +
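In code, a sketch under the assumption that both inputs are in-memory lists of tuples and theta is the join condition:

    def nested_loop_join(R, S, theta):
        """Naive nested-loop join: test every pair of tuples against the condition."""
        for r in R:                  # outer relation
            for s in S:              # inner relation, rescanned for every outer tuple
                if theta(r, s):
                    yield (r, s)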
+ +
+

Block-Nested Loop Join

+ +
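The block-nested variant, sketched the same way (block_size is a stand-in for however many outer tuples fit in the buffer):

    def block_nested_loop_join(R, S, theta, block_size):
        """Buffer a block of R, then scan S once per block instead of once per tuple."""
        def join_block(block):
            for s in S:
                for r in block:
                    if theta(r, s):
                        yield (r, s)

        block = []
        for r in R:
            block.append(r)
            if len(block) == block_size:
                yield from join_block(block)
                block = []
        if block:                    # leftover partial block
            yield from join_block(block)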
+ +
+

Strategies for Implementing $R \bowtie_{R.A = S.A} S$

+ +
+
Sort/Merge Join
+
Sort all of the data upfront, then scan over both sides.
+ +
In-Memory Index Join (1-pass Hash; Hash Join)
+
Build an in-memory index on one table, scan the other.
+ +
Partition Join (2-pass Hash; External Hash Join)
+
Partition both sides so that tuples don't join across partitions.
+
+
+ +
+

Sort/Merge Join

+ +
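A sketch of the merge phase for an equi-join (my own code; key_r and key_s extract the join attribute from each side):

    def sort_merge_join(R, S, key_r, key_s):
        """Sort both inputs on the join key, then merge, pairing up equal-key runs."""
        R, S = sorted(R, key=key_r), sorted(S, key=key_s)
        i = j = 0
        while i < len(R) and j < len(S):
            kr, ks = key_r(R[i]), key_s(S[j])
            if kr < ks:
                i += 1
            elif kr > ks:
                j += 1
            else:
                # Find the run of S tuples sharing this key, and pair it with
                # every R tuple sharing the same key.
                j_end = j
                while j_end < len(S) and key_s(S[j_end]) == kr:
                    j_end += 1
                while i < len(R) and key_r(R[i]) == kr:
                    for j2 in range(j, j_end):
                        yield (R[i], S[j2])
                    i += 1
                j = j_end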
+ +
+

1-Pass Hash Join

+ +
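A sketch of the same idea in code, assuming $S$ (the build side) fits in memory:

    from collections import defaultdict

    def one_pass_hash_join(R, S, key_r, key_s):
        """Build an in-memory hash table on S, then probe it with one scan of R."""
        index = defaultdict(list)
        for s in S:                                  # build phase
            index[key_s(s)].append(s)
        for r in R:                                  # probe phase
            for s in index.get(key_r(r), []):
                yield (r, s)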
+ +
+

2-Pass Hash Join

+
+
Limited Queries
+
Only supports join conditions of the form $R.A = S.B$
+ +
Low Memory
+
Never need more than 1 pair of partitions in memory
+ +
High IO Cost
+
Every record gets written out to disk, and back in.
+
+

Can partition on data-values to support other types of queries.
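A sketch of the partitioning idea (in-memory lists stand in for the partition files a real system would spill to disk; num_partitions is arbitrary):

    from collections import defaultdict

    def partition_hash_join(R, S, key_r, key_s, num_partitions=16):
        """Partition both inputs by hash of the join key, then join each pair of
        partitions with a small in-memory hash join. Matching tuples always hash
        to the same partition, so no results are lost."""
        parts_r = [[] for _ in range(num_partitions)]
        parts_s = [[] for _ in range(num_partitions)]
        for r in R:
            parts_r[hash(key_r(r)) % num_partitions].append(r)
        for s in S:
            parts_s[hash(key_s(s)) % num_partitions].append(s)
        for pr, ps in zip(parts_r, parts_s):
            index = defaultdict(list)                # in-memory join within one partition
            for s in ps:
                index[key_s(s)].append(s)
            for r in pr:
                for s in index.get(key_r(r), []):
                    yield (r, s)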

+
+ +
+

Index Nested Loop Join

+ + To compute $R \bowtie_{R.A < S.B} S$ with an index on $S.B$ + +
    +
  1. Read One Row of $R$
  2. Get the value of $R.A$
  3. Start index scan on $S.B > [R.A]$
  4. Return rows as normal
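Sketched in code, with a sorted list standing in for the B+Tree index on $S.B$ (bisect provides the "start index scan" step):

    from bisect import bisect_right

    def index_nested_loop_join(R, S_sorted_by_B, a_of, b_of):
        """Index NLJ for R.A < S.B: per outer tuple, range-scan the index on S.B."""
        keys = [b_of(s) for s in S_sorted_by_B]      # the index's key column
        for r in R:                                  # 1. read one row of R
            a = a_of(r)                              # 2. get the value of R.A
            start = bisect_right(keys, a)            # 3. start an index scan on S.B > R.A
            for s in S_sorted_by_B[start:]:          # 4. return rows as normal
                yield (r, s)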
+
+ +
+

Basic Aggregate Pattern

+
+
Init
+
Define a starting value for the accumulator
+
Fold(Accum, New)
+
Merge a new value into the accumulator
+
Finalize(Accum)
+
Extract the aggregate from the accumulator.
+
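For example, AVG under this pattern (a sketch of my own; an "algebraic" aggregate in the terminology of the next slide):

    class Average:
        def init(self):
            return (0, 0)                            # accumulator: (running sum, count)

        def fold(self, accum, new):
            total, count = accum
            return (total + new, count + 1)          # merge one new value in

        def finalize(self, accum):
            total, count = accum
            return total / count if count else None  # extract the aggregate

    agg = Average()
    acc = agg.init()
    for value in [2, 4, 9]:
        acc = agg.fold(acc, value)
    print(agg.finalize(acc))                         # 5.0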
+
+ +
+

Basic Aggregate Types

+

Gray et al. "Data Cube: A Relational Aggregation Operator Generalizing Group-By, Cross-Tab, and Sub-Totals"

+ +
+
Distributive
+
Finite-sized accumulator and doesn't need a finalize (COUNT, SUM)
+
Algebraic
+
Finite-sized accumulator but needs a finalize (AVG)
+
Holistic
+
Unbounded accumulator (MEDIAN)
+
+
+ +
+

Grouping Algorithms

+ +
+
2-pass Hash Aggregate
+
Like 2-pass Hash Join: Distribute groups across buckets, then do an in-memory aggregate for each bucket.
+ +
Sort-Aggregate
+
Like Sort-Merge Join: Sort the data by group, so that elements of the same group end up adjacent.
+
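The in-memory building block both strategies rely on, sketched for SUM (group_of and value_of are hypothetical accessors):

    from collections import defaultdict

    def hash_group_sum(rows, group_of, value_of):
        """One accumulator per group. The 2-pass variant runs this per hash
        partition; the sort variant gets the same effect by scanning sorted runs."""
        accum = defaultdict(int)
        for row in rows:
            accum[group_of(row)] += value_of(row)    # fold into this group's accumulator
        return dict(accum)                           # finalize is a no-op for SUM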
+
+
+ +
+
+

Indexing

+
+ +
+

Data Organization

+ +
+
+
Unordered Heap
+
No organization at all. $O(N)$ reads.
+
+ +
+
(Secondary) Index
+
Index structure over unorganized data. $O(\ll N)$ random reads for some queries.
+
+ +
+
Clustered (Primary) Index
+
Index structure over clustered data. $O(\ll N)$ sequential reads for some queries.
+
+
+
+ +
+

Data Organization

+ +
+ +
+

Data Organization

+ +
+ +
+

Tree-Based Indexes

+ + +
+ +
+ +
+ +
+

Rules of B+Trees

+ +
+
Keep space open for insertions in inner/data nodes.
+
‘Split’ nodes when they’re full
+ +
Avoid under-using space
+
‘Merge’ nodes when they’re under-filled
+
+ +

Maintain Invariant: All Nodes ≥ 50% Full

+

(Exception: The Root)
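A sketch of the split step for a leaf of keys (simplified: no pointers or parent updates; splitting in half is what preserves the ≥ 50% full invariant):

    from bisect import insort

    def insert_into_leaf(leaf, key, capacity):
        """Insert into a sorted leaf; if it overflows, split it in half and
        return (new_right_sibling, separator_key) to push into the parent."""
        insort(leaf, key)
        if len(leaf) <= capacity:
            return None, None
        mid = len(leaf) // 2
        right = leaf[mid:]          # new sibling takes the upper half
        del leaf[mid:]              # old leaf keeps the lower half (still >= 50% full)
        return right, right[0]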

+
+ +
+ +
+ +
+

Problems

+
+
$N$ is too small
+
Too many overflow pages (slower reads).
+
$N$ is too big
+
Too many normal pages (wasted space).
+
+
+ +
+ +
+ +
+

Problems

+
+
Changing hash functions reallocates everything
+
Only double/halve the range of the hash function (the number of buckets)
+ +
Changing sizes still requires reading everything
+
Idea: Only redistribute buckets that are too big
+
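A small sketch of why doubling helps (my own illustration): if a key lives in bucket $b = h \bmod N$, then under $h \bmod 2N$ it can only land in bucket $b$ or bucket $b + N$, so doubling splits each bucket locally and no other bucket needs to be touched.

    def split_bucket(bucket, bucket_id, old_num_buckets):
        """Rehash one bucket when the bucket count doubles from N to 2N."""
        stay, move = [], []
        for key in bucket:
            if hash(key) % (2 * old_num_buckets) == bucket_id:
                stay.append(key)        # stays in bucket b
            else:
                move.append(key)        # moves to bucket b + N
        return stay, move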
+
+ +
+ +
+
+ +
+
+

Cost-Based Optimization

+
+ + +
+

Accounting

+

Figure out the cost of each individual operator.

+

Only count the number of IOs added by each operator.

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Operation | RA | IOs Added (#pages) | Memory (#tuples)
Table Scan | $R$ | $\frac{|R|}{\mathcal P}$ | $O(1)$
Projection | $\pi(R)$ | $0$ | $O(1)$
Selection | $\sigma(R)$ | $0$ | $O(1)$
Union | $R \uplus S$ | $0$ | $O(1)$
Sort (In-Mem) | $\tau(R)$ | $0$ | $O(|R|)$
Sort (On-Disk) | $\tau(R)$ | $\frac{2 \cdot |R| \cdot \lfloor \log_{\mathcal B}(|R|) \rfloor}{\mathcal P}$ | $O(\mathcal B)$
(B+Tree) Index Scan | $Index(R, c)$ | $\log_{\mathcal I}(|R|) + \frac{|\sigma_c(R)|}{\mathcal P}$ | $O(1)$
(Hash) Index Scan | $Index(R, c)$ | $1$ | $O(1)$
+ +
    +
  1. Tuples per Page ($\mathcal P$) – Normally defined per-schema
  2. Size of $R$ ($|R|$)
  3. Pages of Buffer ($\mathcal B$)
  4. Keys per Index Page ($\mathcal I$)
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Operation | RA | IOs Added (#pages) | Memory (#tuples)
Nested Loop Join (Buffer $S$ in mem) | $R \times S$ | $0$ | $O(|S|)$
Nested Loop Join (Buffer $S$ on disk) | $R \times_{disk} S$ | $(1+ |R|) \cdot \frac{|S|}{\mathcal P}$ | $O(1)$
1-Pass Hash Join | $R \bowtie_{1PH, c} S$ | $0$ | $O(|S|)$
2-Pass Hash Join | $R \bowtie_{2PH, c} S$ | $\frac{2|R| + 2|S|}{\mathcal P}$ | $O(1)$
Sort-Merge Join | $R \bowtie_{SM, c} S$ | [Sort] | [Sort]
(Tree) Index NLJ | $R \bowtie_{INL, c} S$ | $|R| \cdot (\log_{\mathcal I}(|S|) + \frac{|\sigma_c(S)|}{\mathcal P})$ | $O(1)$
(Hash) Index NLJ | $R \bowtie_{INL, c} S$ | $|R| \cdot 1$ | $O(1)$
(In-Mem) Aggregate | $\gamma_A(R)$ | $0$ | $adom(A)$
(Sort/Merge) Aggregate | $\gamma_A(R)$ | [Sort] | [Sort]
+ +
    +
  1. Tuples per Page ($\mathcal P$) – Normally defined per-schema
  2. Size of $R$ ($|R|$)
  3. Pages of Buffer ($\mathcal B$)
  4. Keys per Index Page ($\mathcal I$)
  5. Number of distinct values of $A$ ($adom(A)$)
+
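For example (illustrative numbers, not from the slides): with $|R| = 1{,}000{,}000$ tuples, $|S| = 500{,}000$ tuples, and $\mathcal P = 100$ tuples per page, a 2-pass hash join adds

$$\frac{2|R| + 2|S|}{\mathcal P} = \frac{2{,}000{,}000 + 1{,}000{,}000}{100} = 30{,}000 \textrm{ page IOs}$$

on top of the $\frac{|R| + |S|}{\mathcal P} = 15{,}000$ IOs charged to the two table scans, while a 1-pass hash join adds $0$, provided $S$ fits in memory.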
+ +
+

Estimating IOs requires Estimating $|Q(R)|$

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Operator | RA | Estimated Size
Table | $R$ | $|R|$
Projection | $\pi(Q)$ | $|Q|$
Union | $Q_1 \uplus Q_2$ | $|Q_1| + |Q_2|$
Cross Product | $Q_1 \times Q_2$ | $|Q_1| \times |Q_2|$
Sort | $\tau(Q)$ | $|Q|$
Limit | $\texttt{LIMIT}_N(Q)$ | $N$
Selection | $\sigma_c(Q)$ | $|Q| \times \texttt{SEL}(c, Q)$
Join | $Q_1 \bowtie_c Q_2$ | $|Q_1| \times |Q_2| \times \texttt{SEL}(c, Q_1\times Q_2)$
Distinct | $\delta_A(Q)$ | $\texttt{UNIQ}(A, Q)$
Aggregate | $\gamma_{A, B \leftarrow \Sigma}(Q)$ | $\texttt{UNIQ}(A, Q)$
+ +
    +
  • $\texttt{SEL}(c, Q)$: Selectivity of $c$ on $Q$, or $\frac{|\sigma_c(Q)|}{|Q|}$
  • $\texttt{UNIQ}(A, Q)$: # of distinct values of $A$ in $Q$.
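For example (my numbers, using the uniform-prior style of guess from the next slide): for an equality predicate $c: A = 7$, assume each of the $\texttt{UNIQ}(A, Q)$ values is equally likely, so $\texttt{SEL}(c, Q) \approx \frac{1}{\texttt{UNIQ}(A, Q)}$. With $|Q| = 10{,}000$ and $\texttt{UNIQ}(A, Q) = 50$:

$$|\sigma_{A = 7}(Q)| \approx |Q| \times \texttt{SEL}(c, Q) = 10{,}000 \times \frac{1}{50} = 200$$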
+
+ +
+

(Some) Estimation Techniques

+ +
+
+
Guess Randomly
+
Rules of thumb if you have no other options...
+
+ +
+
Uniform Prior
+
Use basic statistics to make a very rough guess.
+
+ +
+
Sampling / History
+
Small, Quick Sampling Runs (or prior executions of the query).
+
+ +
+
Histograms
+
Using more detailed statistics for improved guesses.
+
+ +
+
Constraints
+
Using rules about the data for improved guesses.
+
+
+
+ + +
+
+ + + + + + +