slides
This commit is contained in:
parent
d0d01bef23
commit
035e606ffb
|
@ -124,10 +124,11 @@ schedule:
|
|||
topic: "Distributed Commit"
|
||||
materials:
|
||||
slides: slide/2021-04-22-ParallelUpdates.html
|
||||
- date: "Apr. 26"
|
||||
due: "Checkpoint 3"
|
||||
- date: "Apr. 27"
|
||||
due: "Checkpoint 3"
|
||||
topic: "Indexing Review + Checkpoint 4"
|
||||
materials:
|
||||
slides: slide/2021-04-27-Checkpoint4.html
|
||||
- date: "Apr. 29"
|
||||
topic: "TBD"
|
||||
- date: "May 4"
|
||||
|
|
243
src/teaching/cse-562/2021sp/slide/2021-04-27-Checkpoint4.html
Normal file
243
src/teaching/cse-562/2021sp/slide/2021-04-27-Checkpoint4.html
Normal file
|
@ -0,0 +1,243 @@
|
|||
---
|
||||
template: templates/cse4562_2021_slides.erb
|
||||
title: "Checkpoint 4"
|
||||
date: April, 2021
|
||||
textbook: "Ch. 8.3-8.4, 14.1-14.2, 14.4"
|
||||
---
|
||||
|
||||
<section>
|
||||
|
||||
<section>
|
||||
<h2>Checkpoint 4</h2>
|
||||
|
||||
<p>Just like Checkpoint 3, but now...</p>
|
||||
|
||||
<ul>
|
||||
<li class="fragment">Tighter Bounds</li>
|
||||
<li class="fragment">More Start-Up Time</li>
|
||||
<li class="fragment">Hints About the Table</li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
||||
<section>
|
||||
<h3>Primary Key</h3>
|
||||
|
||||
<pre><code class="sql">
|
||||
CREATE TABLE CUSTOMER(
|
||||
CUSTKEY INT,
|
||||
NAME STRING,
|
||||
ADDRESS STRING,
|
||||
NATIONKEY INT,
|
||||
PHONE STRING,
|
||||
ACCTBAL FLOAT,
|
||||
MKTSEGMENT STRING,
|
||||
COMMENT STRING
|
||||
) USING csv OPTIONS(
|
||||
path 'data/CUSTOMER.data',
|
||||
delimiter = '|',
|
||||
primary_key = 'custkey'
|
||||
)
|
||||
</code></pre>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>Primary Key</h3>
|
||||
<pre><code class="sql">
|
||||
primary_key = 'custkey',
|
||||
</code></pre>
|
||||
or
|
||||
<pre><code class="sql">
|
||||
primary_key = 'orderkey,linenumber',
|
||||
</code></pre>
|
||||
|
||||
<p>A comma-separated list of the columns that make up the table's primary key.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<dl>
|
||||
<div class="fragment highlight-blue" data-fragment-index="5">
|
||||
<dt>The Reference Implementation</dt>
|
||||
<dd class="fragment" data-fragment-index="1">In-Memory Tables</dd>
|
||||
<dd class="fragment" data-fragment-index="2">Primary Key Index</dd>
|
||||
<dd class="fragment" data-fragment-index="3">Index Scans</dd>
|
||||
</div>
|
||||
|
||||
<div class="fragment highlight-grey" data-fragment-index="5">
|
||||
<dd class="fragment" data-fragment-index="4">Index-Nested Loop Join</dd>
|
||||
<dt class="fragment" data-fragment-index="4">Ideas From Past Submissions</dt>
|
||||
<dd class="fragment" data-fragment-index="4">Materialized Views</dd>
|
||||
<dd class="fragment" data-fragment-index="4">Gather Statistics</dd>
|
||||
<dd class="fragment" data-fragment-index="4">Secondary Indexes</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<section>
|
||||
<h3>In-Memory Tables</h3>
|
||||
|
||||
<p>Time to scan SF 0.1 LINEITEM</p>
|
||||
<table class="fragment">
|
||||
<tr><th>Source</th><th>Time</th></tr>
|
||||
<tr class="fragment">
|
||||
<td><b>CSV on NVME SSD</b></td>
|
||||
<td>0.88745s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> </b></td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> </p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>What does that 1s include?</h3>
|
||||
<ul>
|
||||
<li class="fragment">Disk to RAM</li>
|
||||
<li class="fragment">Buffering in the OS</li>
|
||||
<li class="fragment">IPC</li>
|
||||
<li class="fragment">Split on |</li>
|
||||
<li class="fragment">Parse Int, Float, Date</li>
|
||||
<li class="fragment">Iterate Over Each Tuple</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>In-Memory Tables</h3>
|
||||
|
||||
<p>Time to scan SF 0.1 LINEITEM</p>
|
||||
<table>
|
||||
<tr><th>Source</th><th>Time</th></tr>
|
||||
<tr>
|
||||
<td><b>CSV on NVME SSD</b></td>
|
||||
<td>0.88745s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>IndexedSeq[InternalRow]</b></td>
|
||||
<td class="fragment">0.018s</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p class="fragment">~50x speedup</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<p><b>Takeaway:</b> Read data in at <tt>CREATE TABLE</tt></p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>Compiler</h3>
|
||||
|
||||
<pre><code class="scala">
|
||||
Table(...)
|
||||
</code></pre>
|
||||
<p>Return an iterator over the preloaded table.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<section>
|
||||
<h3>Primary Key Index</h3>
|
||||
|
||||
<p>Time to filter SF 0.1 LINEITEM for one orderkey</p>
|
||||
<table>
|
||||
<tr><th>Source</th><th>Time</th></tr>
|
||||
<tr>
|
||||
<td><b>CSV on NVME SSD</b></td>
|
||||
<td>0.9196s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>IndexedSeq[InternalRow]</b></td>
|
||||
<td>0.0624s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> </b></td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> </p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>What does that 62ms include?</h3>
|
||||
<ul>
|
||||
<li class="fragment">EqualTo(...).eval(...) for each row</li>
|
||||
<li class="fragment">Iterate Over Each Tuple</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>Primary Key Index</h3>
|
||||
|
||||
<p>Time to filter SF 0.1 LINEITEM for one orderkey</p>
|
||||
<table>
|
||||
<tr><th>Source</th><th>Time</th></tr>
|
||||
<tr>
|
||||
<td><b>CSV on NVME SSD</b></td>
|
||||
<td>0.9196s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>IndexedSeq[InternalRow]</b></td>
|
||||
<td>0.0624s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>Sorted IndexedSeq[InternalRow] + Bin Search</b></td>
|
||||
<td class="fragment">0.0008s</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p class="fragment">~80x speedup</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<p><b>Takeaway: </b> Sort on primary key and binary search.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>Compiler</h3>
|
||||
<pre><code class="scala">
|
||||
Filter(expression, Table(...))
|
||||
</code></pre>
|
||||
<p class="fragment">If <tt>expression</tt> is a ...</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<p>If <tt>expression</tt> is a ...</p>
|
||||
<dl>
|
||||
<dt class="fragment"><tt>EqualTo</tt> between the Table key and a constant</dt>
|
||||
<dd class="fragment">Binary search for the key!</dd>
|
||||
<dt class="fragment"><tt>[Greater|Less]Than[OrEqual]</tt> between the Table key and a constant</dt>
|
||||
<dd class="fragment">Binary search for the lower/upper bound</dd>
|
||||
<dt class="fragment">Greater and Less</dt>
|
||||
<dd class="fragment">Binary search for the lower and upper bound</dd>
|
||||
<dt class="fragment">One of the above and more</dt>
|
||||
<dd class="fragment">Binary search + Filter the rest</dd>
|
||||
</dl>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<p>But TPC-H doesn't have filters on keys...</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h3>Also Index</h3>
|
||||
<pre><code class="sql">
|
||||
USING csv OPTIONS(
|
||||
path '../TPCH/LINEITEM.csv',
|
||||
delimiter = '|',
|
||||
primary_key = 'orderkey,linenumber',
|
||||
tree_index = 'shipdate',
|
||||
hash_index = 'linestatus|shipmode'
|
||||
)
|
||||
</code></pre>
|
||||
<p><tt>tree_index</tt> and <tt>hash_index</tt> are |-separated lists of indexes; each index is a ,-separated list of columns.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<b>Questions?</b>
|
||||
</section>
|
Loading…
Reference in a new issue