Merge branch 'master' of gram.cse.buffalo.edu:ODIn/Website
This commit is contained in:
commit
bd418203f4
510
src/talks/2023-02-20-CornellDB.erb
Normal file
510
src/talks/2023-02-20-CornellDB.erb
Normal file
|
@ -0,0 +1,510 @@
|
||||||
|
---
|
||||||
|
template: templates/talk_slides_v1.erb
|
||||||
|
title: "Microkernel Notebooks"
|
||||||
|
---
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h2>μKernel Notebooks</h2>
|
||||||
|
|
||||||
|
<h4>Oliver Kennedy</h4>
|
||||||
|
<h5>University at Buffalo</h5>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<svg data-src="graphics/2022-06-20/NotebookOverview.svg" height="400px" style="margin-left: -100px"/>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
|
||||||
|
<div style="display: inline-block; width: 45%;">
|
||||||
|
<img src="graphics/2022-06-20/Pimentel.png" height="400px">
|
||||||
|
<p style="font-size: 70%;"><a href="https://ieeexplore.ieee.org/document/8816763">Pimentel et al.</a>: "4.03% of notebooks on github are reproducible"</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="display: inline-block; width: 45%;" class="fragment">
|
||||||
|
<img src="graphics/2022-06-20/Grus.png">
|
||||||
|
<p style="font-size: 70%;"><a href="https://www.youtube.com/watch?v=7jiPeIFXb6U">Joel Grus</a>: "For beginners, with dozens of cells and more complex code [the ability to run code snippets out of order] is utterly confusing."</p>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<svg data-src="graphics/2022-06-20/Checkpointing.svg" width="800px"/>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3><a href="https://github.com/stitchfix/nodebook">Nodebook</a></h3>
|
||||||
|
<img src="graphics/2022-06-20/Nodebook.png" height="300px">
|
||||||
|
|
||||||
|
<attribution><a href="https://github.com/stitchfix/nodebook">https://github.com/stitchfix/nodebook</a></attribution>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<svg data-src="graphics/2022-06-20/MonokernelCheckpoints.svg" height="400px" />
|
||||||
|
<attribution><a href="https://openclipart.org">https://openclipart.org</a></attribution>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/NoCheckpointing.png" height="400px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>A modest proposal...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/MicrokernelCheckpoints.svg" height="400px">
|
||||||
|
<attribution><a href="https://openclipart.org">https://openclipart.org</a></attribution>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>So now...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/MicrokernelPyV2Checkpoints.svg" height="400px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/MicrokernelPyScalaCheckpoints.svg" height="400px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>and...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<svg data-src="graphics/2022-06-20/Parallelism.svg" width="800px"/>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>and...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-10-07/kernel-restarting.png">
|
||||||
|
<p class="fragment">... or worse ...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-10-07/kernel-failed.png">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>and...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-10-07/inspector.png">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>and...</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-10-07/dep_graph.svg" height="400px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Provenance</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Automatically re-run dependent cells.</li>
|
||||||
|
<li>"Jump" to the cell that created an artifact.</li>
|
||||||
|
<li>Track problems.</li>
|
||||||
|
<li>... and more</li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>Why are you getting my hopes up?</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<a href="https://vizierdb.info">
|
||||||
|
<img src="graphics/2022-06-20/vizier.svg" alt="Vizier" height="200px">
|
||||||
|
</a>
|
||||||
|
<h3>The Vizier Notebook</h3>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>... work in progress</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Dependency Analysis</li>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Scheduling Cell Execution</li>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Python Startup Costs</li>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Migrating state between kernels</li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
def social_link(base, provider = "facebook.com"):
|
||||||
|
if base is None:
|
||||||
|
return None
|
||||||
|
if base.startswith("http://"):
|
||||||
|
base = base.replace("http://", "https://")
|
||||||
|
if base.startswith("https://"):
|
||||||
|
return base
|
||||||
|
if base.startswith(provider) or base.startswith(f"www.{provider}"):
|
||||||
|
return "https://"+base
|
||||||
|
return f"https://{provider}/"+base
|
||||||
|
|
||||||
|
vizierdb.export_module(social_link)
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
vizierdb.export_module(social_link)
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Explicit Exports</h3>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="2">Avoid serializing state unnecessarily</li>
|
||||||
|
<li>Mitigate explicit dependency analysis</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p class="fragment" data-fragment-index="1">... but they're annoying</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
c = 19
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<pre><code class="python">
|
||||||
|
b = 23
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<pre><code class="python">
|
||||||
|
a = b + c
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p><b>Writes: </b> c</p>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<p><b>Writes: </b> b</p>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<p><b>Reads: </b>b, c; <b>Writes: </b> a</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p style="font-size: 200px;">🧹</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>Python's scoping rules are a mess.</p>
|
||||||
|
|
||||||
|
<pre class="fragment"><code class="python">
|
||||||
|
x = 1
|
||||||
|
def foo():
|
||||||
|
x = 2
|
||||||
|
def bar():
|
||||||
|
print(x)
|
||||||
|
return bar
|
||||||
|
x += 10
|
||||||
|
baz = foo()
|
||||||
|
baz() # What is printed?
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p class="fragment">... fortunately we only care about cross-cell dependencies (for the most part).</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
import urllib.request as r
|
||||||
|
with r.urlopen("https://not.sus.com/code.py") as response:
|
||||||
|
eval( response.read() )
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p> </p>
|
||||||
|
<p>???</p>
|
||||||
|
<p class="fragment">... fortunately eval isn't a major part of notebook use.</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
import pandas as pd
|
||||||
|
pd.read_csv("myfile.csv")
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p> </p>
|
||||||
|
<p>maybe safe???</p>
|
||||||
|
<p class="fragment">... fortunately libraries are usually good at abstracting.</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p><b>Idea:</b> Optimistic Concurrency Control.</p>
|
||||||
|
|
||||||
|
<p class="fragment">(Work in progress)</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>... work in progress</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Dependency Analysis</li>
|
||||||
|
<li>Scheduling Cell Execution</li>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Python Startup Costs</li>
|
||||||
|
<li class="fragment highlight-grey" data-fragment-index="1">Migrating state between kernels</li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>System</th>
|
||||||
|
<th>Dependencies</th>
|
||||||
|
<th>Execution</th>
|
||||||
|
<th>Parallelism</th>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr class="fragment">
|
||||||
|
<td>Notebook</td>
|
||||||
|
<td>Unknown</td>
|
||||||
|
<td>Manual</td>
|
||||||
|
<td>None</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr class="fragment">
|
||||||
|
<td>Workflows</td>
|
||||||
|
<td>Fully Known</td>
|
||||||
|
<td colspan="2">DAG</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr class="fragment">
|
||||||
|
<td>Vizier</td>
|
||||||
|
<td class="fragment">Bounded+Trace</td>
|
||||||
|
<td colspan="2" class="fragment">???</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>State</h3>
|
||||||
|
<img src="graphics/2022-06-20/MicrokernelCheckpoints.svg" height="400px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<p>How do we know when it is safe to reuse a result?</p>
|
||||||
|
<p>How do we know what is safe to parallelize?</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h2>State?</h2>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code class="python">
|
||||||
|
df = pd.read_csv("foo.csv")
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li class="fragment">The cell runs</li>
|
||||||
|
<li class="fragment">The object returned by <tt>pd.read_csv</tt> is serialized and stored in a persistent store. <span class="fragment" style="font-weight: bold;">(Artifact)</span></li>
|
||||||
|
<li class="fragment">The persistent store assigns the serialized object an identifier. <span class="fragment" style="font-weight: bold;">(Version)</span></li>
|
||||||
|
<li class="fragment">The state is updated with a mapping from symbol <tt>df</tt> to the identifier. <span class="fragment" style="font-weight: bold;">(Variable)</span></li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>State</h3>
|
||||||
|
|
||||||
|
$$\Sigma \rightarrow \mathbb N \cup \{ \emptyset \}$$
|
||||||
|
<p>(variable → version)</p>
|
||||||
|
<p class="fragment">(e.g., $\{ retail \rightarrow 937, markets \rightarrow 252 \}$)</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Cell History</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Last Read</dt>
|
||||||
|
<dd>$2^{\Sigma \times \mathbb N}$ (e.g., $\{ retail \rightarrow 937 \}$)</dd>
|
||||||
|
|
||||||
|
<dt>Last Write</dt>
|
||||||
|
<dd>$2^{\Sigma \times (\mathbb N \cup \{\emptyset\})}$ (e.g., $\{ farmstands \rightarrow 939 \}$)</dd>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Execution</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Current State</dt>
|
||||||
|
<dd>$\{ retail \rightarrow 946, markets \rightarrow 252 \}$</dd>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Last Read</dt>
|
||||||
|
<dd class="fragment highlight-red">$\{ retail \rightarrow 937 \}$</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Execution</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Current State</dt>
|
||||||
|
<dd>$\{ retail \rightarrow 937, markets \rightarrow 252 \}$</dd>
|
||||||
|
|
||||||
|
<dt>Last Read</dt>
|
||||||
|
<dd>$\{ retail \rightarrow 937 \}$</dd>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Last Write</dt>
|
||||||
|
<dd>$\{ farmstands \rightarrow 939 \}$</dd>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Next State</dt>
|
||||||
|
<dd>$\{ retail \rightarrow 937, markets \rightarrow 252, farmstands \rightarrow 939 \}$</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Cell Dependencies</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Could Read</dt>
|
||||||
|
<dd>$2^{\Sigma}$ (e.g., $\{retail\}$ or $everything$)</dd>
|
||||||
|
|
||||||
|
<dt>Could Write</dt>
|
||||||
|
<dd>$2^{\Sigma}$</dd>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Execution</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Current State</dt>
|
||||||
|
<dd>$\{ markets \rightarrow 252 \}$</dd>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Could Write</dt>
|
||||||
|
<dd>$\{ retail \}$</dd>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Next State</dt>
|
||||||
|
<dd>$\{ retail \rightarrow ?, markets \rightarrow 252 \}$</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Execution</h3>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Current State</dt>
|
||||||
|
<dd>$\{ retail \rightarrow ?, markets \rightarrow 252 \}$</dd>
|
||||||
|
|
||||||
|
<div class="fragment">
|
||||||
|
<dt>Last Read</dt>
|
||||||
|
<dd class="fragment highlight-blue">$\{ retail \rightarrow 937 \}$</dd>
|
||||||
|
<dd class="fragment">(i.e., State Unknown)</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Scheduling</h3>
|
||||||
|
|
||||||
|
<p>is the cell...</p>
|
||||||
|
<ul>
|
||||||
|
<li>Guaranteed Reusable</li>
|
||||||
|
<li>Stale</li>
|
||||||
|
<li>Stale and Runnable</li>
|
||||||
|
<li>Unknown</li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<h3>Future/Work in progress...</h3>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li class="fragment">Migrating state efficiently</li>
|
||||||
|
<li class="fragment">Re-using python kernels</li>
|
||||||
|
<li class="fragment">Minimizing checkpointing</li>
|
||||||
|
<li class="fragment">Instrumenting python</li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
<section>
|
||||||
|
<pre><code>
|
||||||
|
x = 3
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code>
|
||||||
|
from foo import x
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<pre><code>
|
||||||
|
x = pandas.read_csv("foo.csv")
|
||||||
|
</code></pre>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/arrow.png" height="300px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/Vizier-System-Diag.svg" height="300px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/Vizier-Polyglot.png" height="500px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/Vizier-Load.png" height="500px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/Vizier-Spreadsheet.png" height="500px">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<img src="graphics/2022-06-20/Vizier-New.png" height="400px">
|
||||||
|
</section>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<a href="https://vizierdb.info">
|
||||||
|
<img src="graphics/2022-06-20/vizier.svg" height="200px">
|
||||||
|
<p style="margin-top: -20px;">https://vizierdb.info</p>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<p style="font-size: 65%"><b>Mike Brachmann, Boris Glavic, Nachiket Deo, Stefan Muller</b>, Juliana Freire, Heiko Mueller, Sonia Castello, Munaf Arshad Qazi, William Spoth, Poonam Kumari, Soham Patel, and more...</p>
|
||||||
|
</section>
|
Loading…
Reference in a new issue