diff --git a/src/talks/2023-02-20-CornellDB.erb b/src/talks/2023-02-20-CornellDB.erb
new file mode 100644
index 00000000..2f03cd71
--- /dev/null
+++ b/src/talks/2023-02-20-CornellDB.erb
@@ -0,0 +1,510 @@
+---
+template: templates/talk_slides_v1.erb
+title: "Microkernel Notebooks"
+---
+
+
+ μKernel Notebooks
+
+ Oliver Kennedy
+ University at Buffalo
+
+
+
+
+
+
+
+
+
+
+
Joel Grus: "For beginners, with dozens of cells and more complex code [the ability to run code snippets out of order] is utterly confusing."
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ https://openclipart.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ... or worse ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Provenance
+
+ - Automatically re-run dependent cells.
+ - "Jump" to the cell that created an artifact.
+ - Track problems.
+ - ... and more
+
+
+
+
+ Why are you getting my hopes up?
+
+
+
+
+
+
+ The Vizier Notebook
+
+
+
+ ... work in progress
+
+ - Dependency Analysis
+ - Scheduling Cell Execution
+ - Python Startup Costs
+ - Migrating state between kernels
+
+
+
+
+
+def social_link(base, provider = "facebook.com"):
+ if base is None:
+ return None
+ if base.startswith("http://"):
+ base = base.replace("http://", "https://")
+ if base.startswith("https://"):
+ return base
+ if base.startswith(provider) or base.startswith(f"www.{provider}"):
+ return "https://"+base
+ return f"https://{provider}/"+base
+
+vizierdb.export_module(social_link)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+vizierdb.export_module(social_link)
+
+
+
+
+ Explicit Exports
+
+
+ - Avoid serializing state unnecessarily
+ - Mitigate explicit dependency analysis
+
+
+ ... but they're annoying
+
+
+
+
+ c = 19
+
+
+
+ b = 23
+
+
+
+ a = b + c
+
+
+
+
+ Writes: c
+
+
+
+ Writes: b
+
+
+
+ Reads: b, c; Writes: a
+
+
+
+
+
+ Python's scoping rules are a mess.
+
+
+ x = 1
+ def foo():
+ x = 2
+ def bar():
+ print(x)
+ return bar
+ x += 10
+ baz = foo()
+ baz() # What is printed?
+
+
+ ... fortunately we only care about cross-cell dependencies (for the most part).
+
+
+
+
+import urllib.request as r
+with r.urlopen("https://not.sus.com/code.py") as response:
+ eval( response.read() )
+
+
+
+
+
+ ???
+ ... fortunately eval isn't a major part of notebook use.
+
+
+
+
+import pandas as pd
+pd.read_csv("myfile.csv")
+
+
+
+
+
+ maybe safe???
+ ... fortunately libraries are usually good at abstracting.
+
+
+
+ Idea: Optimistic Concurrency Control.
+
+ (Work in progress)
+
+
+
+ ... work in progress
+
+ - Dependency Analysis
+ - Scheduling Cell Execution
+ - Python Startup Costs
+ - Migrating state between kernels
+
+
+
+
+
+
+
+ System |
+ Dependencies |
+ Execution |
+ Parallelism |
+
+
+
+ Notebook |
+ Unknown |
+ Manual |
+ None |
+
+
+
+ Workflows |
+ Fully Known |
+ DAG |
+
+
+
+ Vizier |
+ Bounded+Trace |
+ ??? |
+
+
+
+
+
+ State
+
+
+
+
+ How do we know when it is safe to reuse a result?
+ How do we know what is safe to parallelize?
+
+
+
+
+
+
+ df = pd.read_csv("foo.csv")
+
+
+
+ - The cell runs
+ - The object returned by pd.read_csv is serialized and stored in a persistent store. (Artifact)
+ - The persistent store assigns the serialized object an identifier. (Version)
+ - The state is updated with a mapping from symbol df to the identifier. (Variable)
+
+
+
+
+
+ State
+
+ $$\Sigma \rightarrow \mathbb N \cup \{ \emptyset \}$$
+ (variable → version)
+ (e.g., $\{ retail \rightarrow 937, markets \rightarrow 252 \}$)
+
+
+
+ Cell History
+
+
+ - Last Read
+ - $2^{\Sigma \times \mathbb N}$ (e.g., $\{ retail \rightarrow 937 \}$)
+
+ - Last Write
+ - $2^{\Sigma \times (\mathbb N \cup \{\emptyset\})}$ (e.g., $\{ farmstands \rightarrow 939 \}$)
+
+
+
+
+ Execution
+
+
+ - Current State
+ - $\{ retail \rightarrow 946, markets \rightarrow 252 \}$
+
+
+
- Last Read
+ - $\{ retail \rightarrow 937 \}$
+
+
+
+
+
+ Execution
+
+
+ - Current State
+ - $\{ retail \rightarrow 937, markets \rightarrow 252 \}$
+
+ - Last Read
+ - $\{ retail \rightarrow 937 \}$
+
+
+
- Last Write
+ - $\{ farmstands \rightarrow 939 \}$
+
+
+
+
- Next State
+ - $\{ retail \rightarrow 937, markets \rightarrow 252, farmstands \rightarrow 939 \}$
+
+
+
+
+
+ Cell Dependencies
+
+
+ - Could Read
+ - $2^{\Sigma}$ (e.g., $\{retail\}$ or $everything$)
+
+ - Could Write
+ - $2^{\Sigma}$
+
+
+
+
+ Execution
+
+
+ - Current State
+ - $\{ markets \rightarrow 252 \}$
+
+
+
- Could Write
+ - $\{ retail \}$
+
+
+
+
- Next State
+ - $\{ retail \rightarrow ?, markets \rightarrow 252 \}$
+
+
+
+
+
+ Execution
+
+
+ - Current State
+ - $\{ retail \rightarrow ?, markets \rightarrow 252 \}$
+
+
+
- Last Read
+ - $\{ retail \rightarrow 937 \}$
+ - (i.e., State Unknown)
+
+
+
+
+
+ Scheduling
+
+ is the cell...
+
+ - Guaranteed Reusable
+ - Stale
+ - Stale and Runnable
+ - Unknown
+
+
+
+
+ Future/Work in progress...
+
+
+ - Migrating state efficiently
+ - Re-using Python kernels
+ - Minimizing checkpointing
+ - Instrumenting Python
+
+
+
+
+
+
+
+
+ https://vizierdb.info
+
+
+ Mike Brachmann, Boris Glavic, Nachiket Deo, Stefan Muller, Juliana Freire, Heiko Mueller, Sonia Castello, Munaf Arshad Qazi, William Spoth, Poonam Kumari, Soham Patel, and more...
+