From 451d226376ea97f7509ec530663566171094c9cd Mon Sep 17 00:00:00 2001 From: Oliver Date: Wed, 10 Apr 2024 00:41:15 -0400 Subject: [PATCH] WIP on the talk --- src/talks/2024-04-12-UIC.erb | 301 +++++++++++++++++- .../graphics/2024-04-12/Dependencies.svg | 225 +++++++++++++ src/talks/ubodin_v1.css | 19 ++ 3 files changed, 542 insertions(+), 3 deletions(-) create mode 100644 src/talks/graphics/2024-04-12/Dependencies.svg diff --git a/src/talks/2024-04-12-UIC.erb b/src/talks/2024-04-12-UIC.erb index b358cfd4..9b8ea40b 100644 --- a/src/talks/2024-04-12-UIC.erb +++ b/src/talks/2024-04-12-UIC.erb @@ -3,6 +3,90 @@ template: templates/talk_slides_v1.erb title: Principled management of notebook state in Vizier --- + + +<% +$cells = [] +def notebook() + $cells = [] + ret = "" + ret += "
" + yield + ret += $cells.join("") + ret += "
" + return ret +end + +def nbdiv(body, varargs={}) + hide = varargs.fetch(:hide, nil) + show = varargs.fetch(:show, nil) + highlight = varargs.fetch(:highlight, nil) + css_class = varargs.fetch(:css, "nbcell") + extra_attrs = "" + unless show.nil? + css_class += " fragment" + extra_attrs += " data-fragment-index='#{show}'" + end + unless hide.nil? + css_class += " fragment fade-out" + extra_attrs += " data-fragment-index='#{hide}'" + end + unless highlight.nil? + css_class += " fragment highlight-blue" + extra_attrs += " data-fragment-index='#{highlight}'" + end + return "
#{body}
" +end + +def nbcell(text, varargs={}) + lang = varargs.fetch(:lang, "python") + idx = varargs.fetch(:idx, nil) + output = varargs.fetch(:output, nil) + idx = $cells.size + 1 if idx.nil? + cmd = "[#{idx}]
#{text}
" + unless output.nil? + cmd += "
#{output}
" + end + $cells += [nbdiv(cmd, varargs)] +end + +def nbnote(note, varargs={}) + varargs[:css_class] = "nbnote" + $cells += [nbdiv(note, varargs)] +end +%> +

<%= title %>

@@ -28,11 +112,222 @@ title: Principled management of notebook state in Vizier
- - +

High-Level Challenges

+ + +

+ So why does everyone use this confusing state model? +

+
+ +
+

High-Level Challenges

+ +
+ +
+ <%= + notebook() do + nbcell("x = 3", idx: 1) + nbcell("y = x + 2", idx: 2) + nbcell("x = 4", idx: 3) + nbcell("print(y)", idx: 4, output: "5") + end + %> +
+
+ <%= + notebook() do + nbcell("x = 3", idx: 1, highlight: 1) + nbcell("y = x + 102", idx: 5) + nbcell("x = 4", idx: 3, highlight: 1) + nbcell("print(y)", idx: 4, output: "5", highlight: 2) + end + %> +
+ +
+

Dependencies

+ +

Reads: x

+ + <%= + notebook() do + nbcell("y = x + 2", idx: 2) + end + %> + +

Writes: y

+ +

+ Question: Which variables does the cell read/write? +

+
+ <%= + notebook() do + nbnote("$$\\{\\;\\;\\}$$", show: 1) + nbcell("x = 3", idx: 1) + nbnote("$$\\{\\;x \\rightarrow \\textbf{@1}\\;\\}$$", show: 2) + nbcell("y = x + 2", idx: 2) + nbnote("$$\\{\\;x \\rightarrow \\textbf{@1},\\;y \\rightarrow \\textbf{@2}\\;\\}$$", show: 3) + nbcell("x = 4", idx: 3) + nbnote("$$\\{\\;x \\rightarrow \\textbf{@3},\\;y \\rightarrow \\textbf{@2}\\;\\}$$", show: 4) + nbcell("print(y)", idx: 4) + end + %> +
+ +
+

Interpreter State: $\{\;x \rightarrow \textbf{@3},\;y \rightarrow \textbf{@2}\;\}$

+ +

... but Cell 2 read $x \rightarrow \textbf{@1}$

+ +

+ Question: How do we get the interpreter back to a known state? +

+
+ +
+ <%= + notebook() do + nbnote("$$\\{\\;x \\rightarrow \\textbf{@1}\\;\\}$$") + nbcell("y = x + 102", idx: 5) + nbnote("$$\\{\\;x \\rightarrow \\textbf{@1},\\;y \\rightarrow \\color{blue}{\\textbf{@4}}\\;\\}$$", show:1) + nbcell("x = 4", idx: 3, highlight: 2) + nbnote("$$\\{\\;x \\rightarrow \\textbf{@3},\\;y \\rightarrow \\color{blue}{\\textbf{@4}}\\;\\}$$", show:3) + nbcell("print(y)", idx: 4, highlight: 4) + end + %> + +

+ Idea: A cell is stale if a value it read the last time it ran has since changed. +

+
+ +
+

Vizier Demo

+
+ +
+

Workflow-style execution

+ +
    +
  1. Microkernel Notebooks
  2. +
  3. Static Analysis
  4. +
  5. Approximate Dependencies
  6. +
  7. Inter-Kernel Interop [Work In Progress]
  8. +
+
+ + +
+ If we need the ability to recover a state anyway, does it have to be recovered into the same interpreter? +
+ +
+ Not requiring the same interpreter means: + - No worrying about crashes + - Portability / Resume at any point + - Parallel execution +
+ +
+ Outline the data model: + + - Interpreter + - "backend 'state database'" + - Lazy-loading interpreter state +
+ +
+ If we need the ability to recover a state anyway, does it have to be recovered into the same interpreter *version*? +
+ +
+ If we need the ability to recover a state anyway, does it have to be recovered into the same language? +
+ +
+ Cool things we can do if we lift the "state lives in the kernel" model + + - Deserialize program state into another interpreter + - Graphical widgets for common tasks (data loading) + - 1-3 slides on spreadsheets +
+ + +
+ How to figure out dependencies + + 1. Run the code (exact, after the fact) + 2. Static analysis (imprecise, incomplete) +
+ +
+ Refer to Aditya's project w.r.t. static analysis +
+ + +
+ How to figure out dependencies + + 1. Run the code (exact, after the fact) + 2. Static analysis (imprecise, incomplete) + 3. Both! +
+ +
+ Idea: use static analysis to create a mask. + + Cell state model: + - stable + - unknown + - stale + - runnable (revisit parallelism) +
+ +
+ Preliminary results: TAPP +
+ + + +
+ State model. Review: + - State needs to come *out* of the cell that created it + - State needs to go *into* the cell that is about to consume it +
+ +
+ Naive approach: Pickle + + ... but pickle doesn't allow interop + ... but pickle doesn't always work (e.g., for 'File' objects) +
+ +
+ Interop: Define standards + + - Primitive Values (int, float, date, etc...) + - Collection Types (map, list, etc...) + - Libraries + - Function [Challenge: Chained Dependencies] + - Dataframe/Series [Challenge: These are BIG] +
+ +
+ +
+ + +<%#
@@ -41,4 +336,4 @@ title: Principled management of notebook state in Vizier

Mike Brachmann, Boris Glavic, Nachiket Deo, Juliana Freire, Heiko Mueller, Sonia Castello, Munaf Arshad Qazi, William Spoth, Poonam Kumari, Soham Patel, and more...

- + %> diff --git a/src/talks/graphics/2024-04-12/Dependencies.svg b/src/talks/graphics/2024-04-12/Dependencies.svg new file mode 100644 index 00000000..f30a050b --- /dev/null +++ b/src/talks/graphics/2024-04-12/Dependencies.svg @@ -0,0 +1,225 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + Python state is mutable + + + + + 2 + Dependency tracking is hard + + + + diff --git a/src/talks/ubodin_v1.css b/src/talks/ubodin_v1.css index 10b0fdde..a87b7e59 100644 --- a/src/talks/ubodin_v1.css +++ b/src/talks/ubodin_v1.css @@ -406,6 +406,25 @@ body { .reveal .slides section .fragment.red-shadow-current.current-fragment { box-shadow: 0px 0px 12px red; } +.reveal .slides section .takeaway +{ + border-radius: 8px; + border: solid 2px black; + background-color: #ccd4ff; + color: #081669; + padding: 10px; + font-weight: bold; + font-size: 80%; + margin-top: 50px; +} +.reveal .slides section .takeaway b +{ + font-weight: bolder; + color: black; +} + + + /********************************************* * CUSTOM TAGS *********************************************/