pull/1/head
Oliver Kennedy 2019-05-03 02:59:22 -04:00
parent ce7deb1e68
commit 9b34d953cc
5 changed files with 887 additions and 0 deletions

View File

@ -61,4 +61,147 @@ def data_table(schema, data, params = {})
data.zip(row_args).map { |row, args| tag("tr", row.join, args) }.join("\n"),
params.fetch(:table_args, {})
)
end
# A node of a relational-algebra expression tree that computes its own layout
# and renders itself (and its subtree) as SVG markup.
#
# A node is a leaf when +children+ is nil or empty; otherwise +children+ is an
# array of RATreeNode-like objects responding to +width+, +height+ and +render+.
#
# The +config+ hash accepted by the layout/render methods reads two keys:
# +:indent+ (String prefix for each emitted line, default "") and
# +:debug+ (when truthy, draws a red box behind each symbol and prints
# intermediate values with +p+).
class RATreeNode
  # Horizontal gap, in SVG units, between adjacent child subtrees.
  CHILD_SEPARATOR_X = 20

  # @param type [Symbol] operator kind (:table, :select, :project, :aggregate,
  #   :join, :cross, :diff, :union, or anything else, rendered by name)
  # @param params [Hash] operator parameters; :table reads :name,
  #   :select/:join read :pred, :project reads :attrs
  # @param children [Array, nil] child nodes; nil or empty makes this a leaf
  def initialize(type, params, children = [])
    @type = type
    @params = params
    @children = children
    @self_width = 100
    @self_height = 100
    case @type
    when :table
      @self_width = 40 * params[:name].length
      @self_height = 50
    when :select, :join
      @self_width += 15 * params[:pred].length
    when :project
      @self_width += 15 * params[:attrs].length
    end
    @height_above_children = 100
  end

  # Wraps +x+ in a small, subscripted <tspan>.
  def subscript(x)
    "<tspan style='font-size: 40%; vertical-align: sub;'>#{x}</tspan>"
  end

  # SVG text fragment for this node's operator symbol. Unknown types fall back
  # to the type's name.
  def symbol
    case @type
    when :select
      "<tspan style='font-size: 200%'>&nbsp;𝛔#{subscript @params[:pred]}</tspan>"
    when :project
      "<tspan style='font-size: 200%'> 𝛑#{subscript @params[:attrs]}</tspan>"
    when :aggregate
      groupby = @params.key?(:groupby) ? subscript(@params[:groupby]) : nil
      "<tspan style='font-size: 200%'>#{groupby}𝛄#{subscript @params[:aggregates]}</tspan>"
    when :join
      "<tspan style='font-size: 400%'>⋈#{subscript @params[:pred]}</tspan>"
    when :cross
      "<tspan style='font-size: 400%'>⨉</tspan>"
    when :diff
      # The entity needs its terminating semicolon to be well-formed.
      "<tspan style='font-size: 300%; font-weight: bold'>&nbsp;-</tspan>"
    when :union
      "<tspan style='font-size: 400%'>⊎</tspan>"
    when :table
      "<tspan style='font-weight: bold; font-family: Courier, fixed-width; font-size: 150%'>#{@params[:name]}</tspan>"
    else
      # Must read the instance variable; there is no +type+ method or local.
      @type.to_s
    end
  end

  # Total height of the subtree rooted here (memoized).
  def height(config = {})
    @height ||=
      if leaf?
        @self_height
      else
        @children.map { |c| c.height(config) }.max + (@self_height + @height_above_children)
      end
  end

  # Combined width of all children including separators; 0 for a leaf (memoized).
  def child_width(config = {})
    return 0 if leaf?

    @child_width ||=
      @children.map { |c| c.width(config) }.sum + CHILD_SEPARATOR_X * (@children.size - 1)
  end

  # Width of the subtree: the wider of this node's symbol and its children (memoized).
  def width(config = {})
    @width ||= leaf? ? @self_width : [child_width(config), @self_width].max
  end

  # SVG <text> element for this node's symbol, horizontally centred within the
  # subtree width. With config[:debug], a red <rect> marking the symbol's box
  # is emitted first.
  def symbol_text(config)
    indent = config.fetch(:indent, "")
    symbol_x = width(config) / 2 - (@self_width / 2)
    symbol_y = 0
    debug = ""
    if config.fetch(:debug, false)
      debug = "#{indent}<rect x='#{symbol_x}' y='#{symbol_y}' width='#{@self_width}' height='#{@self_height}' style='fill: red'/>\n"
    end
    "#{debug}#{indent}<text x='#{symbol_x}' y='#{symbol_y + @self_height}'>#{symbol}</text>\n"
  end

  # Renders this node, its children (each in a translated <g>), and the
  # connecting lines from this node down to each child.
  #
  # @return [String] SVG markup
  def render(config = {})
    return symbol_text(config) if leaf?

    indent = config.fetch(:indent, "")
    debug = config.fetch(:debug, false)

    # children_x[i] is the left edge of child i; the final entry is a sentinel
    # one separator past the right edge of the last child, which the
    # line-target computation below relies on.
    first_x = 0
    first_x = (width(config) - child_width(config)) / 2 if width(config) > child_width(config)
    children_x = [first_x]
    @children.each { |c| children_x << children_x.last + c.width(config) + CHILD_SEPARATOR_X }

    children_y = @height_above_children + @self_height
    child_blobs = @children.each_with_index.map do |c, i|
      rendered = c.render(config.merge(indent: indent + " "))
      p rendered if debug
      p children_x[i] if debug
      "#{indent} <g transform='translate(#{children_x[i]}, #{children_y})'>\n#{rendered}</g>\n"
    end

    # Lines start just below this node's symbol and end at each child's centre.
    line_x = width(config) / 2
    line_y = @self_height * 1.1
    target_y = line_y + @height_above_children
    child_lines = @children.each_index.map do |i|
      target_x = (children_x[i] + children_x[i + 1] - CHILD_SEPARATOR_X) / 2
      "#{indent} <line x1='#{line_x}' y1='#{line_y}' x2='#{target_x}' y2='#{target_y}' stroke='black' stroke-width='4'/>\n"
    end

    symbol_text(config) + child_blobs.join + child_lines.join
  end

  private

  # True when the node has no children. Covers both nil and the constructor's
  # default empty array, which would otherwise make +height+ call +nil + Integer+.
  def leaf?
    @children.nil? || @children.empty?
  end
end
# Builds a leaf node for the base relation called +name+.
# The node is created with nil children.
def ra_table(name)
  table_params = { name: name }
  RATreeNode.new(:table, table_params, nil)
end
# Builds a union node over any number of input subtrees.
def ra_union(*children)
  no_params = {}
  RATreeNode.new(:union, no_params, children)
end
# Builds a difference node over the given input subtrees.
def ra_diff(*children)
  no_params = {}
  RATreeNode.new(:diff, no_params, children)
end
# Builds a join node combining +lhs+ and +rhs+ under +predicate+.
#
# The node type must be :join: RATreeNode's constructor reads params[:name]
# for :table nodes, which is absent here, and only :join renders the
# predicate as a subscript on the join symbol.
#
# @param predicate [String] join condition, stored under :pred
# @param lhs [RATreeNode] left input
# @param rhs [RATreeNode] right input
# @return [RATreeNode]
def ra_join(predicate, lhs, rhs)
  RATreeNode.new(:join, { pred: predicate }, [lhs, rhs])
end
# Builds an aggregation node over a single +input+ subtree.
# +groupby+ and +aggregates+ are stored as given under :groupby and :aggregates.
def ra_aggregate(groupby, aggregates, input)
  agg_params = {
    groupby: groupby,
    aggregates: aggregates
  }
  RATreeNode.new(:aggregate, agg_params, [input])
end
# Builds a selection node that applies +predicate+ to a single +input+ subtree.
def ra_select(predicate, input)
  select_params = { pred: predicate }
  RATreeNode.new(:select, select_params, [input])
end
# Builds a projection node over a single +input+ subtree.
#
# When +attrs+ is a Hash, each entry is flattened to "<key><value>" and the
# entries are joined with "; "; any other value is stored unchanged.
# NOTE(review): key and value are concatenated with no separator between
# them (e.g. { A: "1" } becomes "A1") -- confirm this is the intended label.
def ra_project(attrs, input)
  label =
    case attrs
    when Hash then attrs.map { |key, expr| "#{key}#{expr}" }.join("; ")
    else attrs
    end
  RATreeNode.new(:project, { attrs: label }, [input])
end
# Wraps the tree returned by the given block in an <svg> element.
#
# The block must return an object responding to +height+, +width+ and
# +render+. Trees taller than 500 units are scaled down so the tree itself is
# 500 units high; shorter trees are drawn at scale 1. +params+ is forwarded to
# +render+ with :indent extended by one space; params[:indent] (default "")
# also prefixes the wrapper lines.
def relational_algebra(params = {})
  indent = params.fetch(:indent, "")
  ra = yield

  scale = 1
  scale = 500.0 / ra.height if ra.height > 500

  # A 20-unit margin is added to both dimensions before scaling.
  svg_height = (ra.height + 20) * scale
  svg_width = (ra.width + 20) * scale
  body = ra.render(params.merge(indent: indent + " "))

  [
    "#{indent}<svg height='#{svg_height}' width='#{svg_width}'>\n",
    "#{indent}<g transform='scale(#{scale})'>",
    body,
    "#{indent}</g></svg>\n"
  ].join
end

View File

@ -0,0 +1,326 @@
---
template: templates/cse4562_2019_slides.erb
title: Incomplete and Probabilistic Databases
date: May 1, 2019
textbook: "<a href='https://github.com/UBOdin/mimir/wiki/Concepts-CTables'>PDB Concepts and C-Tables</a>"
dependencies:
- lib/slide_utils.rb
---
<%
require "slide_utils.rb"
%>
<section>
<section>
<img src="graphics/2019-04-31-4or9.png" height="300px" />
</section>
<section>
<img src="graphics/2019-04-31-guacamole.png" class="stretch" />
<attribution><a href="https://www.anishathalye.com/2017/07/25/synthesizing-adversarial-examples/">https://www.anishathalye.com/2017/07/25/synthesizing-adversarial-examples/</a></attribution>
</section>
<section>
<img src="graphics/2019-04-31-catVSdog.jpg" class="stretch" />
<attribution><a href="https://www.pyimagesearch.com/pyimagesearch-gurus/?src=post-deep-learning-libs">Deep Learning Demystified</a></attribution>
</section>
<section>
<h3>What happens when you don't know your data precisely?</h3>
</section>
<section>
<pre><code>
SELECT * FROM Posts WHERE image_class = 'Cat';
</code></pre>
<pre class="fragment"><code>
SELECT COUNT(*) FROM Posts WHERE image_class = 'Cat';
</code></pre>
<pre class="fragment"><code>
SELECT user_id FROM Posts
WHERE image_class = 'Cat'
GROUP BY user_id HAVING COUNT(*) > 10;
</code></pre>
</section>
</section>
<section>
<section>
<h3 class="fragment">Incomplete Databases<br/>↓</h3>
<h3>Probabilistic Databases</h3>
</section>
<section>
<ol>
<li>Representing Incompleteness</li>
<li class="fragment">Querying Incomplete Data</li>
<li class="fragment">Implementing It</li>
</ol>
</section>
<section>
<table><tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","1<span class='fragment highlight-current-red' data-fragment-index='1'>4</span>260"]], name: "$R_1$", rowids: true) %>
</td><td class="fragment" data-fragment-index="3">or</td>
<td class="fragment highlight-current-grey" data-fragment-index="2">
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","1<span class='fragment highlight-current-red' data-fragment-index='1'>9</span>260"]], name: "$R_2$", rowids: true) %>
</td></tr></table>
</section>
<section>
<p><b>Incomplete Database</b> ($\mathcal D$): A set of <i>possible worlds</i></p>
<p class="fragment"><b>Possible World</b> ($D \in \mathcal D$): One (of many) database instances</p>
<p class="fragment">(Require all possible worlds to have the same schema)</p>
</section>
<section>
<p>What does it mean to run a query on an incomplete database?</p>
<p class="fragment" data-fragment-index="1"><span class="fragment fade-out" data-fragment-index="2">$Q(\mathcal D) = ?$</span></p>
<p class="fragment" data-fragment-index="2">$Q(\mathcal D) = \{\;Q(D)\;|\;D \in \mathcal D \}$</p>
</section>
<section>
<table><tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","14260"]], name: "$R_1$", rowids: true) %>
</td><td>or</td><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","19260"]], name: "$R_2$", rowids: true) %>
</td></tr></table>
<p class="fragment" style="font-size: 90%">$$Q_1 = \pi_{Name}\big( \sigma_{state = \texttt{'NY'}} (R \bowtie_{zip} ZipLookups) \big)$$</p>
<table class="fragment"><tr>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">{</td>
<td style="vertical-align: middle;">
<%= data_table(["Name"], [["Alice"], ["Bob"]], name: "$Q(R_1)$", rowids: true) %>
</td><td style="vertical-align: middle; font-weight: bold;">or</td><td style="vertical-align: middle;">
<%= data_table(["Name"], [["Alice"]], name: "$Q(R_2)$", rowids: true) %>
</td>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">}</td>
</tr></table>
<aside class="notes">
19260 is Phoenixville, PA
</aside>
</section>
<section>
<table><tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","14260"]], name: "$R_1$", rowids: true) %>
</td><td>or</td><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","19260"]], name: "$R_2$", rowids: true) %>
</td></tr></table>
<p class="fragment" style="font-size: 80%">$$Q_2 = \pi_{Name}\big( \sigma_{region = \texttt{'Northeast'}} (R \bowtie_{zip} ZipLookups) \big)$$</p>
<table class="fragment"><tr>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">{</td>
<td style="vertical-align: middle;">
<%= data_table(["Name"], [["Alice"], ["Bob"]], name: "$Q(R_1)$", rowids: true) %>
</td><td style="vertical-align: middle; font-weight: bold;">or</td><td style="vertical-align: middle;">
<%= data_table(["Name"], [["Alice"], ["Bob"]], name: "$Q(R_2)$", rowids: true) %>
</td>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">}</td>
</tr></table>
</section>
<section>
<table><tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","14260"]], name: "$R_1$", rowids: true) %>
</td><td>or</td><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","19260"]], name: "$R_2$", rowids: true) %>
</td></tr></table>
<p style="font-size: 80%">$$Q_2 = \pi_{Name}\big( \sigma_{region = \texttt{'Northeast'}} (R \bowtie_{zip} ZipLookups) \big)$$</p>
<table><tr>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">{</td>
<td style="vertical-align: middle;">
<%= data_table(["Name"], [["Alice"], ["Bob"]], name: "$Q(R_1)$ or $Q(R_2)$", rowids: true) %>
</td>
<td style="font-size: 600%; margin: 0px; padding: 0px; height: 0.5em;">}</td>
</tr></table>
</section>
<section>
<img src="graphics/2019-04-31-NormalDB.svg" /><br/>
<hr class="fragment" data-fragment-index="1"/>
<svg data-src="graphics/2019-04-31-IncompleteDB.svg" class="fragment" data-fragment-index="1"/>
</section>
</section>
<section>
<section>
<p><b>Challenge:</b> There can be <u>lots</u> of possible worlds.</p>
</section>
<section>
<p><b>Observation: </b> Possibilities for database creation break down into lots of independent choices.</p>
<p class="fragment"><u>Factorize</u> the database.</p>
</section>
<section>
<table><tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","14260"], ["Carol", "13201"]], name: "$R_1$", rowids: true) %>
</td><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","19260"], ["Carol", "18201"]], name: "$R_2$", rowids: true) %>
</td></tr>
<tr><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","14260"], ["Carol", "18201"]], name: "$R_3$", rowids: true) %>
</td><td>
<%= data_table(["Name", "ZipCode"], [["Alice", "10003"], ["Bob","19260"], ["Carol", "13201"]], name: "$R_4$", rowids: true) %>
</td></tr></table>
<p class="fragment">Alice appears in all four databases. <br/>The only differences are Bob and Carol's zip codes.</p>
</section>
<section>
<h3>List Out Choices</h3>
<ul>
<li>$\texttt{bob}$<span class="fragment" data-fragment-index="1">$ \in \{ 4, 9 \}$</span> (Bob's zip code digit)</li>
<li>$\texttt{carol}$<span class="fragment" data-fragment-index="1">$ \in \{ 3, 8 \}$</span> (Carol's zip code digit)</li>
</ul>
</section>
<% [false, true].each do |with_annotations| %>
<section>
<%= data_table(
["Name", "ZipCode"],
[ ["Alice", "10003"],
["Bob","14260"],
["Bob","19260"],
["Carol","13201"],
["Carol","18201"]
],
name: "$\\mathcal R$",
rowids: true,
annotations: if with_annotations then [
"always",
"if $\\texttt{bob} = 4$",
"if $\\texttt{bob} = 9$",
"if $\\texttt{carol} = 3$",
"if $\\texttt{carol} = 8$"
] else nil end
) %>
<div class="fragment">
<div style="font-size: 200%">+</div>
<p>$\big[\;\texttt{bob} \in \{4, 9\},\; \texttt{carol} \in \{3, 8\}\;\big]$</p>
</div>
</section>
<% end %>
<section>
<%= data_table(
["Name", "ZipCode"],
[ ["Alice", "10003"],
["Bob","14260"],
["Bob","19260"],
["Carol","13201"],
["Carol","18201"]
],
name: "$\\mathcal R$",
rowids: true,
annotations: [
"a",
"b",
"c",
"d",
"e"
]
) %>
<div style="font-size: 200%">+</div>
<p>Pick one of each: $\big[\;\{a\},\; \{b, c\},\; \{d, e\}\;\big]$</p>
<p>Set those variables to $T$ and all others to $F$</p>
</section>
<section>
<p>$R_1 \equiv \big[a \rightarrow T, b \rightarrow T, d \rightarrow T, * \rightarrow F\big]$</p>
<%= data_table(
["Name", "ZipCode"],
[ ["Alice", "10003"],
["Bob","14260"],
["Bob","19260"],
["Carol","13201"],
["Carol","18201"]
],
name: "$\\mathcal R$",
rowids: true,
annotations: [
"T (a)",
"T (b)",
"F (c)",
"T (d)",
"F (e)"
]
) %>
</section>
<section>
<p>Use provenance as before...</p>
<p class="fragment">... but what about aggregates?</p>
</section>
<section>
<pre><code>
SELECT COUNT(*)
FROM R NATURAL JOIN ZipCodeLookup
WHERE State = 'NY'
</code></pre>
<p style="font-size: 70%" class="fragment">
$$= \begin{cases}
1 & \textbf{if } \texttt{bob} = 9 \wedge \texttt{carol} = 8\\
2 & \textbf{if } \texttt{bob} = 4 \wedge \texttt{carol} = 8 \\&\; \vee\; \texttt{bob} = 9 \wedge \texttt{carol} = 3\\
3 & \textbf{if } \texttt{bob} = 4 \wedge \texttt{carol} = 3
\end{cases}$$</p>
<p class="fragment"><b>Problem: </b> A combinatorial explosion of possibilities</p>
</section>
<section>
<p><b>Idea: </b> Simplify the problem</p>
<ol>
<li class="fragment">Is a particular tuple <i>Possible</i>?</li>
<li class="fragment">Is a particular tuple <i>Certain</i>?</li>
</ol>
</section>
<section>
<dl>
<div class="fragment">
<dt>Certain Tuple</dt>
<dd>A tuple that appears in all possible worlds</dd>
<dd class="fragment">$\forall D \in \mathcal D : t \in D$</dd>
</div>
<div class="fragment">
<dt>Possible Tuple</dt>
<dd>A tuple that appears in at least one possible world</dd>
<dd class="fragment">$\exists D \in \mathcal D : t \in D$</dd>
</div>
</dl>
</section>
<section>
<h3>Non-aggregate queries</h3>
<dl>
<dt>Is a tuple Certain?</dt>
<dd class="fragment">Is the provenance polynomial a tautology?</dd>
<dt>Is a tuple Possible?</dt>
<dd class="fragment">Is the provenance polynomial satisfiable (i.e., not a contradiction)?</dd>
</dl>
<p class="fragment">Pick your favorite SAT solver, plug in and go</p>
</section>
<section>
<h3>Aggregate queries</h3>
<p style="margin-top: 50px; margin-bottom: 50px;">
As before, factorize the possible outcomes
</p>
<p class="fragment">
$$1 + \{\;1\;\textbf{if}\;\texttt{bob} = 4\;\} + \{\;1\;\textbf{if}\;\texttt{carol} = 3\;\}$$
</p>
<p style="margin-top: 50px;" class="fragment">
Not bigger than the aggregate input...
</p>
<p class="fragment">
...but at least it only reduces to bin-packing <br/>(or a similar NP-hard problem.)
</p>
</section>
<section>
<p>In short, incomplete databases are limited, but have some uses.</p>
<p class="fragment">What about probabilities?</p>
</section>
</section>

View File

@ -0,0 +1,355 @@
---
template: templates/cse4562_2019_slides.erb
title: Checkpoint 4
date: May 3, 2019
textbook:
dependencies:
- lib/slide_utils.rb
---
<%
require "slide_utils.rb"
%>
<section>
<section>
<h3>A few things first...</h3>
</section>
<section>
<img src="graphics/2019-05-03-DemoDay.png" class="stretch" />
</section>
<section>
<h3>4/562 Databake Off @ 3:00</h3>
<p>RSVP (limited space available) to participate</p>
</section>
<section>
<h3>A note on optimization...</h3>
<p>Lots of interesting strategies used in Checkpoint 3</p>
<ul>
<li>Pre-parsing</li>
<li>Column Stores</li>
<li>Cost-based Opt</li>
<li class="fragment">Hyper-optimize the slowest query</li>
</ul>
</section>
</section>
<section>
<section>
<h2>Checkpoint 4</h2>
<h3>Implement Updates</h3>
<p class="fragment">(lambda-architecture edition)</p>
<p class="fragment">Due May 20</p>
</section>
<section>
<ul>
<li>A stream of inserts, deletes, updates, and queries.</li>
<li>No restarts.</li>
<li>Answer queries as fast as possible.</li>
<li>Make sure query results account for DML effects.</li>
</ul>
</section>
<section>
<dl>
<dt>Stage 0</dt>
<dd>10 minutes of prep</dd>
<dt>Stage 1</dt>
<dd>Inserts only</dd>
<dt>Stage 2</dt>
<dd>Inserts + Deletes</dd>
<dt>Stage 3</dt>
<dd>Inserts + Deletes + Updates</dd>
</dl>
<p class="fragment">No restarts.</p>
</section>
</section>
<section>
<section>
<h3>Do I need to implement block-based storage?</h3>
<p class="fragment">No (although you can).</p>
<p class="fragment">Ok... so what else can I do?</p>
</section>
<section>
<h3>Classical Databases</h3>
<img src="graphics/2018-02-19-PrimaryVsSecondary.png" />
</section>
<section>
<p><b>Problem 1:</b> More indexes = Slower writes (bad for OLTP)</p>
<p><b>Problem 2:</b> Fewer indexes = Slower reads (bad for OLAP)</p>
</section>
<section>
<p>What if you have both OLAP and OLTP workloads?</p>
</section>
<section>
<p><b>Idea:</b> Weekly / Nightly / Hourly dump<br/>from OLTP System to OLAP system.</p>
<p class="fragment">(Index the data while dumping)</p>
</section>
<section>
<p><b>Problem:</b> Not seeing the freshest data!</p>
</section>
<section>
<p><b>Better Idea:</b> OLTP DB + OLAP DB.</p>
<p class="fragment">OLTP DB has few indexes, but only stores recent updates.</p>
<p class="fragment">OLAP DB has many indexes, and stores everything except recent updates.</p>
<p class="fragment">Periodically migrate updates into OLAP DB.</p>
<p class="fragment">(Lambda Architecture)</p>
</section>
<section>
<h2>Checkpoint 4</h2>
<h3>Suggested Approach: Lambda-Lite</h3>
</section>
</section>
<section>
<section>
<h3>Handling Inserts</h3>
</section>
<section>
<pre><code class="sql">
INSERT INTO FOO(A, B, C) VALUES (1, 2, 3);
</code></pre>
</section>
<section>
<%=
relational_algebra() do
ra_table("Orig")
end
%>
</section>
<section>
<%=
relational_algebra(debug: false) do
ra_union(
ra_table("Orig"),
ra_table("New")
)
end
%>
</section>
</section>
<section>
<section>
<h3>Example</h3>
</section>
<section>
<pre><code class="sql">
SELECT COUNT(*) FROM lineitem WHERE mktsegment = 'BUILDING';
</code></pre>
</section>
<section>
<%=
relational_algebra do
ra_aggregate(nil, "COUNT(*)",
ra_select("mktsegment = 'BUILDING'",
ra_table("lineitem")
)
)
end
%>
</section>
<section>
<%=
relational_algebra do
ra_aggregate(nil, "COUNT(*)",
ra_select("mktsegment = 'BUILDING'",
ra_union(
ra_table("lineitem"),
ra_table("inserts")
)
)
)
end
%>
</section>
</section>
<section>
<section>
<h3>Handling Deletes</h3>
</section>
<section>
<pre><code class="sql">
DELETE FROM FOO WHERE A > 5;
</code></pre>
</section>
<section>
<%=
relational_algebra do
ra_table("Orig")
end
%>
</section>
<section>
<%=
relational_algebra do
ra_diff(
ra_table("Orig"),
ra_table("New")
)
end
%>
<p class="fragment">... but that's not quite how SQL Delete works.</p>
</section>
<section>
<pre><code class="sql">
DELETE FROM FOO WHERE A > 5;
</code></pre>
<div class="fragment">
<%=
relational_algebra do
ra_select("A ≤ 5",
ra_table("FOO")
)
end
%>
</div>
</section>
<section>
<pre><code class="sql">
DELETE FROM Orig WHERE Something;
</code></pre>
<%=
relational_algebra do
ra_select("NOT Something",
ra_table("Orig")
)
end
%>
</section>
</section>
<section>
<section>
<h3>Example</h3>
</section>
<section>
<pre><code class="sql">
INSERT INTO lineitem(...) VALUES (...);
INSERT INTO lineitem(...) VALUES (...);
DELETE FROM lineitem WHERE shipdate BETWEEN date('1997-10-01')
AND date('1997-10-30');
SELECT COUNT(*) FROM lineitem WHERE mktsegment = 'BUILDING';
</code></pre>
</section>
<section>
<%=
relational_algebra do
ra_aggregate(nil, "COUNT(*)",
ra_select("mktsegment = 'BUILDING'",
ra_table("lineitem")
)
)
end
%>
</section>
<section>
<%=
relational_algebra do
ra_aggregate(nil, "COUNT(*)",
ra_select("mktsegment = 'BUILDING'",
ra_union(
ra_table("lineitem"),
ra_table("inserts")
)
)
)
end
%>
</section>
<section>
<%=
relational_algebra do
ra_aggregate(nil, "COUNT(*)",
ra_select("mktsegment = 'BUILDING'",
ra_select("shipdate NOT BETWEEN ...",
ra_union(
ra_table("lineitem"),
ra_table("inserts")
)
)
)
)
end
%>
</section>
</section>
<section>
<section>
<h3>Handling Updates</h3>
</section>
<section>
<pre><code class="sql">
UPDATE Foo SET A = 1, B = 2 WHERE C = 3;
</code></pre>
</section>
<section>
<pre><code class="sql">
UPDATE Foo SET A = 1, B = 2 WHERE C = 3;
</code></pre>
<%=
relational_algebra do
ra_union(
ra_select( "C = 3",
ra_project( { A: "1", B: "2", C: "C" },
ra_table("Foo")
)
),
ra_select( "C ≠ 3",
ra_table("Foo")
)
)
end
%>
</section>
<section>
<pre><code class="sql">
UPDATE Foo SET A = 1, B = 2 WHERE C = 3;
</code></pre>
<%=
relational_algebra do
ra_project( { A: "CASE WHEN C = 3 THEN 1 ELSE A END", B: "CASE ...", C: "C"},
ra_table("Foo")
)
end
%>
<pre class="fragment "><code class="sql">
SELECT CASE WHEN C = 3 THEN 1 ELSE A END AS A,
CASE WHEN C = 3 THEN 2 ELSE B END AS B,
C AS C
FROM Foo;
</code></pre>
</section>
</section>
<section>
<h3>Final Advice</h3>
<ul>
<li class="fragment">This isn't the only way to implement updates.</li>
<li class="fragment">Optimizer performance is crucial!</li>
<li class="fragment">Consider periodically pausing to collapse updates</li>
</ul>
</section>

View File

@ -0,0 +1,63 @@
---
template: templates/cse4562_2019_slides.erb
title: Incomplete and Probabilistic Databases
date: May 6, 2019
textbook: "<a href='https://github.com/UBOdin/mimir/wiki/Concepts-CTables'>PDB Concepts and C-Tables</a>"
dependencies:
- lib/slide_utils.rb
---
<%
require "slide_utils.rb"
%>
<section>
<section>
<p><b>Idea: </b> Make $\texttt{bob}$ and $\texttt{carol}$ random variables.</p>
</section>
<section>
<p>$$\texttt{bob} = \begin{cases} 4 & p = 0.8 \\ 9 & p = 0.2\end{cases}$$</p>
<p>$$\texttt{carol} = \begin{cases} 3 & p = 0.4 \\ 8 & p = 0.6\end{cases}$$</p>
</section>
<section>
<p style="font-size: 70%">
$$Q(\mathcal D) = \begin{cases}
1 & \textbf{if } \texttt{bob} = 9 \wedge \texttt{carol} = 8\\
2 & \textbf{if } \texttt{bob} = 4 \wedge \texttt{carol} = 8 \\&\; \vee\; \texttt{bob} = 9 \wedge \texttt{carol} = 3\\
3 & \textbf{if } \texttt{bob} = 4 \wedge \texttt{carol} = 3
\end{cases}$$</p>
<p style="font-size: 90%" class="fragment">
$$ = \begin{cases}
1 & p = 0.2 \times 0.6\\
2 & p = 0.8 \times 0.6 + 0.2 \times 0.4\\
3 & p = 0.8 \times 0.4 \end{cases}$$
</p>
<p class="fragment">
$$ = \begin{cases}
1 & p = 0.12\\
2 & p = 0.56\\
3 & p = 0.32\end{cases}$$
</p>
</section>
<section>
<p>
$$Q(\mathcal D) = \begin{cases}
1 & p = 0.12\\
2 & p = 0.56\\
3 & p = 0.32\end{cases}$$
</p>
<p class="fragment" style="margin-top: 50px;">$E\left[Q(\mathcal D)\right] = 0.12+1.12+0.96 = 2.20$</p>
<p class="fragment" style="margin-top: 50px;">$P\left[Q(\mathcal D) \geq 2\right] = 0.56+0.32 = 0.88$</p>
</section>
<section>
<p>In general, computing probabilities exactly is <code>#P</code>-hard</p>
<p style="margin-top: 50px;" class="fragment">... so we approximate</p>
</section>
<section>
<p><b>Idea 1</b>: Sample. Pick 10 random possible worlds and compute results for each.</p>
</section>
</section>

Binary file not shown.

After

Width:  |  Height:  |  Size: 756 KiB