diff --git a/slides/talks/2018-1-Tour-Mimir/data/UADBs/10pct.csv b/slides/talks/2018-1-Tour-Mimir/data/UADBs/10pct.csv new file mode 100644 index 00000000..0b059ecb --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/data/UADBs/10pct.csv @@ -0,0 +1,4 @@ +Label, PDB-1, PDB-2, PDB-3 +Deterministic,4.714,4.073,5.238 +Mimir,4.962,4.257,6.989 +MayBMS,21.814,9.171,18.137 diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/BI-Analyst.jpg b/slides/talks/2018-1-Tour-Mimir/graphics/BI-Analyst.jpg deleted file mode 100644 index 45d77c69..00000000 Binary files a/slides/talks/2018-1-Tour-Mimir/graphics/BI-Analyst.jpg and /dev/null differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/BingTranslate.png b/slides/talks/2018-1-Tour-Mimir/graphics/BingTranslate.png deleted file mode 100644 index d82e5e60..00000000 Binary files a/slides/talks/2018-1-Tour-Mimir/graphics/BingTranslate.png and /dev/null differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/Q1.png b/slides/talks/2018-1-Tour-Mimir/graphics/Q1.png new file mode 100644 index 00000000..08a3678c Binary files /dev/null and b/slides/talks/2018-1-Tour-Mimir/graphics/Q1.png differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/Q2.png b/slides/talks/2018-1-Tour-Mimir/graphics/Q2.png new file mode 100644 index 00000000..b2b98db8 Binary files /dev/null and b/slides/talks/2018-1-Tour-Mimir/graphics/Q2.png differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/Q3.png b/slides/talks/2018-1-Tour-Mimir/graphics/Q3.png new file mode 100644 index 00000000..bb8e876c Binary files /dev/null and b/slides/talks/2018-1-Tour-Mimir/graphics/Q3.png differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/blackbox.svg b/slides/talks/2018-1-Tour-Mimir/graphics/blackbox.svg index a2395efc..9ca05f47 100644 --- a/slides/talks/2018-1-Tour-Mimir/graphics/blackbox.svg +++ b/slides/talks/2018-1-Tour-Mimir/graphics/blackbox.svg @@ -7,24 +7,325 @@ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" - width="108.60823mm" + width="296.23718mm" height="102.27599mm" - viewBox="0 0 108.60823 102.27599" + viewBox="0 0 296.23718 102.27598" version="1.1" id="svg833" inkscape:version="0.92.2 5c3e80d, 2017-08-06" sodipodi:docname="blackbox.svg"> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + transform="translate(93.00389,-54.963)"> Black Box + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 💩 + + diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/data-lake-to-data-swamp.jpg b/slides/talks/2018-1-Tour-Mimir/graphics/data-lake-to-data-swamp.jpg deleted file mode 100644 index 94f43b48..00000000 Binary files a/slides/talks/2018-1-Tour-Mimir/graphics/data-lake-to-data-swamp.jpg and /dev/null differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/dataquality-normal.svg b/slides/talks/2018-1-Tour-Mimir/graphics/dataquality-normal.svg new file mode 100644 index 00000000..6b20b924 --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/dataquality-normal.svg @@ -0,0 +1,802 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/iu.jpeg b/slides/talks/2018-1-Tour-Mimir/graphics/iu.jpeg deleted file mode 100644 index 40d11ed8..00000000 Binary files a/slides/talks/2018-1-Tour-Mimir/graphics/iu.jpeg and /dev/null differ diff --git 
a/slides/talks/2018-1-Tour-Mimir/graphics/nine_or_four.svg b/slides/talks/2018-1-Tour-Mimir/graphics/nine_or_four.svg new file mode 100644 index 00000000..3da1eced --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/nine_or_four.svg @@ -0,0 +1,66 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/possibleworlds.svg b/slides/talks/2018-1-Tour-Mimir/graphics/possibleworlds.svg new file mode 100644 index 00000000..76ef6229 --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/possibleworlds.svg @@ -0,0 +1,128 @@ + + + + + + + + + + + + + + + Q(D) + + + + + + Q(D) + Q(D) + Q(D) + + + + + + + + + + + + Probability + Expectation + Variance + Histogram + + + + + + \ No newline at end of file diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/probdb-init.svg b/slides/talks/2018-1-Tour-Mimir/graphics/probdb-init.svg new file mode 100644 index 00000000..425bd18d --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/probdb-init.svg @@ -0,0 +1,1957 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + 3 + + + + + + - + 2 + + + + + + - + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + 2 + + + + + + 3 + + + + + + - + 3 + + + + + + - + 2 + + + + + + - + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + φ + μ,σ + 2 + ( + 0.8 + 0.6 + 0.4 + 0.2 + 0.0 + −5 + −3 + 1 + 3 + 5 + x + 1.0 + −1 + 0 + 2 + 4 + −2 + −4 + + + + x + ) + + + + + + + 0, + μ + = + + + 0, + μ + = + + + 0, + μ + = + + + −2, + μ + = + + + 2 + 0.2, + σ + = + + + 2 + 1.0, + σ + = 
+ + + 2 + 5.0, + σ + = + + + 2 + 0.5, + σ + = + + + + + + + + + + + + diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/probdb-query.svg b/slides/talks/2018-1-Tour-Mimir/graphics/probdb-query.svg new file mode 100644 index 00000000..68a1dbc7 --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/probdb-query.svg @@ -0,0 +1,1957 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + 3 + + + + + + - + 2 + + + + + + - + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + 2 + + + + + + 3 + + + + + + - + 3 + + + + + + - + 2 + + + + + + - + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + φ + μ,σ + 2 + ( + 0.8 + 0.6 + 0.4 + 0.2 + 0.0 + −5 + −3 + 1 + 3 + 5 + x + 1.0 + −1 + 0 + 2 + 4 + −2 + −4 + + + + x + ) + + + + + + + 0, + μ + = + + + 0, + μ + = + + + 0, + μ + = + + + −2, + μ + = + + + 2 + 0.2, + σ + = + + + 2 + 1.0, + σ + = + + + 2 + 5.0, + σ + = + + + 2 + 0.5, + σ + = + + + + + + + + + + + + diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/system.png b/slides/talks/2018-1-Tour-Mimir/graphics/system.png new file mode 100644 index 00000000..902ff09b Binary files /dev/null and b/slides/talks/2018-1-Tour-Mimir/graphics/system.png differ diff --git a/slides/talks/2018-1-Tour-Mimir/graphics/system.svg b/slides/talks/2018-1-Tour-Mimir/graphics/system.svg new file mode 100644 index 00000000..0c8aeea0 --- /dev/null +++ b/slides/talks/2018-1-Tour-Mimir/graphics/system.svg @@ -0,0 +1,1662 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/slides/talks/2018-1-Tour-Mimir/index.html 
b/slides/talks/2018-1-Tour-Mimir/index.html index 0e95fb5f..3a311155 100644 --- a/slides/talks/2018-1-Tour-Mimir/index.html +++ b/slides/talks/2018-1-Tour-Mimir/index.html @@ -185,67 +185,78 @@
-
+ +
-
+
- - - - - - - - - - - - - - - - -

Tons of Curation Heuristics Available!

+
+

    We have tools that can solve these problems!
    

- (OpenClipArt.org)
-

... that can be wrong

+

... most of the time

(google.com)
- -

... very wrong

- (nytimes.com) +

+ Problem: It's hard to trust tools that can be wrong! +

+
+ +
+ +
+ +
+

Options

+
    +
  1. Ignore the Problem
  2. +
  3. +
  4. +
-

- In the name of Codd,
thou shalt not give the user a wrong answer. -

+ +
+ +
+

+ In the name of Codd
Thou shalt not give the user a wrong answer. +

+
+ +
+
-

... but when combined with heuristics

(Fox News)
+ +
+

Options

+
    +
  1. Ignore the Problem
  2. +
  3. Heresy
  4. +
  5. +
+

On representing incomplete information in a relational data base

-

T. Imielinski & W. Lipski Jr.(VLDB 1981)

-

- Incomplete and Probabilistic Databases
have existed since the 1980s... -

+

    T. Imielinski & W. Lipski Jr. (VLDB 1981)
    

@@ -253,8 +264,13 @@
- -

(Typical Heuristics)

+ +

1. ProbDBs Produce Probability Distributions as Outputs

+
+ +
+ +

2. ProbDBs Require Probability Distributions as Inputs

@@ -273,7 +289,7 @@ "labelString": "Query Runtime (s)" } }] - } + } }} --> Label , PDB-1, PDB-2, PDB-3, TPCH-1, TPCH-3, TPCH-5, TPCH-9 @@ -284,17 +300,21 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436
-
- -

(Probabilistic Query Outputs)

-
-

Probabilistic Databases...

    -
  1. ... require probabilities as inputs
  2. -
  3. ... are slow
  4. -
  5. ... produce probabilities as outputs
  6. +
  7. ... require probabilities as inputs
  8. +
  9. ... produce probabilities as outputs
  10. +
  11. ... are slow
  12. +
+
+ +
+

Options

+
    +
  1. Ignore the Problem
  2. +
  3. Heresy
  4. +
  5. ?
@@ -317,38 +337,598 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436
-
    -
  • Why should you care about uncertain data?
  • -
  • Background: K-Relations and Possible Worlds
  • -
  • Uncertainty-Annotated Databases
    (Joint work with Boris Glavic, Su Feng, Aaron Huber)
  • -
  • Other Mimir Projects
  • -
+

Uncertainty-Annotated Databases

+

(Joint work with Boris Glavic, Su Feng, Aaron Huber)

+
+

Other Projects

+
    +
  • Adaptive Schemas
  • +
  • Probabilistic Query Compilers
  • +
+
-
    -
  • Semirings
  • -
  • K-Relations
  • +

    Background

    + +
    1. Possible Worlds
    2. -
    3. Certain, Possible Tuples
    4. -
+
  • $K$-Semirings
  • +
      • $K^W$-Semirings
    
  • + +
    + +
    + +
    + +
    +

    $K$-Semirings

    +
    + +
    + + + + +
    + + + + +
    RAB
    12
    13
    43
    + + + + +
    SBC
    25
    36
    36
    +

    The relational view

    +
    + +
    +

    The functional view

    +

    + $$R(1, 2) \mapsto 1$$ + $$R(1, 3) \mapsto 1$$ + $$R(4, 3) \mapsto 1$$ +

    +

    + $$R(4, 5) \mapsto 0$$ +

    +

    + $$S(3, 6) \mapsto 2$$ +

    +
    + +
    + $$[R_1 \cup R_2](\vec X) \equiv R_1(\vec X) + R_2(\vec X)$$ + +
    + $[S \cup S](3, 6)$ +

    $= S(3, 6) + S(3, 6)$

    +

    $= 2 + 2 = 4$

    +
    +
    + +
    + $$[R_1 \bowtie R_2](\vec X) \equiv R_1(\vec X) \times R_2(\vec X)$$ + +
    + $[R \bowtie S](4, 3, 6)$ +

    $= R(4, 3) \times S(3, 6)$

    +

    $= 1 \times 2 = 2$

    +
    +
    + +
    + $$[\pi_{\vec A} R](\vec X) \equiv \sum_{\vec Y} R(\vec X \vec Y)$$ + + +
    + $[\pi_{B} R](3)$ +

    $= \sum_{Y} R(Y, 3)$

    +

    $ = R(1, 3) + R(4, 3) + \ldots$

    +

    $= 1 + 1 + 0 = 2$

    +
    +
    + +
    + + + + +
    $\cup$ $\approx$$+$
    $\bowtie$$\approx$$\times$
    $\pi$ $\approx$$+$
    +
    + +
    +

    Provenance Semirings

    +

        T.J. Green & G. Karvounarakis & V. Tannen (PODS 2007)
    

    +
    + +
    + $$\left<\;\mathcal K,\;\oplus,\;\otimes,\;\mathbb 0,\;\mathbb 1\;\right>$$ + + + + + + + + + + + + + + + + + +
    SemiringEquivalent Query Semantics
    $\left<\mathbb N, +, \times, 0, 1\right>$Bag Semantics
    $\left<\mathbb B, \vee, \wedge, \bot, \top\right>$Set Semantics
    $\left<\mathcal K^W, \vec \oplus, \vec \otimes, \mathbb{\vec 0}, \mathbb{\vec 1}\right>$Possible Worlds Semantics
    -
      -
    • $K^W$-Relations
    • -
    • $PW_i$, Certain, Possible
    • -
    • Performance
    • -
    +

    $K^W$-Semirings

    +
    + +
    + + + + + +
    RAB
    12
    13
    3
    +
    + +
    + + + + +
    + + + + +
    $R_1$AB
    12
    13
    43
    + + + + +
    $R_2$AB
    12
    13
    93
    +
    + +
    + + + + + + +
    RAB
    12$\mapsto [1,1]$
    13$\mapsto [1,1]$
    43$\mapsto [1,0]$
    93$\mapsto [0,1]$
    +
    + +
    +

    Extractors

    + + $$\mathcal K^W \rightarrow \mathcal K$$ + (plug in any $K$-Semiring-compatible $\mathcal K$) + +
    +
    Possible World Value
    +
        $\texttt{PW}_i(\vec k) \equiv \vec k_i$
    
    +
    Certain Value
    +
        $\mathcal C(\vec k) \equiv \min(\vec k)$
    
    +
    Possible Value
    +
        $\mathcal P(\vec k) \equiv \max(\vec k)$
    
    +
    +
    + +
    + + + + + +
    + + + + + + +
    RAB
    12$\mapsto [1,1]$
    13$\mapsto [1,1]$
    43$\mapsto [1,0]$
    93$\mapsto [0,1]$
    +
    +

    $$\texttt{PW}_0(R(1, 2)) = 1$$

    +

    $$\texttt{PW}_0(R(4, 3)) = 1$$

    +

    $$\texttt{PW}_1(R(4, 3)) = 0$$

    +

    $$\mathcal C(R(4, 3)) = 0$$

    +

    $$\mathcal P(R(4, 3)) = 1$$

    +
    -
    Thanks...
    + +
    +

    A quick step back into reality...

    + +
    + +
    + + +
    + +
    + + + + + +
    RAB
    12
    13
    4 or 93
    +

     

    +
    + +
    + + + + + +
    RAB
    12
    13
    4 or 93
    +

    Standard practice: "Just use the best option."

    +
    + +
    + +
    + +
    +

    What's in between these extremes?

    +
    + +
    + + + + + +
    RAB
    12
    13
    43*
    + +

    Use the best option, but mark potential errors.

    +
    + +
    + To answer $Q(\mathcal D)$ we want... + + + + + + + + + + + +
    $PW_{i}(Q(\mathcal D))$The results Alice would have "just used".
    $\mathcal C(Q(\mathcal D))$Which of those results are trustworthy.
    +
    +
    + +
    +
    + $$\texttt{PW}_i(Q(\mathcal D)) \equiv Q(\texttt{PW}_i(\mathcal D))$$ +

    (Computing $PW_{i}(Q(\mathcal D))$ is cheap!)

    +
    + +
    +

    Can we do the same thing for $\mathcal C(Q(\mathcal D))$?

    +
    + +
    +

    No.

    +
    + +
    + + + + + + +
    RAB$K^W$$\mathcal C$
    12$\mapsto$$[1,1]$1
    13$\mapsto$$[1,1]$1
    43$\mapsto$$[1,0]$0
    93$\mapsto$$[0,1]$0
    +

    Compute $\pi_B(R)$

    +
    + +
    + + + + +
    $\pi_B$RB$K^W$$\mathcal C$
    2$\mapsto$$[1,1]$$1$
    3$\mapsto$$[2,2]$$1+0+0=1$
    +
    + +
    +

    So what can we do with $\mathcal C$?

    +
    + +
    +

    We can Approximate

    +
    +
    Soundness
    +
    $Q(\mathcal C(\mathcal D)) \leq \mathcal C(Q(\mathcal D))$
    +
    We can efficiently compute a conservative approximation of $\mathcal C$. + +
    Completeness
    +
    $Q(\mathcal C(\mathcal D)) = \mathcal C(Q(\mathcal D))$ ...if $Q$ is safe
    +
    +
    + +
    +

        ... also attribute-level uncertainty
    

    +
    + +
    + +
    +
    + + + Su Feng +
    + +
    +

    Defining Possible Worlds

    +

    Mimir allows users to define special UDFs called Models.

    +
    
    +    CREATE MODEL TYPE Geocoder AS mimir.models.GeocodingModel;
    +
    +    CREATE MODEL INSTANCE Text_To_Loc USING Geocoder('Google');
    +
    +    SELECT C.name, C.id, Text_To_Loc(C.address) AS address 
    +      FROM Customer C;
    +          
    +

    (Not actual Mimir-SQL. Language adapted for your viewing pleasure.)

    +

    Models...
    ... return one best guess
    ... define the space of alternatives

    +
    + +
    +

    Example Models

    + +
      +
    • Geocoding Addresses
    • +
    • Imputation using a SparkML classifier
    • +
    • Heuristic detection of order-by columns for interpolation
    • +
    • Schema matching based on edit-distance
    • +
    • MayBMS-style probabilistic repair-key
    • +
    • And more...
    • +
    +
    + +
    +

    Convenience Operators: Lenses

    + +

    Lenses instantiate/train a model and wrap a query

    +
      +
    • Domain Constraint Repair / Missing Value Imputation
    • +
    • Schema Matching
    • +
    • Sequence Repair
    • +
    • Key Repair
    • +
    • Arbitrary Choice
    • +
    • Type Detection *
    • +
    • Header Detection *
    • +
    • JSON Shredder *
    • +
    +
    + +
    +

    Evaluation handled by a DBMS or Spark via query rewriting.

    +
    
    +    SELECT C.name, C.id, Text_To_Loc(C.address) AS address 
    +      FROM Customer C;
    +          
    +

    becomes...

    +
    
    +    SELECT C.name, C.id, Text_To_Loc(C.address) AS address,
    +           1 AS name_certain,    1 AS id_certain, 
    +           0 AS address_certain, 1 AS row_certain
    +      FROM Customer C;
    +          
    + +
    + +
    + + +Label, PDB-1, PDB-2, PDB-3 +Deterministic, 4.714, 4.073, 5.238 +Mimir+SQLite, 4.962, 4.257, 6.989 +MayBMS, 21.814, 9.171, 18.137 + +
    + +
    + +
    +

    A few more things we're doing with Mimir...

    +
    + +
    +
    +

    Adaptive Schemas

    + +
      +
    • Domain Constraint Repair / Missing Value Imputation
    • +
    • Schema Matching
    • +
    • Sequence Repair
    • +
    • Key Repair
    • +
    • Arbitrary Choice
    • +
    • Type Detection *
    • +
    • Header Detection *
    • +
    • JSON Shredder *
    • +
    +
    + +
    +

    Adaptive Schemas

    + +
      +
    • Domain Constraint Repair / Missing Value Imputation
    • +
    • Schema Matching
    • +
    • Sequence Repair
    • +
    • Key Repair
    • +
    • Arbitrary Choice
    • +
    • Type Detection *
    • +
    • Header Detection *
    • +
    • JSON Shredder *
    • +
    +
    + +
    +
    
    +      LOAD 'customers.csv';
    +
    +      SELECT name FROM customers WHERE last_purchase < LAST_WEEK();
    +          
    +
    + +
    +

    How does the system know...

    +
    +
    ... which column is 'name'?
    +
    Guess that row 1 is headers.
    + +
    ... that 'last_purchase' is a date?
    +
    All rows look like YYYY-MM-DD
    +
    +

    This is all guesswork!

    +
    + +
    +

    Idea: Make the System Catalog a Probabilistic Table

    +
    +
    + +
    +
    +

    Probabilistic Query Compilers

    +

    Sampling from ProbDBs is Sloooow

    +
    +
    +

    Trivial Sampling

    +

    Evaluate the query $N$ times.
    Plug in samples instead of best guesses.

    +
    +

    Better Solutions

    +

    Merge evaluation to mitigate redundancy.

    +
    +
    + +
    +

    Sparse Encoding

    + + +
    + + + + + + + +
    $R_1$AB
    12
    34
    $R_2$AB
    15
    +
        + ➔ + + + + + +
    
    $R_{sparse}$ABS#
    121
    341
    152
    +
    +
    + +
    +

    Tuple Bundles

    + + +
    + + + + + + + +
    $R_1$AB
    12
    34
    $R_2$AB
    15
    +
        + ➔ + + + + +
    
    $R_{bundle}$AB$\phi$
    1[2,5][T,T]
    34[T,F]
    +
    +
    + +
    + + +Label, TPCH-1, TPCH-3, TPCH-5 +Sparse Tables, 119.6160702, 162.0010839, 258.7416881 +Tuple Bundles, 14.65919489, 300, 300 + +
    + +
    +

    Idea: Let the compiler pick the right representation
    (or combination)

    +
    +
    + + +
    @@ -364,21 +944,21 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436
    Students -

    Aaron
    (PhD-3Y)

    +

    Aaron
    (PhD-3Y)

    Lisa
    (PhD-0Y)

    - -

    Olivia
    (BS-Sr)

    + +

    Gourab
    (MS-2Y)

    - + +
    AlumniAlumni
    @@ -395,7 +975,11 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436 -

    Shivang
    (MS-2Y)

    +

    Shivang
    (MS 2018)

    +
    + +

    Olivia
    (BS 2017)

    @@ -431,10 +1015,10 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436 - -
    + Boris Glavic
    (IIT)
    + Su Feng
    (IIT)
    @@ -451,27 +1035,10 @@ Sampling (x10), 300, 242.5666234549135, 300, 119.61607021316885, 162.00108394436
    -

    Mimir is supported by NSF Award ACI-1640864, NPS Award N00244-16-1-0022, and gifts from Oracle

    +

    Mimir is supported by NSF Award ACI-1640864, NPS Award N00244-16-1-0022, and gifts from Oracle

    - - -
    - -


    http://mimirdb.info

    - -
      -
    • It's not the data that's uncertain, it's the interpretation.
    • -
    • Tagged best-guess evaluation is faster and easier to understand.
    • -
    • Not committing to one representation allows faster query processing.
    • -
    - -

    Thanks!

    - -
    - - diff --git a/slides/talks/2018-1-Tour-Mimir/papers/heuristics.svg b/slides/talks/2018-1-Tour-Mimir/papers/heuristics.svg index 8dd39d00..de84b080 100644 --- a/slides/talks/2018-1-Tour-Mimir/papers/heuristics.svg +++ b/slides/talks/2018-1-Tour-Mimir/papers/heuristics.svg @@ -55,8 +55,7 @@ id="layer1" transform="translate(0,103)">