diff --git a/slides/talks/2020-1-CIDR-Vizier/index-boris.html b/slides/talks/2020-1-CIDR-Vizier/index-boris.html new file mode 100644 index 00000000..5ddc1c16 --- /dev/null +++ b/slides/talks/2020-1-CIDR-Vizier/index-boris.html @@ -0,0 +1,845 @@ + +
+ + + +
+
+ VizierDB
+
+
+
+ →
+
+
+ →
+
+
+ ↓
+ ↓
+
+ Assumption
+ ≠
+ Assumption
+
+
Annotation on data value(s) that indicates a concern, e.g.,
+
+
+
+
+ caveat(race_ethnicity,
+ 'Unexpected race_ethnicity: ' & race_ethnicity)
+
+
+
+
+
+
+
+
+ CASE WHEN race_ethnicity NOT IN ('Black Non-Hispanic', /* ... */)
+
+ THEN caveat(race_ethnicity,
+ 'Unexpected race_ethnicity: ' & race_ethnicity)
+
+ ELSE race_ethnicity
+
+
+
+
+
+ -- Pass race_ethnicity through from R, wrapping any value outside the expected list in caveat() with an explanatory message.
+ -- NOTE(review): a NULL race_ethnicity makes NOT IN evaluate to UNKNOWN, so it takes the ELSE branch without a caveat -- confirm that is intended.
+ SELECT
+ CASE WHEN race_ethnicity NOT IN ('Black Non-Hispanic', /* ... */)
+
+ THEN caveat(race_ethnicity,
+ 'Unexpected race_ethnicity: ' & race_ethnicity)
+
+ ELSE race_ethnicity
+
+ END, /* ... */
+ FROM R
+
+
+ →
+ |
+ ||
↑ | ++ | ↑ | +
Caveats | +→ | +Caveats | +
Can twiddling the caveatted value change the output?
+ +Caveats on
Some conditions may apply
++
+Is a value caveatted?
+≡ Certain answers in incomplete databases
+(coNP-complete)
+
+ Correctness of SQL Queries on Databases with Nulls.
+ Paolo Guagliardo, Leonid Libkin
+
+ Uncertainty Annotated Databases - A Lightweight Approach for Approximating Certain Answers
+ Su Feng, Aaron Huber, Boris Glavic, Oliver Kennedy
+
+ Uncertainty Annotated Databases - A Lightweight Approach for Approximating Certain Answers
+ Su Feng, Aaron Huber, Boris Glavic, Oliver Kennedy
+
Add and maintain a binary "has caveat"
column for each row/column.
Spreadsheet Operations → SQL DDL / SQL DML
+This gives us an edit history in DDL/DML.
+
+ Using Reenactment to Retroactively Capture Provenance for Transactions
+ Bahareh Sadat Arab, Dieter Gawlick, Vasudha Krishnaswamy, Venkatesh Radhakrishnan, Boris Glavic
+
+ Graceful database schema evolution: the PRISM workbench
+ Carlo Curino, Hyun Jin Moon, Carlo Zaniolo
+
+ -- Overwrite column A with 'foo' on the row whose ROWID is 3.
+ UPDATE R
+     SET A = 'foo'
+     WHERE ROWID = 3;
+
+ becomes
+
+ -- Query form of the UPDATE: row 3 reports 'foo' for A, every other row keeps its stored A.
+ SELECT
+     CASE WHEN ROWID = 3 THEN 'foo' ELSE A END AS A,
+     B,
+     C, /* ... */
+ FROM R
+
+
+ -- Append one row to R, naming no columns and supplying no values.
+ INSERT INTO R ()
+     VALUES ();
+
+ becomes
+
+ -- Query form of the INSERT: all existing rows of R followed by one extra row of NULLs.
+ SELECT *
+ FROM R
+ UNION ALL
+ SELECT
+     NULL AS A,
+     NULL AS B,
+     NULL AS C, /* ... */
+
+
+ -- Extend R with a new column named bar (no column type is given).
+ ALTER TABLE R
+     ADD COLUMN `bar`;
+
+ becomes
+
+ -- Query form of the ALTER TABLE: every column of R plus a NULL-valued bar column.
+ SELECT
+     *,
+     NULL AS `bar`
+ FROM R;
+
+
+ $> pip3 install --user vizier-webapi
+ $> vizier
+
+ Vizier is supported by NSF Awards ACI-1640864 and IIS-1750460 and gifts from Oracle
+ + + +
+ -- View over raw_csv_data that exposes salary as a float.
+ -- When the cast yields NULL, salary becomes a caveated NULL whose message quotes the raw text.
+ CREATE VIEW survey_responses AS
+ SELECT
+     language,
+     CASE
+         WHEN CAST(salary AS float) IS NOT NULL THEN CAST(salary AS float)
+         ELSE caveat(NULL, 'Could not cast [ '&salary&' ] to float.')
+     END AS salary
+ FROM raw_csv_data;
+
+
+ -- Caveat-tracking form of survey_responses: the data columns, one boolean
+ -- _caveat_field_* flag per data column, and _caveat_row for the row as a whole.
+ CREATE VIEW survey_responses AS
+ SELECT
+     language,
+     CAST(salary AS float) AS salary,
+     FALSE AS _caveat_field_language,
+     -- salary is flagged exactly when the cast produces NULL
+     (CAST(salary AS float) IS NULL) AS _caveat_field_salary,
+     FALSE AS _caveat_row
+ FROM raw_csv_data;
+
+
+ -- Salary of every survey_responses row whose language is 'Scala'.
+ SELECT sr.salary
+ FROM survey_responses AS sr
+ WHERE sr.language = 'Scala'
+
+
+ -- Caveat-tracking form of the language = 'Scala' filter.
+ -- The salary flag passes through unchanged. The row flag is raised when the input row
+ -- is already flagged OR when language, the column the WHERE clause tests, is flagged:
+ -- either one puts the row's membership in the result in doubt.
+ SELECT
+     salary,
+     _caveat_field_salary AS _caveat_field_salary,
+     (_caveat_row OR _caveat_field_language) AS _caveat_row
+ FROM survey_responses
+ WHERE language = 'Scala'
+
+
+ -- Average salary over every row of survey_responses.
+ SELECT AVG(sr.salary) AS salary
+ FROM survey_responses AS sr
+
+
+ -- Caveat-tracking form of the ungrouped average.
+ -- The salary flag aggregates, over all input rows, whether the row or its salary is flagged.
+ -- The single output row itself is reported as not caveated.
+ -- NOTE(review): GROUP_OR is assumed to be a boolean OR aggregate -- confirm against its definition.
+ SELECT
+     AVG(salary) AS salary,
+     GROUP_OR(_caveat_field_salary OR _caveat_row) AS _caveat_field_salary,
+     FALSE AS _caveat_row
+ FROM survey_responses
+
+
+ -- Average salary for each distinct language in survey_responses.
+ SELECT
+     language,
+     AVG(salary) AS salary
+ FROM survey_responses
+ GROUP BY language
+
+
+ -- Aggregate the language caveat flag across all rows of survey_responses.
+ -- NOTE(review): GROUP_OR is assumed to be a boolean OR aggregate -- confirm against its definition.
+ SELECT GROUP_OR(sr._caveat_field_language)
+ FROM survey_responses AS sr
+
+ Can often be evaluated statically.
+
+ -- Caveat-tracking form of the per-language average that reports every salary as caveated.
+ -- The row flag aggregates, per group, whether each input row or its language is flagged.
+ -- NOTE(review): the untracked query reads survey_responses but this one reads by_language -- confirm the intended source.
+ -- NOTE(review): GROUP_AND is assumed to be a boolean AND aggregate -- confirm against its definition.
+ SELECT
+     language,
+     AVG(salary) AS salary,
+     FALSE AS _caveat_field_language,
+     TRUE AS _caveat_field_salary,
+     GROUP_AND(_caveat_field_language OR _caveat_row) AS _caveat_row
+ FROM by_language
+ GROUP BY language
+
+
+ -- Caveat-tracking form of the per-language average with language reported as uncaveated.
+ -- The salary flag aggregates, per group, whether each input row or its salary is flagged.
+ -- The row flag aggregates the input row flags per group.
+ -- NOTE(review): the untracked query reads survey_responses but this one reads by_language -- confirm the intended source.
+ -- NOTE(review): GROUP_OR / GROUP_AND are assumed to be boolean OR / AND aggregates -- confirm against their definitions.
+ SELECT
+     language,
+     AVG(salary) AS salary,
+     FALSE AS _caveat_field_language,
+     GROUP_OR(_caveat_field_salary OR _caveat_row) AS _caveat_field_salary,
+     GROUP_AND(_caveat_row) AS _caveat_row
+ FROM by_language
+ GROUP BY language
+
+