diff --git a/src/teaching/cse-562/2021sp/index.erb b/src/teaching/cse-562/2021sp/index.erb index ab5eff84..da43edce 100644 --- a/src/teaching/cse-562/2021sp/index.erb +++ b/src/teaching/cse-562/2021sp/index.erb @@ -92,6 +92,8 @@ schedule: count_min_sketch: https://ieeexplore-ieee-org.gate.lib.buffalo.edu/document/6042851 - date: "Mar. 30" topic: "Streaming Queries" + materials: + slides: slide/2021-03-30-StreamingQueries.html - date: "Apr. 1" topic: "Data Updates + Incremental View Maintenance" due: "Checkpoint 2" diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30-StreamingQueries.erb b/src/teaching/cse-562/2021sp/slide/2021-03-30-StreamingQueries.erb new file mode 100644 index 00000000..93b8ce2d --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30-StreamingQueries.erb @@ -0,0 +1,272 @@ +--- +template: templates/cse4562_2021_slides.erb +title: Streaming Queries +date: March 30, 2021 +textbook: +--- + +
+
+

Sequential Data

+ +
+ +
+

Example Queries

+ +

Find the % change in monthly sales, each month

+ +

+      SELECT A.Month, (A.Sales-B.Sales) / B.Sales 
+      FROM (SELECT … AS Month, SUM(…) AS Sales FROM …) A, 
+           (SELECT … AS Month, SUM(…) AS Sales FROM …) B 
+      WHERE A.Month = B.Month + 1
+    
+
+ +
+

Example Queries

+ +

Find the % change in monthly sales, each month

+ + +
+ +
+

Example Queries

+ +

Find the daily top-5 products by sales in the last week

+ +

+      SELECT Product, SUM(…) AS Sales FROM … WHERE date = today - 1 
+      GROUP BY Product ORDER BY Sales DESC LIMIT 5 UNION ALL 
+      SELECT Product, SUM(…) AS Sales FROM … WHERE date = today - 2 
+      GROUP BY Product ORDER BY Sales DESC LIMIT 5 UNION ALL …
+    
+
+ +
+

Example Queries

+ +

Find the trailing n-day moving average of sales

+ +

… almost impossible to express if n is a parameter
(i.e., query size depends on n)

+
+
+ +
+
+

The WINDOW Operator

+
    +
  1. Define a Sequence (i.e., sort the relation)
  2. +
  3. Compute all subsequences
      +
    • Fixed Physical Size: N records exactly.
    • +
    • Fixed Logical Size: Records within N units of time.
    • +
  4. +
  5. Compute an aggregate for each subsequence (one output row per subsequence)
  6. +
+
+ +
+

+    SELECT L.state, T.month, 
+       AVG(S.sales) OVER W AS movavg
+    FROM   Sales S, Times T, Locations L
+    WHERE  S.timeid = T.timeid 
+      AND  S.locid = L.locid
+    WINDOW W AS ( 
+       PARTITION BY L.state
+       ORDER BY T.month
+       RANGE BETWEEN INTERVAL '1' MONTH PRECEDING
+             AND INTERVAL '1' MONTH FOLLOWING
+    )
+    
+
+ +
+ +
+ +
+
+
+
PARTITION BY
+
Like GROUP BY
+
+ +
+
ORDER BY
+
The sequence to create. The output has one row for each value of this column.
+
+ +
+
RANGE BETWEEN ... AND ...
+
Physical/Logical size of the window
+
+ +
+
[Aggregate] OVER [WindowName]
+
A single query can have multiple windows.
+
+
+
+
+ +
+
+
+
OLAP
+
Changing Queries, Fixed Data
+ +
OLTP
+
Changing Data, Minimal Queries
+ +
+
Streaming
+
Changing Data, Fixed Queries
+
+
+
+ +
+

Challenge: Need to react to new data as it arrives

+
+ +
+

Streaming Queries

+ +
+
+ +
+
+ +
+ +
+ +
+
+ +
+
+

Stream Joins

+
+ +
+ +
+ +
+ +
+ +
+

Idea 1: Mandate ONLY WINDOW queries

+

Idea 2: Index the buffer!

+ +

Challenge: Maintaining the index as tuples fall out of the buffer

+
+
+ +
+
+

Streaming Indexes

+
+ +
+ +

Tuples always enter from one side and exit out the other

+
+ +
+ +

Lots of lookups for active tuples.

+
+ +
+

What is the best layout?

+
+
+
Queue/Linked List
+
Insert/remove in (nearly) temporal order
+
+ +
+
Hash/Tree
+
Lookup (randomly ordered) Join Key
+
+
+
+ +
+ +
+ +
+

$O(1 + \log(|W|))$ insertions.

+

$O(1 + \log(|W|))$ expiration.

+
+
+ +
+
+

Streaming Aggregation

+
+ +
+ +
+ +
+ +
+ +
+
+
Ring Aggregates (Sum, Count, Average)
+
Add new values - $O(|\Delta|)$
+
Subtract old values - $O(|\Delta|)$
+ +
Semiring Aggregates (Min, Max)
+
Rescan for new max - $O(|W|)$
+
+
+ +
+ +
+
+ +
+
+

Summary

+
+
Push vs Pull Data Flow
+
Push is a better fit because sources produce data at different rates.
+
Revisit Joins
+
Focus on ripple-style WINDOW joins.
+
Revisit Indexing
+
Linked Hash/Tree Indexes for efficient windowed indexing.
+
Revisit Aggregation
+
Sliding window aggregates.
+
+
+
\ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/LinkedIndex.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/LinkedIndex.svg new file mode 100644 index 00000000..7c881eb1 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/LinkedIndex.svg @@ -0,0 +1,688 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + <1, 1> + + + + <2, 3> + + + + <3, 4> + + + + <4, 3> + + + + <5, 2> + + + + <6, 5> + + + + <7, 3> + + + + <8, 1> + + + + + + + + + + + + + + 12345 + + + + + + + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/OffsetJoin.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/OffsetJoin.svg new file mode 100644 index 00000000..7d746f7e --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/OffsetJoin.svg @@ -0,0 +1,820 @@ + + + + + + + + image/svg+xml + + + + + + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + Jun + + + + May + + 5 + 9 + 1 + 2 + 4 + 5 + A + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + Jun + + + + May + + 5 + 9 + 1 + 2 + 4 + 5 + B + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Result + + + Feb + + + + Mar + + + + Apr + + + + Jun + + + + May + + 0.8 + -0.89 + 1 + 1 + 0.25 + + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/PushVsPull.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/PushVsPull.svg new file mode 100644 index 00000000..840f9b12 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/PushVsPull.svg @@ -0,0 +1,754 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + π + + + + σ + <4 + + + + R + + + + + "give me a tuple" + + + + + + + "here's a tuple" + + + + + + + + + + Pull + + + + + + R + + + + + σ + <4 + + + + + + + + + + + π + + + + + + + + + + 6 + + + + 7 + + + + 3 + + + + + + 3 + + + Push + + diff --git 
a/src/teaching/cse-562/2021sp/slide/2021-03-30/StreamJoin.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/StreamJoin.svg new file mode 100644 index 00000000..04400166 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/StreamJoin.svg @@ -0,0 +1,480 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + = + + R + S + + + + R(1) + 1 + + + S(2) + 2 + + + S(1) + 1 + + <1,1> + + R(3) + 3 + + + R(4) + 4 + + + R(5) + 5 + + + S(3) + 3 + + <3,3> + + R(6) + 6 + + + R(7) + 7 + + + R(2) + 2 + + <2,2> + + R(3) + 3 + + <3,3> + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Max.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Max.svg new file mode 100644 index 00000000..dade8690 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Max.svg @@ -0,0 +1,701 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + May + + + + Jun + + + + Jan + + + + Jan + + + + Mar + + + + May + + + + May + + 5 + 9 + 6 + 9 + 1 + 5 + 2 + 4 + 3 + 10 + 5 + + + + + 1 + + 9 + + + 2 + + 9 + + + 3 + + 9 + + + 4 + + 9 + + + 5 + + 10 + + + 6 + + 10 + Max + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-MaxTree.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-MaxTree.svg new file mode 100644 index 00000000..9eaf6215 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-MaxTree.svg @@ -0,0 +1,813 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + May + + + + Jun + + + + Jan + + + + Jan + + + + Mar + + + + May + + + + May + + 5 + 9 + 6 + 9 + 1 + 5 + 2 + 4 + 3 + 10 + 5 + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + Max + + + + + 9 + + + + + 9 + 9 + + + + 5 + + + + + + + + 4 + 10 + 5 + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Sum.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Sum.svg new file mode 100644 index 00000000..ccbfe7c0 --- 
/dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/Window-Sum.svg @@ -0,0 +1,701 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + May + + + + Jun + + + + Jan + + + + Jan + + + + Mar + + + + May + + + + May + + 5 + 9 + 6 + 9 + 1 + 5 + 2 + 4 + 3 + 10 + 5 + + + + + 1 + + 20 + + + 2 + + 29 + + + 3 + + 35 + + + 4 + + 17 + + + 5 + + 25 + + + 6 + + 24 + Sum + + diff --git a/src/teaching/cse-562/2021sp/slide/2021-03-30/Windows.svg b/src/teaching/cse-562/2021sp/slide/2021-03-30/Windows.svg new file mode 100644 index 00000000..81ae48ea --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-03-30/Windows.svg @@ -0,0 +1,469 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + Jan + + + + Feb + + + + Mar + + + + Apr + + + + May + + + + Jun + + + + Jan + + + + Jan + + + + Mar + + + + May + + + + May + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + +