diff --git a/src/teaching/cse-562/2021sp/checkpoint1.erb b/src/teaching/cse-562/2021sp/checkpoint1.erb index 75f95ef2..788f1128 100644 --- a/src/teaching/cse-562/2021sp/checkpoint1.erb +++ b/src/teaching/cse-562/2021sp/checkpoint1.erb @@ -202,7 +202,7 @@ case class AttributeReference( That all being said, here are unresolved nodes you can expect to encounter:



 case class UnresolvedRelation(
     nameElements: Seq[String], 
@@ -224,7 +224,7 @@ case class ____(____)
 Note that AttributeSet is a subclass of Seq[Attribute].  In general, the  output field should be given as a sequence of AttributeReferences (see above).



 case class UnresolvedStar(target: Option[Seq[String]])
diff --git a/src/teaching/cse-562/2021sp/index.erb b/src/teaching/cse-562/2021sp/index.erb index 9d453a19..8dd9d4b7 100644 --- a/src/teaching/cse-562/2021sp/index.erb +++ b/src/teaching/cse-562/2021sp/index.erb @@ -14,26 +14,31 @@ schedule: - date: "Feb. 9" topic: "Relational Algebra + Spark" materials: + lecture: https://youtu.be/xnJNTTirgoY slides: slide/2021-02-09-RA-Basics-and-Spark.html - date: "Feb. 11" topic: "Relational Algebra Equivalence Rules" materials: + lecture: https://youtu.be/IJLLCB6tdCk slides: slide/2021-02-11-RA-Equivs.html - date: "Feb. 16" topic: "Algorithms, Checkpoint 1" due: "Checkpoint 0" materials: checkpoint1: "checkpoint1.html" + slides: slide/2021-02-16-Checkpoint1.html - date: "Feb. 18" - topic: "Extended Relational Algebra" + topic: "Relational Algebra Algorithms" + materials: + slides: slide/2021-02-18-QueryAlgorithms.html - date: "Feb. 23" - topic: "Physical Data Layout" + topic: "Extended Relational Algebra" - date: "Feb. 25" - topic: "Indexes: Tree-Based" + topic: "Physical Data Layout" - date: "Mar. 2" - topic: "Indexes: Hash, View-Based" + topic: "Indexes: Tree-Based, Hash" - date: "Mar. 4" - topic: "Indexes: Modern" + topic: "Indexes: View-Based, Modern" - date: "Mar. 9" topic: "Spark's Optimizer + Checkpoint 2" due: "Checkpoint 1" diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb b/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb new file mode 100644 index 00000000..63fe996d --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb @@ -0,0 +1,785 @@ +--- +template: templates/cse4562_2021_slides.erb +title: "Checkpoint 1" +date: February 16, 2021 +textbook: "Ch. 16.1" +--- + + +

Checkpoint 1

+ + + + + + sif$ + + scalac -cp build:Catalyzer.jar -jar submission.jar {all .java files} + sif$ + + + + ls data/ + R.data      S.data     T.data + sif$ + + + head -n 2 data/R.data + 1|3|5 + 2|9|1 + sif$ + + + scala -cp submission.jar:Catalyzer.jar microbase.Microbase + + + $> + + + CREATE TABLE R(A int, B int, C int); + $> + + + SELECT A, C FROM R WHERE B < 5; + + + 1|5 + ... + $> + + +

Checkpoint 1

  • Your code is compiled just the same as in Checkpoint 0.
  • +
  • Print a prompt '$>' at the start and after each command.
  • +
  • Read one command per line.
  • +
  • CREATE TABLE statements tell you the schema of each table.
  • +
  • Data lives in a '|'-separated file in 'data/[tablename].data'
  • +
  • Print query results '|'-separated
  • +
+ +


+ + Add this to your build.sbt (modify as appropriate for your IDE) + +

+  resolvers += "MimirDB" at "https://maven.mimirdb.info/"
+  libraryDependencies += "edu.buffalo.cse.odin" %% "catalyzer" % "3.0"
+ +

Docs: https://doc.odin.cse.buffalo.edu/catalyzer/


Code: https://gitlab.odin.cse.buffalo.edu/okennedy/catalyzer

+ +
+ + + + → SQL + + +
+  ID string, 
+  FIRSTNAME string, 
+  LASTNAME string, 
+  WEIGHT int, 
+ +
+ + + + → SQL + + +

+import org.apache.spark.sql.execution.SparkSqlParser
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+  def parseSql(sql: String): LogicalPlan = 
+    new SparkSqlParser().parsePlan(sql)
+ +
+ + + + → SQL + → ? + + +

+  plan match {
+    case c:CreateTableStatement => 
+      /* do something with c.name, c.tableSchema */
+    case _ => 
+      /* Interpret plan like a query */
+  }
+ +
+ + + + → SQL + + + CREATE TABLE + + + + SELECT + + + +
+  ID string, 
+  FIRSTNAME string, 
+  LASTNAME string, 
+  WEIGHT int, 
+↓ +

There is a table named "PLAYERS"... +

  • ... with 7 attributes
  • +
  • ... whose attributes have the given types
  • +
  • ... with data in the file "data/PLAYERS.data"
  • +

+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + SELECT + + + +
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + + + + SELECT + + + + + + + Results + + + + + +

Example Queries

  1. SELECT A, B, ... FROM R (Project)
  2. +
  3. SELECT A, B, ... FROM R WHERE ... (Project+Filter)
  4. +
  5. SELECT A+B AS C, ... FROM R (Map)
  6. +
  7. SELECT A+B AS C, ... FROM R WHERE ... (Map+Filter)
  8. +
  9. SELECT SUM(A+B) AS C, ... FROM R (Aggregate)
  10. +
  11. SELECT SUM(A+B) AS C, ... FROM R WHERE ... (Aggregate+Filter)
  12. +
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + + + + SELECT + + + + + + + Results + + + + + + +

Spark's Workflow

+ +
  1. Analysis
  2. +
  3. Optimization
  4. +
  5. Physical Planning
  6. +
  7. Code Generation
  8. +
  9. Execution
  10. +
+ +
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + SELECT + + + + + + + LogicalPlan + + + + + + + + + + + + + Analyzed Plan + + + + + + + Iterators + + + + + + + + + + + + + Results + + + + + + + + + + + +
+ +


+ +
  • Replace placeholder values from parsing.
  • +
  • "Wire up" attributes between operators.
  • +
  • Ensure all of the types line up.
  • +
+ +


+ +

+  case class UnresolvedRelation(
+    nameElements: Seq[String], 
+    options: CaseInsensitiveStringMap, 
+    isStreaming: Boolean
+  )

Separation of concerns: The parser doesn't know what tables have been defined.

+ +

Try It


+    println(
+      parser.parsePlan("SELECT * FROM R").treeString
+    )
+ ↓ +

+'Project [*]
++- 'UnresolvedRelation [R], [], false
+ +

+  Project(Seq(UnresolvedStar(None)), 
+    UnresolvedRelation(Seq("R"), CaseInsensitiveStringMap.empty, false)
+  )
+ +

The interesting thing here is the nameElements field
(Seq("R") above)

+ +

This is a sequence to handle multipart names
(e.g., source.table → Seq("source", "table"))

+ +

Replacing Placeholders


+  import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+  ...
+  plan.transform { 
+    case UnresolvedRelation(nameElements, _, _) => ???
+  }
+ +

By the way, ??? is valid Scala.
It means "I haven't implemented this yet".

+ +

So what goes there?

+ +

Suggested Approach


+  import org.apache.spark.sql.catalyst.plans.logical.LeafNode
+  import org.apache.spark.sql.catalyst.expressions.AttributeSequence
+  class Table( /* parameters */ ) extends LeafNode
+  {
+    def output: AttributeSequence = ???
+  }
+ +


+ +





+ +





+ +

+  case class AttributeReference(
+      name: String,
+      dataType: DataType,
+      nullable: Boolean = true,
+      override val metadata: Metadata = Metadata.empty
+    )(
+      val exprId: ExprId = NamedExpression.newExprId,
+      val qualifier: Seq[String] = Seq.empty[String]
+    ) extends Attribute with Unevaluable { ... }
+ +

Simple Constructor: AttributeReference(name, dt)()


ExprId Constructor: AttributeReference(name, dt)(id)

+ +


+ +

+    AttributeReference("a", IntegerType)().equals(
+      AttributeReference("a", IntegerType)())
+ +

returns false

+ +

Spark uses exprId equivalence to check whether two attributes are the same.

+ +


+ +

+    val id = NamedExpression.newExprId
+    AttributeReference("a", IntegerType)(id).equals(
+      AttributeReference("a", IntegerType)(id))
+ +

returns true

+ +


+ +

+    AttributeReference("foo", IntegerType)(
+      qualifier = Seq("bar")
+    )
+ +

represents bar.foo

+ +

You don't need to use this, but Spark already uses it, and it helps during analysis.

+ +

Why Do Analysis?

+ +
  • Makes output work automagically on all existing LogicalPlan nodes.
  • +
  • Makes dataType work automagically on all existing Expression nodes.
  • +
  • Makes it easier to support eval on all existing Expression nodes.
  • +
+ +
+ +

Evaluating Expressions

+ +

+  import org.apache.spark.sql.catalyst.InternalRow
+  ...
+  def eval(input: InternalRow): Any = ???  
+ +

Input: An InternalRow


Output: The result of evaluating the expression

+ +

eval is implemented for most existing Spark Expression nodes.

+ +

+  case class AttributeReference(
+      name: String,
+      dataType: DataType,
+      nullable: Boolean = true,
+      override val metadata: Metadata = Metadata.empty
+    )(
+      val exprId: ExprId = NamedExpression.newExprId,
+      val qualifier: Seq[String] = Seq.empty[String]
+    ) extends Attribute with Unevaluable { ... }
+ +

Unevaluable, huh?

+ +

+  abstract class InternalRow extends SpecializedGetters with Serializable {
+    boolean getBoolean(int ordinal);
+    byte getByte(int ordinal);
+    short getShort(int ordinal);
+    int getInt(int ordinal);
+    long getLong(int ordinal);
+    float getFloat(int ordinal);
+    double getDouble(int ordinal);
+    ...
+  }

InternalRow is basically just an Array[Any]


AttributeReference doesn't know which position the attribute will be at.

+ +

Suggested Approach

+ +

Make your own RowLookup subclass of Expression

+ +

Why not use this class instead of AttributeReference in the first place?

+ +

Once we start optimizing, optimization rules (e.g., Projection Pushdown) can change an attribute's position.

+ +

Evaluating LogicalPlan nodes

+ +

Separation of concerns: Look at each LogicalPlan node individually.

+ +

Naive approach: Compute the full result.

+ +

+  def eval(plan: LogicalPlan): Seq[InternalRow] =
+    plan match { 
+      case Project(targets, child) => 
+        evalProject(targets, eval(child))
+      case Filter(condition, child) => 
+        evalFilter(condition, eval(child))
+      ...
+    }
+  def evalProject(targets: Seq[Expression], 
+                  table: Seq[InternalRow]): Seq[InternalRow] = ???
+  def evalFilter(condition: Expression, 
+                 table: Seq[InternalRow]): Seq[InternalRow] = ???
+  ...
+ +

Basic Mindset

+ +

+  r = readCSVFile("R")
+  s = readCSVFile("S")
+  temp1 = evalCrossProduct(r, s)
+  temp2 = evalFilter({R.B=S.B AND S.C=10}, 
+                     temp1)
+  result = evalProject(Seq( {R.A} ), 
+                       temp2)
+ +


+ +

+ $$\sigma_{A \neq 3} R$$ +

+ + + + + +
+ +


+ +

+  def evalFilter(condition: Expression, input: Seq[InternalRow]) =
+      input.filter { row =>
+        condition.eval(row).asInstanceOf[Boolean] 
+      }


+ +

Problem: A "table" can get very very big.

+ +

Better Idea: Iterators

Returns true if there are more rows to return
Returns the next row
Resets the iterator back to the first row

All "functions" can be implemented as iterators that use constant space



+ +

+ $$\sigma_{A \neq 3} R$$ +

+ + + + + + + + + +
getNext()for row in input:
12return row;
getNext()for row in input:
56return row;
getNext()for row in input:
Nonereturn None;
+ +

Hint: Scala makes working with iterators very easy

+ +


+ +

Example: Join (Naive)


+      for r in R:
+        for s in S:
+          emit(merge(r, s))
+ +

Project challenge: Implement this as an iterator


'|'-separated Value File Suggestions

  • Use Scala's scala.io.Source's getLines() method
  • +
  • Use String's split() to separate fields.
  • +
  • Parse everything upfront
  • +
  • Iterate over InternalRow
  • +
  • Use InternalRow.fromSeq to create rows.
  • +
  • For Codd's sake, don't store entire tables in memory
  • +
+ + + + + + + +
SQL TypeSpark typeScala Type

it's org.apache.spark.unsafe.types.UTF8String

+ +


+ +

Next time...

+ +

Algorithms for Basic RA

+ diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb b/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb new file mode 100644 index 00000000..50fa456d --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb @@ -0,0 +1,489 @@ +--- +template: templates/cse4562_2021_slides.erb +title: "Algorithms for Queries" +date: February 18, 2021 +textbook: "Ch. 15.1-15.5, 16.7" +--- + + + + +

Query Evaluation Styles

+ +
All-At-Once (Collections)
Bottom-up, one operator at a time.
+ +
Volcano-Style (Iterators)
Operators "request" one tuple at a time from children.
+ +
Push-Style (Buffers)
Operators continuously produce/consume tuples.
+ +

Analyzing Volcano Operators

+ +
  • Memory Bounds
  • +
  • Disk IO Used
  • +
  • CPU Used
  • +
+ +

Databases are usually IO- or Memory-bound

+ +

Memory Bounds

+ +
  • Constant
  • +
  • Scales with output
  • +
  • Scales with part of the input
  • +
  • Worse
  • +
+ +

Core Question: Do we have enough memory to use this operator?

+ +

Disk IO

+ +

IO measured in:

  • Number of Tuples
  • +
  • Number of Data Pages (absolute size)
  • +




Figure out the cost of each individual operator.


Only count the number of IOs added by each operator.

+ +


+ +

We'll be discussing the "default" algorithm for each operator.

+ +

Often, there are many algorithms, some of which cover multiple operators.

+ +

This is why Spark has a PhysicalPlan

+ +

In the suggested Iterator-based approach the Iterators are your PhysicalPlan

+ +

Table Scan ($R$)

Memory Required?
+ +
IOs added?
$|R|$ tuples read
+ +

Select ($\sigma(R)$)

+ +

Select ($\sigma(R)$)

Memory Required?
+ +
IOs added?
None! (Can "inline" into cost of $R$)
+ +


+ +

Example, assume $R$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := R$


How many IOs do we need to compute $Q := \sigma(R)$

+ +

Project ($\pi(R)$)

+ +

Project ($\pi(R)$)

Memory Required?
+ +
IOs added?
+ +


+ +

Example, assume $R$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := \pi(R)$


How many IOs do we need to compute $Q := \pi(\sigma(R))$

+ +

Projection and Selection do not add IO.

+ + +

Union ($R \cup S$)

+ +

Union ($R \cup S$)

Memory Required?
+ +
IOs added?
+ +

Cross ($R \times S$)

+ +

Cross ($R \times S$)

Memory Required?
It depends
+ +
IOs added?
It depends

Cross ($R \times S$)


How do you "reset" $S$?

+ +
"Materialize" S into memory
No extra IOs (but $O(|S|)$ memory)
Rerun the entire iterator
$(|R|-1) \cdot \texttt{cost}(S)$ extra tuples read
"Materialize" S onto disk
$|S|$ tuples written
$(|R|-1) \cdot |S|$ extra tuples read

This can get very expensive

+ +


+ +

Example, assume $R$ and $S$ are both 100 tuples.

+ +

How many IOs do we need to compute $Q := R \cup S$?

  1. Getting an Iterator on $R$: 100 tuples
  2. +
  3. Getting an Iterator on $S$: 100 tuples
  4. +
  5. Getting an Iterator on $R \cup S$ using the above iterators: 0 extra tuples
  6. +
+ +


+ +

Example, assume $R$ is 20 tuples and $S$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := R \times S$?

  1. Getting an Iterator on $R$: 20 tuples
  2. +
  3. Getting an Iterator on $S$: 100 tuples
  4. +
  5. Getting an Iterator on $R \times S$ using the above iterators:
  6. +
+ +
  • Memory: 0 extra tuples
  • +
  • Replay: $(|R|-1) \times \texttt{cost}(S) = 19 \times 100 = 1900$ extra tuples
  • +
  • Cache: $|R| \times |S| = 20 \times 100 = 2000$ extra tuples
  • +
+ +

Best Total Cost $100 + 20 + 1900 = 2020$

+ +


+ +

Example, assume $R$ is 20 tuples and $S$ is 100 tuples,
and $c$ filters out 90% of tuples.

+ +

How many IOs do we need to compute $Q := R \times \sigma_c(R \times S)$

  1. Getting an Iterator on $\sigma_c(R \times S)$: 2020 tuples
  2. +
  3. Getting an Iterator on $R$: 20 tuples
  4. +
  5. Getting an Iterator on $R \times \sigma_c(R \times S)$ using the above iterators:
  6. +
+ +
  • Memory: 0 extra tuples
  • +
  • Replay: $(|R|-1) \times \texttt{cost}(\sigma_c(R \times S)) = 19 \times 2020 = 38380$ extra tuples
  • +
  • Cache: $|R| \times |\sigma_c(R \times S)| = 20 \times 200 = 4000$ extra tuples
  • +
+ +

Best Total Cost $2020 + 20 + 4000 = 6040$

+ +

Is there a middle ground?

+ +
+ +

Nested-Loop Join

+ +
+ +

Problem: We need to evaluate rhs iterator
once per record in lhs

+ +

Preloading Data

+ +

Better Solution: Load both lhs and rhs records in blocks.

+ +

+                    def apply_cross(lhs, rhs):
+                      result = []
+                      while r_block = lhs.take(100):
+                        while s_block = rhs.take(100):
+                          for r in r_block:
+                            for s in s_block: 
+                              result += [r + s]
+                        rhs.reset()
+                      return result
+ +

Block-Nested Loop Join

+ +
+ +

Block-Nested Loop ($R \times S$)

+ +

(with $\mathcal B$ as the block size for $R$)


(and with caching $S$ to disk)

+ +
Memory Required?
$O(\mathcal B)$
+ +
IOs added?
$|S|$ tuples written.
$(\frac{|R|}{\mathcal B} - 1) \cdot |S|$ tuples read.

In-memory caching is a special case of block-nested loop with $\mathcal B = |S|$


Does the block size for $R$ matter?

+ +

How big should the blocks be?

+ + +
+ +

Cross product is expensive!
Can we do better?


$\sigma_c(R\times S) \equiv R\bowtie_c S$

+ +

Cross Product

+ +

Problem: Naively, any tuple matches any other

+ +

Join Conditions

+ +

Solution: First organize the data

+ +
+ +
+ +

Strategies for Implementing $R \bowtie_{R.A = S.A} S$

+ +
In-Memory Index Join (1-pass Hash; Hash Join)
Build an in-memory index on one table, scan the other.
+ +
Partition Join (2-pass Hash; External Hash Join)
Partition both sides so that tuples don't join across partitions.
+ +
Sort/Merge Join
Sort all of the data upfront, then scan over both sides.
+ +

Hash Functions

+ +
  • A hash function is a function that maps a large data value to a small fixed-size value
    • Typically is deterministic & pseudorandom
    • +
  • +
  • Used in Checksums, Hash Tables, Partitioning, Bloom Filters, Caching, Cryptography, Password Storage, …
  • +
  • Examples: MD5, SHA1, SHA2
    • MD5() part of OpenSSL (on most OSX / Linux / Unix)
    • +
  • +
  • Can map h(k) to range [0,N) with h(k) % N (modulus)
  • +
+ +

Hash Functions

+ +

+ $$h(X) \mod N$$ + +

  • Pseudorandom output between $[0, N)$
  • +
  • Always the same output for a given $X$
  • +

+ +

1-Pass Hash Join

+ +
+ +

1-Pass Hash Join

Limited Queries
Only supports join conditions of the form $R.A = S.B$
+ +
Moderate-High Memory
Keeps 1 full relation in memory
+ +
Low Added IO Cost
Only requires 1 scan over each input.
+ +

Alternative: Build an in-memory tree (e.g., B+Tree) instead of a hash table!

+ +
Limited Queries
Also supports $R.A \geq S.B$, $R.A > S.B$
+ +
Moderate-High Memory
Keeps 1 full relation in memory
+ +
Low Added IO Cost
Only requires 1 scan over each input.
+ +

2-Pass Hash Join

+ +
+ +

2-Pass Hash Join

Limited Queries
Only supports join conditions of the form $R.A = S.B$
+ +
Low Memory
Never need more than 1 pair of partitions in memory
+ +
High IO Cost
$|R| + |S|$ tuples written out
$|R| + |S|$ tuples read in
+ +

Why is it important that the hash function is pseudorandom?

+ +

What if the data is already organized (e.g., sorted) in a useful way?

+ + +
+ +

Sort/Merge Join

+ +
+ +

Sort/Merge Join

Limited Queries
Only supports join conditions of the form $R.A = S.B$
+ +
Low Memory
Only needs to keep ~2 rows in memory at a time (not counting sort).
+ +
Low Added IO Cost
No added IO! (not counting sort).
+ +

Next time...

+ +

Extended Relational Algebra

+ diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb b/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb new file mode 100644 index 00000000..a5c87452 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb @@ -0,0 +1,258 @@ +--- +template: templates/cse4562_2021_slides.erb +textbook: "Ch. 13.1-13.7, 15.7, 16.7" +date: February 25, 2021 +title: "Physical Layout & Memory Management" +--- +
+ +
+ + + + + + + +
'08/27/2015'180683348711'POINT (-73.84421521958048 40.723091773924274)'30'OnCurb''Alive''Fair''Acer rubrum''red maple''None''None''NoDamage''TreesCount Staff''None''No''No''No''No''No''No''No''No''No''108-005 70 AVENUE''11375''Forest Hills'4064'Queens'292816'QN17''Forest Hills'4073900'New York'40.72309177-73.844215221027431.14821202756.768749
'09/03/2015'200540315986'POINT (-73.81867945834878 40.79411066708779)'210'OnCurb''Alive''Fair''Quercus palustris''pin oak''None''None''Damage''TreesCount Staff''Stones''Yes''No''No''No''No''No''No''No''No''147-074 7 AVENUE''11357''Whitestone'4074'Queens'192711'QN49''Whitestone'4097300'New York'40.79411067-73.818679461034455.70109228644.837379
'09/05/2015'204026218365'POINT (-73.93660770459083 40.717580740099116)'30'OnCurb''Alive''Good''Gleditsia triacanthos var. inermis''honeylocust''1or2''None''Damage''Volunteer''None''No''No''No''No''No''No''No''No''No''390 MORGAN AVENUE''11211''Brooklyn'3013'Brooklyn'345018'BK90''East Williamsburg'3044900'New York'40.71758074-73.93660771001822.83131200716.891267
'09/05/2015'204337217969'POINT (-73.93445615919741 40.713537494833226)'100'OnCurb''Alive''Good''Gleditsia triacanthos var. inermis''honeylocust''None''None''Damage''Volunteer''Stones''Yes''No''No''No''No''No''No''No''No''1027 GRAND STREET''11211''Brooklyn'3013'Brooklyn'345318'BK90''East Williamsburg'3044900'New York'40.71353749-73.934456161002420.35833199244.253136
'08/30/2015'189565223043'POINT (-73.97597938483258 40.66677775537875)'210'OnCurb''Alive''Good''Tilia americana''American linden''None''None''Damage''Volunteer''Stones''Yes''No''No''No''No''No''No''No''No''603 6 STREET''11215''Brooklyn'3063'Brooklyn'394421'BK37''Park Slope-Gowanus'3016500'New York'40.66677776-73.97597938990913.775046182202.425999

+ + +

Record Layouts

+ +

How is data stored?

+ +

Problem 1: How should you encode one tuple?

+ +

Record Layout 1: Fixed

+ +
+ +

Record Layout 2: Delimiters

+ +
+ +

Record Layout 3: Headers

+ +
+ +

Record Formats

Constant-size fields. Field $i$ at byte $\sum_{j < i} |Field_j|$
Special character or string (e.g., ,) between fields
Fixed-size header points to start of each field
+ +

Problem 2: How should you encode a file of tuples?

+ +

File Formats

Constant-size records. Record $i$ at byte $|Record| \times i$
Special character or string (e.g., \r\n) at record end
Index in file points to start of each record
Align records to paging boundaries
+ +
+ +
+ +
+ + openclipart.org +
+ +
A collection of pages (or records)
A fixed-size collection of records
Page size is usually dictated by hardware.
Mem Page $\approx$ 4KB   Cache Line $\approx$ 64B
One or more fields (for now)
A primitive value (for now)
+ +

Problem 2.b: How should you store records in a page?

+ +

Goal 1: Where is record $X$?


Goal 2: Support updates/deletions

+ +

Fixed size records

+ +
+ +

What about variable-size records?

+ +
+ +

Why store the key and records from opposite ends?

+ +
+ +
+ +

Problem 3: How should you organize pages in a file?


Key question: What happens when all records on a page are deleted?


Idea: Track empty pages.

+ +
+ +
+ +

An Alternative Layout

+ +

Row-Wise Layouts

+ +

Column-Wise Layouts

+ +
+ +

Each file stores 2-tuples $\left< RowID, Value\right >$.


Values only for one attribute.

+ +


  • Only one attribute to sort per file.
  • +
  • No IO cost for unused attributes ($\pi$-pushdown!)
  • +


  • Result attributes must be stitched back together ($\bowtie$)
  • +

Great for wide, rarely-updated tables where only a few attributes are used per-query

+ +

Example Column Stores

+ +

+ Cassandra logo + Vertica pos blk rgb.svg + MonetDB logo +

+ By Apache Software Foundation - https://svn.apache.org/repos/asf/cassandra/logo/cassandra.svg, Apache License 2.0, Link
+ By Ariolica - Own work, CC BY-SA 4.0, Link
+ By Source (WP:NFCC#4), Fair use, Link
+ +

Buffer Manager


Abstract the messy details of File-IO

+ +
+ + openclipart.org +
+ +
A "slot" managed by the buffer manager that holds one page.
+ +
Pinned Page
A page currently in use by part of the database. Must stay in its current frame until unpinned. (A page may be pinned multiple times)
+ +
Dirty Page
A page that has been modified since it was last read in.
+ +

When a page is requested

+ +

Is the page in the buffer pool? +

  • Yes? Pin the page (again) and return the address.
  • +
  • No?
    • Pick a frame for replacement with your favorite algorithm (e.g., LRU)...
    • +
    • If the frame is dirty, write it to disk
    • +
    • Read requested page into chosen frame
    • +
    • Pin the page and return its address
    • +
  • +
+ +

Does this all sound familiar?


Isn't this just Virtual Memory?

+ +



(Many databases use memory-mapped files as a buffer manager)

+ +

Why Re-implement VMem?

+ +

Databases can predict the future!


SELECT * FROM R WHERE A > 500 AND A < 2000   →    Pages 10-12

+ +

How do we decide which pages hold the query results?


Answers next class!

+ +
diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg new file mode 100644 index 00000000..910a0882 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg @@ -0,0 +1,150 @@ + + + + + + + + + + image/svg+xml + + + + + + + σS.C=10 + πR.A + R.B=S.B + + R + S + + + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg new file mode 100644 index 00000000..df8d8209 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg @@ -0,0 +1,423 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + start + + Have Old 'r'? + + no + Read LHS Row 'r'and Reset RHS + + + + Read RHS Row 's' + not empty + + + + not empty + Return <r s> + + + + yes + + + + Done + + empty + + + + empty + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg new file mode 100644 index 00000000..dbe2612e --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Read One Row + + not empty + Compute New Row + + + + Return Row + + + + Done! 
+ + empty + + start + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg new file mode 100644 index 00000000..43e07a21 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg @@ -0,0 +1,285 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Read One Row + + not empty + Check Condition + + + + satisfied + Return Row + + + + + unsatisfied + + + Done! + + empty + + start + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg new file mode 100644 index 00000000..26435b5b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg @@ -0,0 +1,372 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + start + Read LHS Row + + not empty + Return Row + + + + Read RHS Row + + empty + + + not empty + + + + Done! 
+ + empty + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg new file mode 100644 index 00000000..8fa80c73 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg @@ -0,0 +1,430 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + h(A) = 0 + h(A) = 1 + + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + + + + 3 + + + + + 2 + + + + + 1 + + + + + 3 + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg new file mode 100644 index 00000000..d254f98f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg @@ -0,0 +1,608 @@ + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + h(A) = 0 + h(A) = 1 + + + + + h(A) = 0 + h(A) = 1 + + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg new file mode 100644 index 00000000..3f28615b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg @@ -0,0 +1,274 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg new file mode 100644 index 00000000..16848069 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg @@ -0,0 +1,368 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + 
+ + + + + + + + + + + + + + 1 + 3 + 2 + 1 + + + + + + + + + 3 + 2 + 3 + 1 + + + + + + + + + 3 + 2 + 3 + 1 + 1 + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg new file mode 100644 index 00000000..777c5202 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg @@ -0,0 +1,263 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ... + ... + ... + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg new file mode 100644 index 00000000..8c7c0c45 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg @@ -0,0 +1,353 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + 3 + 3 + + + + + + 2 + + + + + + + + 1 + 1 + + + + + + + 1 + 1 + 2 + 3 + + + + + + + 1 + 2 + 3 + 3 + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg new file mode 100644 index 00000000..39f1c5e9 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg @@ -0,0 +1,303 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + 1 + + + + 1 + + + + + + + 1 + + + + 2 + + + + + + 2 + + + + + + 3 + + + + 3 + + + + + + 3 + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg new file mode 100644 index 00000000..8215b798 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg @@ -0,0 +1,577 @@ + + + +image/svg+xml +Higher levels of the DB +Disk Page +Free Frame +Pages allocated to frames as per + +page replacement policy + \ No newline at end of file diff --git 
a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg new file mode 100644 index 00000000..53f2874f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg @@ -0,0 +1,390 @@ + + + +image/svg+xml +ø +ø +Pages with + +Data +Empty +Pages +Directory +Page +Each page contains 2 pointers plus data + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg new file mode 100644 index 00000000..0d791aeb --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg @@ -0,0 +1,269 @@ + + + +image/svg+xml +Directory +Pages +Directories are a collection of pages (e.g., a linked list) + +Directories point to all data pages + +(entries can include # of free pages) + +Data + +Pages + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg new file mode 100644 index 00000000..14aa294f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg @@ -0,0 +1,473 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + A1 + + + + B1 + + + + C1 + + + + D1 + + + + A2 + + + + B2 + + + + C2 + + + + D2 + + + + A3 + + + + B3 + + + + C3 + + + + D3 + + + + A4 + + + + B4 + + + + C4 + + + + D4 + + File 1 + File 2 + File 3 + File 4 + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg new file mode 100644 index 00000000..667af648 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg @@ -0,0 +1,416 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + A1 + + + + B1 + + + + C1 + + + + D1 + + + + A2 + + + + B2 
+ + + + C2 + + + + D2 + + + + A3 + + + + B3 + + + + C3 + + + + D3 + + + + A4 + + + + B4 + + + + C4 + + + + D4 + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg new file mode 100644 index 00000000..e44c25ec --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg @@ -0,0 +1,729 @@ + + + +image/svg+xml +6 +1 +2 +3 +4 +5 +6 +7 +8 + +N +1 +2 +3 +4 +5 +6 +7 +8 + +N +N +01101011 + +Packed +Unpacked (Bitmap) +Number of Records +Bit array of occupied slots(and size of page) +Data Records +Free Space + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg new file mode 100644 index 00000000..7f1a4b42 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg @@ -0,0 +1,385 @@ + + + +image/svg+xml1 2 3 4 … +R1 +R2 +R3 +Variable Size Records +Pointer to start of free space + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg new file mode 100644 index 00000000..8a8482ee --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg @@ -0,0 +1,9149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page + + + + Page + + + + Page + + + + Page + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page + + + + Page + + + + Page + + + + Page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Line + + + + Line + + + + Line + + + + Line + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Record + + + BAD! + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png new file mode 100644 index 00000000..6f853456 Binary files /dev/null and b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png differ diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg new file mode 100644 index 00000000..2333ecf7 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + A + B + C + D + + Base Address (X) + + + + + + Address of C (X+|A|+|B|) + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg new file mode 100644 index 00000000..6db27914 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg @@ -0,0 +1,372 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + A + + + + B + + + + C + + + + D + + + + |A| + + + + |B| + + + + |C| + + + + |H| + + Record header points to each field + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg new file mode 100644 index 00000000..00811f9b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg @@ -0,0 +1,288 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + A + + + + B + + + + C + + + + D + + + + ',' + + + + ',' + + + + ',' + + Special Separator Characters Delimit 
Fields + + + + +