diff --git a/src/teaching/cse-562/2021sp/checkpoint1.erb b/src/teaching/cse-562/2021sp/checkpoint1.erb index 75f95ef2..788f1128 100644 --- a/src/teaching/cse-562/2021sp/checkpoint1.erb +++ b/src/teaching/cse-562/2021sp/checkpoint1.erb @@ -202,7 +202,7 @@ case class AttributeReference( That all being said, here are unresolved nodes you can expect to encounter:

-
LogicalPlan
+

LogicalPlan

 case class UnresolvedRelation(
     nameElements: Seq[String], 
@@ -224,7 +224,7 @@ case class ____(____)
 Note that AttributeSet is a subclass of Seq[Attribute].  In general, the  output field should be given as a sequence of AttributeReferences (see above).
 

-
Expression
+

Expression

 case class UnresolvedStar(target: Option[Seq[String]])
 
diff --git a/src/teaching/cse-562/2021sp/index.erb b/src/teaching/cse-562/2021sp/index.erb index 9d453a19..8dd9d4b7 100644 --- a/src/teaching/cse-562/2021sp/index.erb +++ b/src/teaching/cse-562/2021sp/index.erb @@ -14,26 +14,31 @@ schedule: - date: "Feb. 9" topic: "Relational Algebra + Spark" materials: + lecture: https://youtu.be/xnJNTTirgoY slides: slide/2021-02-09-RA-Basics-and-Spark.html - date: "Feb. 11" topic: "Relational Algebra Equivalence Rules" materials: + lecture: https://youtu.be/IJLLCB6tdCk slides: slide/2021-02-11-RA-Equivs.html - date: "Feb. 16" topic: "Algorithms, Checkpoint 1" due: "Checkpoint 0" materials: checkpoint1: "checkpoint1.html" + slides: slide/2021-02-16-Checkpoint1.html - date: "Feb. 18" - topic: "Extended Relational Algebra" + topic: "Relational Algebra Algorithms" + materials: + slides: slide/2021-02-18-QueryAlgorithms.html - date: "Feb. 23" - topic: "Physical Data Layout" + topic: "Extended Relational Algebra" - date: "Feb. 25" - topic: "Indexes: Tree-Based" + topic: "Physical Data Layout" - date: "Mar. 2" - topic: "Indexes: Hash, View-Based" + topic: "Indexes: Tree-Based, Hash" - date: "Mar. 4" - topic: "Indexes: Modern" + topic: "Indexes: View-Based, Modern" - date: "Mar. 9" topic: "Spark's Optimizer + Checkpoint 2" due: "Checkpoint 1" diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb b/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb new file mode 100644 index 00000000..63fe996d --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-16-Checkpoint1.erb @@ -0,0 +1,785 @@ +--- +template: templates/cse4562_2021_slides.erb +title: "Checkpoint 1" +date: February 16, 2021 +textbook: "Ch. 16.1" +--- + + +
+
+

Checkpoint 1

+ + + + + + sif$ + + scalac -cp build:Catalyzer.jar -jar submission.jar {all .java files} + sif$ + + + + ls data/ + R.data      S.data     T.data + sif$ + + + head -n 2 data/R.data + 1|3|5 + 2|9|1 + sif$ + + + scala -cp submission.jar:Catalyzer.jar microbase.Microbase + + + $> + + + CREATE TABLE R(A int, B int, C int); + $> + + + SELECT A, C FROM R WHERE B < 5; + + + 1|5 + ... + $> + + +
+
+

Checkpoint 1

+
    +
  • Your code is compiled just the same as in Checkpoint 0.
  • +
  • Print a prompt '$>' at the start and after each command.
  • +
  • Read one command per line.
  • +
  • CREATE TABLE statements tell you the schema of each table.
  • +
  • Data lives in a '|'-separated file in 'data/[tablename].data'
  • +
  • Print query results '|'-separated
  • +
+
+ +
+

Setup

+ + Add this to your build.sbt (modify as appropriate for your IDE) + +

+  resolvers += "MimirDB" at "https://maven.mimirdb.info/"
+  libraryDependencies += "edu.buffalo.cse.odin" %% "catalyzer" % "3.0"
+    
+ +

Docs: https://doc.odin.cse.buffalo.edu/catalyzer/

+

Code: https://gitlab.odin.cse.buffalo.edu/okennedy/catalyzer

+ +
+
+
+
+ + + + → SQL + + +
CREATE TABLE PLAYERS(
+  ID string, 
+  FIRSTNAME string, 
+  LASTNAME string, 
+  FIRSTSEASON int, 
+  LASTSEASON int, 
+  WEIGHT int, 
+  BIRTHDATE date
+);
+
+SELECT FIRSTNAME, LASTNAME, WEIGHT, BIRTHDATE 
+FROM PLAYERS WHERE WEIGHT>200;
+
+ +
+ + + + → SQL + + +

+import org.apache.spark.sql.execution.SparkSqlParser
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+...
+  def parseSql(sql: String): LogicalPlan = 
+    new SparkSqlParser().parsePlan(sql)
+...
+    
+
+ +
+ + + + → SQL + → ? + + +

+...
+  plan match {
+    case c:CreateTableStatement => 
+      /* do something with c.name, c.tableSchema */
+    case _ => 
+      /* Interpret plan like a query */
+  }
+...
+    
+
+ +
+ + + + → SQL + + + CREATE TABLE + + + + SELECT + + + +
CREATE TABLE PLAYERS(
+  ID string, 
+  FIRSTNAME string, 
+  LASTNAME string, 
+  FIRSTSEASON int, 
+  LASTSEASON int, 
+  WEIGHT int, 
+  BIRTHDATE date
+);
+↓ +

There is a table named "PLAYERS"... +

    +
  • ... with 7 attributes
  • +
  • ... whose attributes have the given types
  • +
  • ... with data in the file "data/PLAYERS.data"
  • +
+

+
+
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + SELECT + + + +
+ABDELAL01|Alaa|Abdelnaby|1990|1994|240|1968-06-24
+ABDULKA01|Kareem|Abdul-jabbar|1969|1988|225|1947-04-16
+ABDULMA01|Mahmo|Abdul-rauf|1990|2000|162|1969-03-09
+ABDULTA01|Tariq|Abdul-wahad|1997|2002|223|1974-11-03
+ABDURSH01|Shareef|Abdur-rahim|1996|2007|225|1976-12-11
+ABERNTO01|Tom|Abernethy|1976|1980|220|1954-05-06
+ABRAMJO01|John|Abramovic|1946|1947|195|1919-02-09
+ACKERAL01|Alex|Acker|2005|2008|185|1983-01-21
+ACKERDO01|Donald|Ackerman|1953|1953|183|1930-09-04
+ACRESMA01|Mark|Acres|1987|1992|220|1962-11-15
+ACTONCH01|Charles|Acton|1967|1967|210|1942-01-11
+...   
+
+
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + + + + SELECT + + + + + + + Results + + + + + +

Example Queries

+
    +
  1. SELECT A, B, ... FROM R (Project)
  2. +
  3. SELECT A, B, ... FROM R WHERE ... (Project+Filter)
  4. +
  5. SELECT A+B AS C, ... FROM R (Map)
  6. +
  7. SELECT A+B AS C, ... FROM R WHERE ... (Map+Filter)
  8. +
  9. SELECT SUM(A+B) AS C, ... FROM R (Aggregate)
  10. +
  11. SELECT SUM(A+B) AS C, ... FROM R WHERE ... (Aggregate+Filter)
  12. +
+
+
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + + + + SELECT + + + + + + + Results + + + + + + +

Spark's Workflow

+ +
    +
  1. Analysis
  2. +
  3. Optimization
  4. +
  5. Physical Planning
  6. +
  7. Code Generation
  8. +
  9. Execution
  10. +
+
+ +
+ + + + → SQL + + + CREATE TABLE + + + + + Saved Schema + + + + .data + + + + + SELECT + + + + + + + LogicalPlan + + + + + + + + + + + + + Analyzed Plan + + + + + + + Iterators + + + + + + + + + + + + + Results + + + + + + + + + + + +
+
+ +
+
+

Analysis

+ +
+
+
Resolution
+
+
    +
  • Replace placeholder values from parsing.
  • +
  • "Wire up" attributes between operators.
  • +
+
+
+
+
Validation
+
+
    +
  • Ensure all of the types line up.
  • +
+
+
+
+
+ +
+

Placeholders

+ +

+  case class UnresolvedRelation(
+    nameElements: Seq[String], 
+    options: CaseInsensitiveStringMap, 
+    isStreaming: Boolean
+  )
+    
+

Separation of concerns: The parser doesn't know what tables have been defined.

+
+ +
+

Try It

+

+    println(
+      parser.parsePlan("SELECT * FROM R").treeString
+    )
+  
+ ↓ +

+'Project [*]
++- 'UnresolvedRelation [R], [], false
+  
+
+ +
+

+  Project(Seq(UnresolvedStar(None)), 
+    UnresolvedRelation(Seq("R"), CaseInsensitiveStringMap.empty, false)
+  )
+    
+ +

The interesting thing here is the nameElements field
(Seq("R") above)

+ +

This is a sequence to handle multipart names
(e.g., source.table → Seq("source", "table"))

+
+ +
+

Replacing Placeholders

+

+  import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+  ...
+  plan.transform { 
+    case UnresolvedRelation(nameElements, _, _) => ???
+  }
+    
+ +

By the way, ??? is valid Scala.
It means "I haven't implemented this yet".

+ +

So what goes there?

+
+ +
+

Suggested Approach

+

+  import org.apache.spark.sql.catalyst.plans.logical.LeafNode
+  import org.apache.spark.sql.catalyst.expressions.AttributeSequence
+
+  class Table( /* parameters */ ) extends LeafNode
+  {
+    def output: AttributeSequence = ???
+  }
+    
+
+ +
+

Expressions

+
+ +
+

org.apache.spark.sql.catalyst.analysis.UnresolvedStar

+

+

Seq(AttributeReference)

+ +

org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute

+

+

AttributeReference

+
+ +
+

+  case class AttributeReference(
+      name: String,
+      dataType: DataType,
+      nullable: Boolean = true,
+      override val metadata: Metadata = Metadata.empty
+    )(
+      val exprId: ExprId = NamedExpression.newExprId,
+      val qualifier: Seq[String] = Seq.empty[String]
+    ) extends Attribute with Unevaluable { ... }
+    
+ +

Simple Constructor: AttributeReference(name, dt)()

+

ExprId Constructor: AttributeReference(name, dt)(id)

+
+ +
+

exprId

+ +

+    AttributeReference("a", IntType)().equals(
+      AttributeReference("a", IntType)())
+  
+ +

returns false

+ +

Spark uses exprId equivalence to check whether two attributes are the same.

+
+ +
+

exprId

+ +

+    val id = NamedExpression.newExprId
+    AttributeReference("a", IntType)(id).equals(
+      AttributeReference("a", IntType)(id))
+  
+ +

returns true

+
+ +
+

qualifiers

+ +

+    AttributeReference("foo", IntType)(
+      qualifier = Seq("bar")
+    )
+    
+ +

represents bar.foo

+ +

You don't need to use this, but Spark already uses it, and it helps during analysis.

+
+ +
+

Why Do Analysis?

+ +
    +
  • Makes output work automagically on all existing LogicalPlan nodes.
  • +
  • Makes dataType work automagically on all existing Expression nodes.
  • +
  • Makes it easier to support eval on all existing Expression nodes.
  • +
+
+ +
+ +
+
+

Evaluating Expressions

+ +

+  import org.apache.spark.sql.catalyst.InternalRow
+  ...
+  def eval(input: InternalRow): Any = ???  
+    
+ +

Input: An InternalRow

+

Output: The result of evaluating the expression

+ +

eval is implemented for most existing Spark Expression nodes.

+
+ +
+

+  case class AttributeReference(
+      name: String,
+      dataType: DataType,
+      nullable: Boolean = true,
+      override val metadata: Metadata = Metadata.empty
+    )(
+      val exprId: ExprId = NamedExpression.newExprId,
+      val qualifier: Seq[String] = Seq.empty[String]
+    ) extends Attribute with Unevaluable { ... }
+    
+ +

Unevaluable, huh?

+
+ +
+

+  abstract class InternalRow extends SpecializedGetters with Serializable {
+    boolean getBoolean(int ordinal);
+    byte getByte(int ordinal);
+    short getShort(int ordinal);
+    int getInt(int ordinal);
+    long getLong(int ordinal);
+    float getFloat(int ordinal);
+    double getDouble(int ordinal);
+    ...
+  }
+    
+

InternalRow is basically just an Array[Any]

+

AttributeReference doesn't know which position the attribute will be at.

+
+ +
+

Suggested Approach

+ +

Make your own RowLookup subclass of Expression

+ +

Why not use this class instead of AttributeReference in the first place?

+ +

Once we start optimizing, optimization rules (e.g., Projection Pushdown) can change an attribute's position.

+
+
+ +
+
+

Evaluating LogicalPlan nodes

+ +

Separation of concerns: Look at each LogicalPlan node individually.

+ +

Naive approach: Compute the full result.

+
+ +
+

+  def eval(plan: LogicalPlan): Seq[InternalRow] =
+    plan match { 
+      case Project(targets, child) => 
+        evalProject(targets, eval(child))
+      case Filter(condition, child) => 
+        evalFilter(condition, eval(child))
+      ...
+    }
+
+  def evalProject(targets: Seq[Expression], 
+                  table: Seq[InternalRow]): Seq[InternalRow] = ???
+  def evalFilter(condition: Expression, 
+                 table: Seq[InternalRow]): Seq[InternalRow] = ???
+  ...
+    
+
+ +
+

Basic Mindset

+ +

+  r = readCSVFile("R")
+
+  s = readCSVFile("S")
+  
+  temp1 = evalCrossProduct(r, s)
+  
+  temp2 = evalFilter({R.B=S.B AND S.C=10}, 
+                     temp1)
+  
+  result = evalProject(Seq( {R.A} ), 
+                       temp2)
+    
+
+ +
+

Select

+ +

+ $$\sigma_{A \neq 3} R$$ +

+ + + + + +
AB
12
34
56
+
+ +
+

Select

+ +

+  def evalFilter(condition: Expression, input: Seq[InternalRow]) =
+      input.filter { row =>
+        condition.eval(row).asInstanceOf[Boolean] 
+      }
+    
+

(All-At-Once)

+
+ +
+

Problem: A "table" can get very very big.

+
+ +
+

Better Idea: Iterators

+
+
hasNext()
+
Returns true if there are more rows to return
+
next()
+
Returns the next row
+
reset()
+
Resets the iterator back to the first row
+
+

All "functions" can be implemented as iterators that use constant space

+
+
+

Select

+ +

+ $$\sigma_{A \neq 3} R$$ +

+ + + + + + + + + +
AB
getNext()for row in input:
12return row;
getNext()for row in input:
34X
56return row;
getNext()for row in input:
Nonereturn None;
+
+ +
+

Hint: Scala makes working with iterators very easy

+
+ +
+

Joins

+
+ +
+

Example: Join (Naive)

+

+      for r in R:
+        for s in S:
+          emit(merge(r, s))
+    
+ +

Project challenge: Implement this as an iterator

+
+
+

'|'-separated Value File Suggestions

+
    +
  • Use Scala's scala.io.Source's getLines() method
  • +
  • Use String's split() to separate fields.
  • +
  • Parse everything upfront
  • +
  • Iterate over InternalRow
  • +
  • Use InternalRow.fromSeq to create rows.
  • +
  • For Codd's sake, don't store entire tables in memory
  • +
+
+
+ + + + + + + +
SQL TypeSpark typeScala Type
stringStringTypeUTF8String
intIntTypeInteger
floatFloatTypeFloat
decimalDoubleTypeDouble
dateDateTypejava.time.Date
+

it's org.apache.spark.unsafe.types.UTF8String

+
+
+ +
+

Questions?

+
+ +
+

Next time...

+ +

Algorithms for Basic RA

+
+ diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb b/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb new file mode 100644 index 00000000..50fa456d --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-18-QueryAlgorithms.erb @@ -0,0 +1,489 @@ +--- +template: templates/cse4562_2021_slides.erb +title: "Algorithms for Queries" +date: February 18, 2021 +textbook: "Ch. 15.1-15.5, 16.7" +--- + + + + +
+
+

Query Evaluation Styles

+ +
+
All-At-Once (Collections)
+
Bottom-up, one operator at a time.
+ +
Volcano-Style (Iterators)
+
Operators "request" one tuple at a time from children.
+ +
Push-Style (Buffers)
+
Operators continuously produce/consume tuples.
+
+
+ +
+

Analyzing Volcano Operators

+ +
    +
  • Memory Bounds
  • +
  • Disk IO Used
  • +
  • CPU Used
  • +
+ +

Databases are usually IO- or Memory-bound

+
+ +
+

Memory Bounds

+ +
    +
  • Constant
  • +
  • Scales with output
  • +
  • Scales with part of the input
  • +
  • Worse
  • +
+ +

Core Question: Do we have enough memory to use this operator?

+
+ +
+

Disk IO

+ +

IO measured in:
+

    +
  • Number of Tuples
  • +
  • Number of Data Pages (absolute size)
  • +
+

+
+

Accounting

+

Figure out the cost of each individual operator.

+

Only count the number of IOs added by each operator.

+
+
+
+ +
+

Note

+ +

We'll be discussing the "default" algorithm for each operator.

+ +

Often, there are many algorithms, some of which cover multiple operators.

+ +

This is why Spark has a PhysicalPlan

+ +

In the suggested Iterator-based approach the Iterators are your PhysicalPlan

+
+ +
+
+

Table Scan ($R$)

+
+
Memory Required?
+
Constant!
+ +
IOs added?
+
$|R|$ tuples read
+
+
+ +
+

Select ($\sigma(R)$)

+ +
+
+

Select ($\sigma(R)$)

+
+
Memory Required?
+
Constant!
+ +
IOs added?
+
None! (Can "inline" into cost of $R$)
+
+
+ +
+

Example

+ +

Example, assume $R$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := R$?

+

How many IOs do we need to compute $Q := \sigma(R)$?

+
+ +
+

Project ($\pi(R)$)

+ +
+
+

Project ($\pi(R)$)

+
+
Memory Required?
+
Constant!
+ +
IOs added?
+
None!
+
+
+ +
+

Example

+ +

Example, assume $R$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := \pi(R)$?

+

How many IOs do we need to compute $Q := \pi(\sigma(R))$?

+ +

Projection and Selection do not add IO.

+
+ + +
+

Union ($R \cup S$)

+ +
+
+

Union ($R \cup S$)

+
+
Memory Required?
+
Constant!
+ +
IOs added?
+
None!
+
+
+ +
+

Cross ($R \times S$)

+ +
+
+

Cross ($R \times S$)

+
+
Memory Required?
+
It depends
+ +
IOs added?
+
It depends
+
+
+
+

Cross ($R \times S$)

+

How do you "reset" $S$?

+ +
+
"Materialize" S into memory
+
No extra IOs (but $O(|S|)$ memory)
+
Rerun the entire iterator
+
$(|R|-1) \cdot \texttt{cost}(S)$ extra tuples read
+
"Materialize" S onto disk
+
$|S|$ tuples written
+
$(|R|-1) \cdot |S|$ extra tuples read
+
+

This can get very expensive

+
+ +
+

Example

+ +

Example, assume $R$ and $S$ are both 100 tuples.

+ +

How many IOs do we need to compute $Q := R \cup S$?

+
    +
  1. Getting an Iterator on $R$: 100 tuples
  2. +
  3. Getting an Iterator on $S$: 100 tuples
  4. +
  5. Getting an Iterator on $R \cup S$ using the above iterators: 0 extra tuples
  6. +
+
+ +
+

Example

+ +

Example, assume $R$ is 20 tuples and $S$ is 100 tuples.

+ +

How many IOs do we need to compute $Q := R \times S$?

+
    +
  1. Getting an Iterator on $R$: 20 tuples
  2. +
  3. Getting an Iterator on $S$: 100 tuples
  4. +
  5. Getting an Iterator on $R \times S$ using the above iterators:
  6. +
+
+ +
+
    +
  • Memory: 0 extra tuples
  • +
  • Replay: $(|R|-1) \times \texttt{cost}(S) = 19 \times 100 = 1900$ extra tuples
  • +
  • Cache: $|R| \times |S| = 20 \times 100 = 2000$ extra tuples
  • +
+ +

Best Total Cost $100 + 20 + 1900 = 2020$

+
+ +
+

Example

+ +

Example, assume $R$ is 20 tuples and $S$ is 100 tuples,
and $c$ filters out 90% of tuples.

+ +

How many IOs do we need to compute $Q := R \times \sigma_c(R \times S)$?

+
    +
  1. Getting an Iterator on $\sigma_c(R \times S)$: 2020 tuples
  2. +
  3. Getting an Iterator on $R$: 20 tuples
  4. +
  5. Getting an Iterator on $R \times \sigma_c(R \times S)$ using the above iterators:
  6. +
+
+ +
+
    +
  • Memory: 0 extra tuples
  • +
  • Replay: $(|R|-1) \times \texttt{cost}(\sigma_c(R \times S)) = 19 \times 2020 = 38380$ extra tuples
  • +
  • Cache: $|R| \times |\sigma_c(R \times S)| = 20 \times 200 = 4000$ extra tuples
  • +
+ +

Best Total Cost $2020 + 20 + 4000 = 6040$

+
+ +
+

Is there a middle ground?

+
+
+ +
+ +
+

Nested-Loop Join

+ +
+ +
+

Problem: We need to evaluate rhs iterator
once per record in lhs

+
+ +
+

Preloading Data

+ +

Better Solution: Load both lhs and rhs records in blocks.

+ +

+                    def apply_cross(lhs, rhs):
+                      result = []
+
+                      while r_block = lhs.take(100):
+                        while s_block = rhs.take(100):
+                          for r in r_block:
+                            for s in s_block: 
+                              result += [r + s]
+                        rhs.reset()
+
+                      return result
+    
+
+ +
+

Block-Nested Loop Join

+ +
+ +
+

Block-Nested Loop ($R \times S$)

+ +

(with $\mathcal B$ as the block size for $R$)

+

(and with caching $S$ to disk)

+ +
+
Memory Required?
+
$O(\mathcal B)$
+ +
IOs added?
+
$|S|$ tuples written.
+
$(\frac{|R|}{\mathcal B} - 1) \cdot |S|$ tuples read.
+
+

In-memory caching is a special case of block-nested loop with $\mathcal B = |S|$

+

Does the block size for $R$ matter?

+
+ +
+

How big should the blocks be?

+ + +
+
+ +
+
+

Cross product is expensive!
Can we do better?

+

$\sigma_c(R\times S) \equiv R\bowtie_c S$

+
+ +
+

Cross Product

+ +
+
+

Problem: Naively, any tuple matches any other

+
+ +
+

Join Conditions

+ +

Solution: First organize the data

+
+ +
+ +
+ +
+

Strategies for Implementing $R \bowtie_{R.A = S.A} S$

+ +
+
In-Memory Index Join (1-pass Hash; Hash Join)
+
Build an in-memory index on one table, scan the other.
+ +
Partition Join (2-pass Hash; External Hash Join)
+
Partition both sides so that tuples don't join across partitions.
+ +
Sort/Merge Join
+
Sort all of the data upfront, then scan over both sides.
+
+
+ +
+

Hash Functions

+ +
    +
  • A hash function is a function that maps a large data value to a small fixed-size value
      +
    • Typically is deterministic & pseudorandom
    • +
  • +
  • Used in Checksums, Hash Tables, Partitioning, Bloom Filters, Caching, Cryptography, Password Storage, …
  • +
  • Examples: MD5, SHA1, SHA2
      +
    • MD5() part of OpenSSL (on most OSX / Linux / Unix)
    • +
  • +
  • Can map h(k) to range [0,N) with h(k) % N (modulus)
  • +
+
+ +
+

Hash Functions

+ +

+ $$h(X) \mod N$$ + +

    +
  • Pseudorandom output between $[0, N)$
  • +
  • Always the same output for a given $X$
  • +
+

+
+ +
+

1-Pass Hash Join

+ +
+ +
+

1-Pass Hash Join

+
+
Limited Queries
+
Only supports join conditions of the form $R.A = S.B$
+ +
Moderate-High Memory
+
Keeps 1 full relation in memory
+ +
Low Added IO Cost
+
Only requires 1 scan over each input.
+
+
+ +
+

Alternative: Build an in-memory tree (e.g., B+Tree) instead of a hash table!

+ +
+
Limited Queries
+
Also supports $R.A \geq S.B$, $R.A > S.B$
+ +
Moderate-High Memory
+
Keeps 1 full relation in memory
+ +
Low Added IO Cost
+
Only requires 1 scan over each input.
+
+
+ +
+

2-Pass Hash Join

+ +
+ +
+

2-Pass Hash Join

+
+
Limited Queries
+
Only supports join conditions of the form $R.A = S.B$
+ +
Low Memory
+
Never need more than 1 pair of partitions in memory
+ +
High IO Cost
+
$|R| + |S|$ tuples written out
+
$|R| + |S|$ tuples read in
+
+
+ +
+

Why is it important that the hash function is pseudorandom?

+
+ +
+

What if the data is already organized (e.g., sorted) in a useful way?

+
+ + +
+ +

Sort/Merge Join

+ +
+ +
+

Sort/Merge Join

+
+
Limited Queries
+
Only supports join conditions of the form $R.A = S.B$
+ +
Low Memory
+
Only needs to keep ~2 rows in memory at a time (not counting sort).
+ +
Low Added IO Cost
+
No added IO! (not counting sort).
+
+
+
+ +
+

Next time...

+ +

Extended Relational Algebra

+
+ diff --git a/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb b/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb new file mode 100644 index 00000000..a5c87452 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/2021-02-25-PhysicalLayout.erb @@ -0,0 +1,258 @@ +--- +template: templates/cse4562_2021_slides.erb +textbook: "Ch. 13.1-13.7, 15.7, 16.7" +date: February 25, 2021 +title: "Physical Layout & Memory Management" +--- +
+ +
+
+ + + + + + + +
CREATED_ATTREE_IDBLOCK_IDTHE_GEOMTREE_DBHSTUMP_DIAMCURB_LOCSTATUSHEALTHSPC_LATINSPC_COMMONSTEWARDGUARDSSIDEWALKUSER_TYPEPROBLEMSROOT_STONEROOT_GRATEROOT_OTHERTRNK_WIRETRNK_LIGHTTRNK_OTHERBRNCH_LIGHBRNCH_SHOEBRNCH_OTHEADDRESSZIPCODEZIP_CITYCB_NUMBOROCODEBORONAMECNCLDISTST_ASSEMST_SENATENTANTA_NAMEBORO_CTSTATELATITUDELONGITUDEX_SPY_SP
'08/27/2015'180683348711'POINT (-73.84421521958048 40.723091773924274)'30'OnCurb''Alive''Fair''Acer rubrum''red maple''None''None''NoDamage''TreesCount Staff''None''No''No''No''No''No''No''No''No''No''108-005 70 AVENUE''11375''Forest Hills'4064'Queens'292816'QN17''Forest Hills'4073900'New York'40.72309177-73.844215221027431.14821202756.768749
'09/03/2015'200540315986'POINT (-73.81867945834878 40.79411066708779)'210'OnCurb''Alive''Fair''Quercus palustris''pin oak''None''None''Damage''TreesCount Staff''Stones''Yes''No''No''No''No''No''No''No''No''147-074 7 AVENUE''11357''Whitestone'4074'Queens'192711'QN49''Whitestone'4097300'New York'40.79411067-73.818679461034455.70109228644.837379
'09/05/2015'204026218365'POINT (-73.93660770459083 40.717580740099116)'30'OnCurb''Alive''Good''Gleditsia triacanthos var. inermis''honeylocust''1or2''None''Damage''Volunteer''None''No''No''No''No''No''No''No''No''No''390 MORGAN AVENUE''11211''Brooklyn'3013'Brooklyn'345018'BK90''East Williamsburg'3044900'New York'40.71758074-73.93660771001822.83131200716.891267
'09/05/2015'204337217969'POINT (-73.93445615919741 40.713537494833226)'100'OnCurb''Alive''Good''Gleditsia triacanthos var. inermis''honeylocust''None''None''Damage''Volunteer''Stones''Yes''No''No''No''No''No''No''No''No''1027 GRAND STREET''11211''Brooklyn'3013'Brooklyn'345318'BK90''East Williamsburg'3044900'New York'40.71353749-73.934456161002420.35833199244.253136
'08/30/2015'189565223043'POINT (-73.97597938483258 40.66677775537875)'210'OnCurb''Alive''Good''Tilia americana''American linden''None''None''Damage''Volunteer''Stones''Yes''No''No''No''No''No''No''No''No''603 6 STREET''11215''Brooklyn'3063'Brooklyn'394421'BK37''Park Slope-Gowanus'3016500'New York'40.66677776-73.97597938990913.775046182202.425999
+
+

+
0101010010111010101010001010101001101001001010010001010101001...
+
+
+ + +
+
+

Record Layouts

+ +

How is data stored?

+
+ +
+

Problem 1: How should you encode one tuple?

+
+ +
+

Record Layout 1: Fixed

+ +
+ +
+

Record Layout 2: Delimiters

+ +
+ +
+

Record Layout 3: Headers

+ +
+ +
+

Record Formats

+
+
Fixed
+
Constant-size fields. Field $i$ at byte $\sum_{j < i} |Field_j|$
+
Delimited
+
Special character or string (e.g., ,) between fields
+
Header
+
Fixed-size header points to start of each field
+
 
+
 
+
+
+ +
+

Problem 2: How should you encode a file of tuples?

+
+ +
+

File Formats

+
+
Fixed
+
Constant-size records. Record $i$ at byte $|Record| \times i$
+
Delimited
+
Special character or string (e.g., \r\n) at record end
+
Header
+
Index in file points to start of each record
+
Paged
+
Align records to paging boundaries
+
+
+ +
+ +
+ +
+ + openclipart.org +
+
+ +
+
+
+
File
+
A collection of pages (or records)
+
Page
+
A fixed-size collection of records
+
Page size is usually dictated by hardware.
Mem Page $\approx$ 4KB   Cache Line $\approx$ 64B
+
Record
+
One or more fields (for now)
+
Field
+
A primitive value (for now)
+
+
+ +
+

Problem 2.b: How should you store records in a page?

+
+ +
+

Goal 1: Where is record $X$?

+

Goal 2: Support updates/deletions

+
+ +
+

Fixed size records

+
+ +
+ +

What about variable-size records?

+
+ +
+ +

Why store the key and records from opposite ends?

+
+ +
+ +
+ +
+

Problem 3: How should you organize pages in a file?

+

Key question: What happens when all records on a page are deleted?

+

Idea: Track empty pages.

+
+
+ +
+
+ +
+
+ +
+
+

An Alternative Layout

+
+ +
+

Row-Wise Layouts

+ +
+
+

Column-Wise Layouts

+ +
+ +
+

Each file stores 2-tuples $\left< RowID, Value\right >$.

+

Values only for one attribute.

+
+ +
+

Benefits

+
    +
  • Only one attribute to sort per file.
  • +
  • No IO cost for unused attributes ($\pi$-pushdown!)
  • +
+

Drawbacks

+
    +
  • Result attributes must be stitched back together ($\bowtie$)
  • +
+

Great for wide, rarely-updated tables where only a few attributes are used per-query

+
+ +
+

Example Column Stores

+ +

+ Cassandra logo + Vertica pos blk rgb.svg + MonetDB logo +

+ By Apache Software Foundation - https://svn.apache.org/repos/asf/cassandra/logo/cassandra.svg, Apache License 2.0, Link
+ By Ariolica - Own work, CC BY-SA 4.0, Link
+ By Source (WP:NFCC#4), Fair use, Link
+
+
+ +
+
+

Buffer Manager

+

Abstract the messy details of File-IO

+
+ +
+ + openclipart.org +
+ +
+
+
Frame
+
A "slot" managed by the buffer manager that holds one page.
+ +
Pinned Page
+
A page currently in use by part of the database. Must stay in its current frame until unpinned. (A page may be pinned multiple times)
+ +
Dirty Page
+
A page that has been modified since it was last read in.
+
+
+ +
+

When a page is requested

+ +

Is the page in the buffer pool? +

    +
  • Yes? Pin the page (again) and return the address.
  • +
  • No?
      +
    • Pick a frame for replacement with your favorite algorithm (e.g., LRU)...
    • +
    • If the frame is dirty, write it to disk
    • +
    • Read requested page into chosen frame
    • +
    • Pin the page and return its address
    • +
  • +
+
+ +
+

Does this all sound familiar?

+

Isn't this just Virtual Memory?

+
+ +
+

Yes!

+

(Many databases use memory-mapped files as a buffer manager)

+
+ +
+

Why Re-implement VMem?

+
+ +

Databases can predict the future!

+
+

SELECT * FROM R WHERE A > 500 AND A < 2000   →    Pages 10-12

+
+ +
+

How do we decide which pages hold the query results?

+

Answers next class!

+
+ +
diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg new file mode 100644 index 00000000..910a0882 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-16-RA-Tree.svg @@ -0,0 +1,150 @@ + + + + + + + + + + image/svg+xml + + + + + + + σS.C=10 + πR.A + R.B=S.B + + R + S + + + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg new file mode 100644 index 00000000..df8d8209 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Cross.svg @@ -0,0 +1,423 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + start + + Have Old 'r'? + + no + Read LHS Row 'r'and Reset RHS + + + + Read RHS Row 's' + not empty + + + + not empty + Return <r s> + + + + yes + + + + Done + + empty + + + + empty + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg new file mode 100644 index 00000000..dbe2612e --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Project.svg @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Read One Row + + not empty + Compute New Row + + + + Return Row + + + + Done! 
+ + empty + + start + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg new file mode 100644 index 00000000..43e07a21 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Select.svg @@ -0,0 +1,285 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Read One Row + + not empty + Check Condition + + + + satisfied + Return Row + + + + + unsatisfied + + + Done! + + empty + + start + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg new file mode 100644 index 00000000..26435b5b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Flow-Union.svg @@ -0,0 +1,372 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + start + Read LHS Row + + not empty + Return Row + + + + Read RHS Row + + empty + + + not empty + + + + Done! 
+ + empty + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg new file mode 100644 index 00000000..8fa80c73 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-1PassHash.svg @@ -0,0 +1,430 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + h(A) = 0 + h(A) = 1 + + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + + + + 3 + + + + + 2 + + + + + 1 + + + + + 3 + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg new file mode 100644 index 00000000..d254f98f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-2PassHash.svg @@ -0,0 +1,608 @@ + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + h(A) = 0 + h(A) = 1 + + + + + h(A) = 0 + h(A) = 1 + + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + + + + 3 + + + + 3 + + + + 2 + + + + 2 + + + + 1 + + + + 1 + + + + 3 + + + + 3 + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg new file mode 100644 index 00000000..3f28615b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-BNLJ.svg @@ -0,0 +1,274 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg new file mode 100644 index 00000000..16848069 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-Grid.svg @@ -0,0 +1,368 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + 
+ + + + + + + + + + + + + + 1 + 3 + 2 + 1 + + + + + + + + + 3 + 2 + 3 + 1 + + + + + + + + + 3 + 2 + 3 + 1 + 1 + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg new file mode 100644 index 00000000..777c5202 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-NLJ.svg @@ -0,0 +1,263 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ... + ... + ... + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg new file mode 100644 index 00000000..8c7c0c45 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-OrderGrid.svg @@ -0,0 +1,353 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + 3 + 3 + + + + + + 2 + + + + + + + + 1 + 1 + + + + + + + 1 + 1 + 2 + 3 + + + + + + + 1 + 2 + 3 + 3 + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg new file mode 100644 index 00000000..39f1c5e9 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-18-Join-SortMerge.svg @@ -0,0 +1,303 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + 1 + + + + 1 + + + + + + + 1 + + + + 2 + + + + + + 2 + + + + + + 3 + + + + 3 + + + + + + 3 + + + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg new file mode 100644 index 00000000..8215b798 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Buffer-Manager.svg @@ -0,0 +1,577 @@ + + + +image/svg+xml +Higher levels of the DB +Disk Page +Free Frame +Pages allocated to frames as per + +page replacement policy + \ No newline at end of file diff --git 
a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg new file mode 100644 index 00000000..53f2874f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-1.svg @@ -0,0 +1,390 @@ + + + +image/svg+xml +ø +ø +Pages with + +Data +Empty +Pages +Directory +Page +Each page contains 2 pointers plus data + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg new file mode 100644 index 00000000..0d791aeb --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Heap-File-2.svg @@ -0,0 +1,269 @@ + + + +image/svg+xml +Directory +Pages +Directories are a collection of pages (e.g., a linked list) + +Directories point to all data pages + +(entries can include # of free pages) + +Data + +Pages + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg new file mode 100644 index 00000000..14aa294f --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-ColumnWise.svg @@ -0,0 +1,473 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + A1 + + + + B1 + + + + C1 + + + + D1 + + + + A2 + + + + B2 + + + + C2 + + + + D2 + + + + A3 + + + + B3 + + + + C3 + + + + D3 + + + + A4 + + + + B4 + + + + C4 + + + + D4 + + File 1 + File 2 + File 3 + File 4 + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg new file mode 100644 index 00000000..667af648 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Layout-RowWise.svg @@ -0,0 +1,416 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + A1 + + + + B1 + + + + C1 + + + + D1 + + + + A2 + + + + B2 
+ + + + C2 + + + + D2 + + + + A3 + + + + B3 + + + + C3 + + + + D3 + + + + A4 + + + + B4 + + + + C4 + + + + D4 + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg new file mode 100644 index 00000000..e44c25ec --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-1.svg @@ -0,0 +1,729 @@ + + + +image/svg+xml +6 +1 +2 +3 +4 +5 +6 +7 +8 + +N +1 +2 +3 +4 +5 +6 +7 +8 + +N +N +01101011 + +Packed +Unpacked (Bitmap) +Number of Records +Bit array of occupied slots(and size of page) +Data Records +Free Space + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg new file mode 100644 index 00000000..7f1a4b42 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-Page-Layouts-2.svg @@ -0,0 +1,385 @@ + + + +image/svg+xml1 2 3 4 … +R1 +R2 +R3 +Variable Size Records +Pointer to start of free space + \ No newline at end of file diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg new file mode 100644 index 00000000..8a8482ee --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_bulk_loading.svg @@ -0,0 +1,9149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page + + + + Page + + + + Page + + + + Page + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page + + + + Page + + + + Page + + + + Page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Line + + + + Line + + + + Line + + + + Line + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Record + + + BAD! + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png new file mode 100644 index 00000000..6f853456 Binary files /dev/null and b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-mem_hierarchy.png differ diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg new file mode 100644 index 00000000..2333ecf7 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-fixed.svg @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + A + B + C + D + + Base Address (X) + + + + + + Address of C (X+|A|+|B|) + + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg new file mode 100644 index 00000000..6db27914 --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-header.svg @@ -0,0 +1,372 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + A + + + + B + + + + C + + + + D + + + + |A| + + + + |B| + + + + |C| + + + + |H| + + Record header points to each field + + + diff --git a/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg new file mode 100644 index 00000000..00811f9b --- /dev/null +++ b/src/teaching/cse-562/2021sp/slide/graphics/2021-02-25-record-separator.svg @@ -0,0 +1,288 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + A + + + + B + + + + C + + + + D + + + + ',' + + + + ',' + + + + ',' + + Special Separator Characters Delimit 
Fields + + + + +