diff --git a/src/teaching/cse-562/2021sp/index.erb b/src/teaching/cse-562/2021sp/index.erb index 44de50ae..399edc0a 100644 --- a/src/teaching/cse-562/2021sp/index.erb +++ b/src/teaching/cse-562/2021sp/index.erb @@ -87,7 +87,7 @@ In this course, you will learn...
8 of 9 Forbes Top Software Companies
Have a Focus on Data Management Systems
8 of 10 Forbes Top Software Companies
Have a Focus on Data Management Systems
(same link)
https://canvas.instructure.com/courses/2540988https://canvas.instructure.com/courses/2540988
+https://piazza.com/buffalo/spring2021/cse462562
+ +I've torn the guts out of Apache Spark.
- Your mission: Replace them (sort of).
I've torn the guts out of Catalyst. What remains: +
Your mission: Replace the missing bits (sort of).
+Gutted Catalyst
Data (CSV Files)
Schema Information (CREATE TABLE)
Questions (SQL Queries)
@@ -346,7 +390,7 @@ textbook: "Ch. 1, 2.1-2.2"10/50 pts
8/50 pts
15/50 pts
Your data is currently an Unordered Set
- of Tuples with 100 fields each.
+ of Tuples with 100 attributes each.
Tomorrow, you’ll be repeatedly asked for 1 specific attribute
- of 5 specific rows identified by the first attribute
+ from 5 specific tuples identified by the first attribute
Better Idea: Rewrite data into a 99-Tuple of Maps keyed on the 1st attribute
This representation is equivalent and better for your needs.
@@ -523,3 +579,248 @@ textbook: "Ch. 1, 2.1-2.2"Declarative specifications make it easier to find equivalences.
+ SELECT [DISTINCT] targetlist
+ FROM relationlist
+ WHERE condition
+
+ + This is the least efficient strategy to compute a query! + A good optimizer will find more efficient strategies to compute the same answer. +
+SELECT * FROM Trees;
+
+ Wildcards (*
, tablename.*
) are special targets that select all attributes.
CREATED_AT | TREE_ID | BLOCK_ID | THE_GEOM | TREE_DBH | STUMP_DIAM | CURB_LOC | STATUS | HEALTH | SPC_LATIN | SPC_COMMON | STEWARD | GUARDS | SIDEWALK | USER_TYPE | PROBLEMS | ROOT_STONE | ROOT_GRATE | ROOT_OTHER | TRNK_WIRE | TRNK_LIGHT | TRNK_OTHER | BRNCH_LIGH | BRNCH_SHOE | BRNCH_OTHE | ADDRESS | ZIPCODE | ZIP_CITY | CB_NUM | BOROCODE | BORONAME | CNCLDIST | ST_ASSEM | ST_SENATE | NTA | NTA_NAME | BORO_CT | STATE | LATITUDE | LONGITUDE | X_SP | Y_SP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
'08/27/2015' | 180683 | 348711 | 'POINT (-73.84421521958048 40.723091773924274)' | 3 | 0 | 'OnCurb' | 'Alive' | 'Fair' | 'Acer rubrum' | 'red maple' | 'None' | 'None' | 'NoDamage' | 'TreesCount Staff' | 'None' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | '108-005 70 AVENUE' | '11375' | 'Forest Hills' | 406 | 4 | 'Queens' | 29 | 28 | 16 | 'QN17' | 'Forest Hills' | 4073900 | 'New York' | 40.72309177 | -73.84421522 | 1027431.14821 | 202756.768749 |
'09/03/2015' | 200540 | 315986 | 'POINT (-73.81867945834878 40.79411066708779)' | 21 | 0 | 'OnCurb' | 'Alive' | 'Fair' | 'Quercus palustris' | 'pin oak' | 'None' | 'None' | 'Damage' | 'TreesCount Staff' | 'Stones' | 'Yes' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | '147-074 7 AVENUE' | '11357' | 'Whitestone' | 407 | 4 | 'Queens' | 19 | 27 | 11 | 'QN49' | 'Whitestone' | 4097300 | 'New York' | 40.79411067 | -73.81867946 | 1034455.70109 | 228644.837379 |
'09/05/2015' | 204026 | 218365 | 'POINT (-73.93660770459083 40.717580740099116)' | 3 | 0 | 'OnCurb' | 'Alive' | 'Good' | 'Gleditsia triacanthos var. inermis' | 'honeylocust' | '1or2' | 'None' | 'Damage' | 'Volunteer' | 'None' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | '390 MORGAN AVENUE' | '11211' | 'Brooklyn' | 301 | 3 | 'Brooklyn' | 34 | 50 | 18 | 'BK90' | 'East Williamsburg' | 3044900 | 'New York' | 40.71758074 | -73.9366077 | 1001822.83131 | 200716.891267 |
'09/05/2015' | 204337 | 217969 | 'POINT (-73.93445615919741 40.713537494833226)' | 10 | 0 | 'OnCurb' | 'Alive' | 'Good' | 'Gleditsia triacanthos var. inermis' | 'honeylocust' | 'None' | 'None' | 'Damage' | 'Volunteer' | 'Stones' | 'Yes' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | '1027 GRAND STREET' | '11211' | 'Brooklyn' | 301 | 3 | 'Brooklyn' | 34 | 53 | 18 | 'BK90' | 'East Williamsburg' | 3044900 | 'New York' | 40.71353749 | -73.93445616 | 1002420.35833 | 199244.253136 |
'08/30/2015' | 189565 | 223043 | 'POINT (-73.97597938483258 40.66677775537875)' | 21 | 0 | 'OnCurb' | 'Alive' | 'Good' | 'Tilia americana' | 'American linden' | 'None' | 'None' | 'Damage' | 'Volunteer' | 'Stones' | 'Yes' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | 'No' | '603 6 STREET' | '11215' | 'Brooklyn' | 306 | 3 | 'Brooklyn' | 39 | 44 | 21 | 'BK37' | 'Park Slope-Gowanus' | 3016500 | 'New York' | 40.66677776 | -73.97597938 | 990913.775046 | 182202.425999 |
... and 683783 more |
+ SELECT tree_id, spc_common, boroname
+ FROM Trees
+ WHERE boroname = 'Brooklyn'
+
+
+ In English, what does this query compute?
+ + <%= sli_do_link_small %> +What is the ID, Commmon Name and Borough of Trees in Brooklyn?
+ +TREE_ID | SPC_COMMON | BORONAME |
---|---|---|
204026 | 'honeylocust' | 'Brooklyn' |
204337 | 'honeylocust' | 'Brooklyn' |
189565 | 'American linden' | 'Brooklyn' |
192755 | 'London planetree' | 'Brooklyn' |
189465 | 'London planetree' | 'Brooklyn' |
... and 177287 more |
+ SELECT latitude, longitude
+ FROM Trees, SpeciesInfo
+ WHERE Trees.spc_common = SpeciesInfo.name
+ AND SpeciesInfo.has_unpleasant_smell = 'Yes';
+
+
+ In English, what does this query compute?
+ + <%= sli_do_link_small %> + +What are the coordinates of Trees with bad smells?
+ +LATITUDE | LONGITUDE | |||
---|---|---|---|---|
40.59378755 | -73.9915968 | |||
40.69149917 | -73.97258754 | |||
40.74829709 | -73.98065645 | |||
40.68767857 | -73.96764605 | |||
40.739991 | -73.86526993 | |||
... and more | +
+ SELECT Trees.latitude, Trees.longitude
+ FROM Trees, SpeciesInfo
+ WHERE Trees.spc_common = SpeciesInfo.name
+ AND SpeciesInfo.has_unpleasant_smell = 'Yes';
+
+
+ ... is the same as ...
+ +
+ SELECT T.latitude, T.longitude
+ FROM Trees T, SpeciesInfo S
+ WHERE T.spc_common = S.name
+ AND S.has_unpleasant_smell = 'Yes';
+
+
+ ... is (usually) the same as ...
+ +
+ SELECT latitude, longitude
+ FROM Trees, SpeciesInfo
+ WHERE spc_common = name
+ AND has_unpleasant_smell = 'Yes';
+
+
+
+ SELECT tree_id,
+ stump_diam / 2 AS stump_radius,
+ stump_area = 3.14 * stump_diam * stump_diam / 4
+ FROM Trees;
+
+
+ + Arithmetic expressions can appear in targets or conditions. + Use ‘=’ or ‘AS’ to assign names to these attributes. + (The behavior of unnamed attributes is unspecified) +
+
+ SELECT tree_id, spc_common FROM Trees WHERE spc_common LIKE '%maple'
+
+ TREE_ID | SPC_COMMON |
---|---|
180683 | 'red maple' |
204325 | 'sycamore maple' |
205044 | 'Amur maple' |
184031 | 'red maple' |
208974 | 'red maple' |
SQL uses single quotes for ‘string literals’
+LIKE
is used for String Matches
‘%
’ matches 0 or more characters
+ SELECT tree_id FROM Trees WHERE spc_common = 'red maple'
+ UNION [ALL]
+ SELECT tree_id FROM Trees WHERE spc_common = 'sycamore maple'
+
+ Computes the set-union of any two union-compatible sets of tuples
+Adding ALL
preserves duplicates across the inputs (bag-union).
+ SELECT [DISTINCT] targetlist
+ FROM relationlist
+ WHERE condition
+ GROUP BY groupinglist
+ HAVING groupcondition
+
+ The targetlist now contains (a) Grouped attributes, and (b) Aggregate expressions.
+Targets of type (a) must be a subset of the grouping-list
+(intuitively each answer tuple corresponds to a single group, and each group must have a single value for each attribute)
+groupcondition is applied after aggregation and may contain aggregate expressions.
+
+ SELECT spc_common, count(*) FROM Trees GROUP BY spc_common
+
+ SPC_COMMON | COUNT |
---|---|
''Schubert' chokecherry' | 4888 |
'American beech' | 273 |
'American elm' | 7975 |
'American hophornbeam' | 1081 |
'American hornbeam' | 1517 |
... and more | +
Scala for Java programmers.
+