preparing for push

pull/1/head
Oliver Kennedy 2015-12-04 12:10:33 -05:00
parent 878c79338e
commit 5a1c6db1f9
26 changed files with 215 additions and 207 deletions

View File

@ -15,7 +15,9 @@ fetch:
curl -O http://www.cse.buffalo.edu/~okennedy/okennedy.json;\
fi
push: all
find . -name .DS_Store | xargs rm
rsync -avz -e ssh --safe-links --progress site/ gram:/var/www/static/
.PHONY: all fetch
.PHONY: all fetch push

102
build.js
View File

@ -13,7 +13,6 @@ var assets = require('metalsmith-assets');
var paths = require('metalsmith-paths');
var linkchecker = require('metalsmith-broken-link-checker');
var rootpath = require('metalsmith-rootpath');
var mjAPI = require("mathjax-node/lib/mj-page.js");
var collections = require('metalsmith-collections');
var filenamedate = require('metalsmith-date-in-filename');
@ -22,100 +21,8 @@ var filenamedate = require('metalsmith-date-in-filename');
* Helpers
*/
/**
 * Metalsmith plugin that groups publication metadata by year for templates.
 *
 * Reads `odinLab.members`, `odinLab.alumni`, `okennedy.data.publications`,
 * `okennedy.venues` and `altPubs` from the Metalsmith metadata, and stores
 * the result under the `allpubs` metadata key as an array of
 * `{ year, pubs }` entries (latest year first for integer-like years).
 *
 * Patents are skipped, as are tech reports other than those whose venue
 * is "ArXiv".
 *
 * @param {Object} files - Metalsmith file map (not used by this stage).
 * @param {Object} smith - Metalsmith instance; only `metadata()` is called.
 * @param {Function} done - Invoked with no arguments once `allpubs` is set.
 * @throws {Error} If a publication that is not skipped has no `year`.
 */
var buildPubs = function(files, smith, done){
  var metadata = smith.metadata();
  var lab = metadata.odinLab.members.concat(metadata.odinLab.alumni);
  var pubs = metadata.okennedy.data.publications.concat(metadata.altPubs);
  var venues = metadata.okennedy.venues;

  // Keep every part of a name on one line (all spaces, not just the first).
  var noBreak = function(text) {
    return text.replace(/ /g, "&nbsp;");
  };

  // Lab members (current or alumni) get a highlighting <span>.
  var formatAuthor = function(author) {
    if (lab.indexOf(author) >= 0) {
      return "<span class='lab_member'>" + noBreak(author) + "</span>";
    }
    return noBreak(author);
  };

  // Builds the "( paper | slides )" link group; empty string when the
  // publication has no resource links at all.
  var formatResources = function(urls) {
    if (urls === null || typeof urls !== 'object') { return ""; }
    var links = Object.keys(urls).map(function(category) {
      return "<a href=\"" + urls[category] + "\">" + category + "</a>";
    });
    if (links.length === 0) { return ""; }
    return "(&nbsp;" + links.join("&nbsp;|&nbsp;") + "&nbsp;)";
  };

  var pubsByYear = {};
  pubs.forEach(function(pub) {
    var venue = venues[pub.venue] || {};
    if (pub.type === "patent") { return; }
    if (venue.type === "techreport" && pub.venue !== "ArXiv") { return; }

    if (typeof pub.year === 'undefined') {
      // Name the offending entry; concatenating the object itself would
      // only produce "[object Object]".
      throw new Error("Unknown year for " + (pub.title || JSON.stringify(pub)));
    }
    if (typeof pubsByYear[pub.year] === 'undefined') {
      pubsByYear[pub.year] = [];
    }

    pubsByYear[pub.year].push({
      title: pub.title,
      authorFormat: pub.authors.map(formatAuthor).join(", "),
      authors: pub.authors,
      venue: pub.venue + " " + pub.year,
      resourcesFormat: formatResources(pub.urls)
    });
  });

  // Integer-like keys are enumerated in ascending numeric order, so
  // reversing puts the most recent year first.
  var out = Object.keys(pubsByYear).map(function(year) {
    return { year: year, pubs: pubsByYear[year] };
  });
  metadata["allpubs"] = out.reverse();
  done();
};
/**
 * Typesets the TeX in one file via `mjAPI.typeset` and replaces the file's
 * contents with the resulting HTML.
 *
 * @param {Object} file - File entry; `contents` is passed to the typesetter
 *   as `html` and overwritten with a Buffer of the result.
 * @param {Object} cnt - Shared pending-work counter `{ count, done }`.
 *   `count` is decremented when typesetting finishes, and `done()` is called
 *   once it reaches zero or below.
 */
var renderOneTex = function(file, cnt)
{
  var typesetOptions = {
    html: file.contents,
    renderer: "NativeMML",
    inputs: ["TeX"],
    xmlns: "mml",
    singleDollars: true
  };
  mjAPI.typeset(typesetOptions, function(result) {
    "use strict";
    // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
    file.contents = Buffer.from(result.html);
    cnt.count--;
    if (cnt.count <= 0) { cnt.done(); }
  });
};
mjAPI.start();
/**
 * Metalsmith plugin that runs `renderOneTex` on every file and calls `done`
 * once all of them have finished.
 *
 * @param {Object} files - Metalsmith file map (path -> file entry).
 * @param {Object} smith - Metalsmith instance (not used by this stage).
 * @param {Function} done - Invoked once every file has been typeset, or
 *   immediately when `files` is empty.
 */
var renderTeX = function(files, smith, done)
{
  // The count starts at 1 as a guard held by this function, so `done` cannot
  // fire before every file has been queued, even if a callback runs
  // synchronously.
  var cnt = { count: 1, done: done };
  Object.keys(files).forEach(function(path) {
    cnt.count++;
    renderOneTex(files[path], cnt);
  });
  // Release the guard; this fires `done` only if nothing is still pending.
  cnt.count--;
  if (cnt.count <= 0) { cnt.done(); }
};
var buildPubs = require('./stages/build-pubs.js');
var renderTeX = require('./stages/render-tex.js');
/**
* Build.
@ -133,7 +40,7 @@ var metalsmith = Metalsmith(__dirname)
odinLab: "metadata/lab.json",
altPubs: "metadata/publications.json",
}))
.use(buildPubs)
.use(buildPubs())
.use(rootpath())
.use(inplace({
engine: "handlebars"
@ -159,10 +66,9 @@ var metalsmith = Metalsmith(__dirname)
limit: 3
}
}))
.use(function(x,y,z){console.log(y.metadata().news); z()})
.use(branch()
.pattern('**/*.html')
.use(renderTeX)
.use(renderTeX())
.use(layouts({
engine: "handlebars",
default: "default.hbs"

View File

@ -97,6 +97,10 @@ body {
}
.left_menu li.article {
font-size: 12px;
border-bottom-style: dotted;
border-bottom-color: #ccc;
border-bottom-width: 1px;
padding-bottom: 5px;
}
.left_menu li:hover{
color: #fff;

View File

@ -1,9 +1,8 @@
<h1 style="text-align: justify;"><span style="text-decoration: underline;"><a href="http://odin.cse.buffalo.edu/wp-content/uploads/2015/10/mimir_logo_final.png"><img class="aligncenter size-full wp-image-720" src="http://odin.cse.buffalo.edu/wp-content/uploads/2015/10/mimir_logo_final.png" alt="mimir_logo_final" width="1078" height="429" /></a></span></h1>
<img src="../../assets/logos/mimir_logo_final.png" alt="mimir_logo_final" width="539" height="214" />
<p style="text-align: justify;"><b>Students:</b> Ying Yang, Niccolo Meneghetti, Arindam Nandi, Vinayak Karuppasamy</p>
<p style="text-align: justify;"><img class="aligncenter wp-image-621 size-full" src="http://odin.cse.buffalo.edu/wp-content/uploads/2015/08/Mimir_Screenshot.png" alt="Mimir_Screenshot" width="965" height="846" /></p>
<img src="http://odin.cse.buffalo.edu/wp-content/uploads/2015/08/Mimir_Screenshot.png" alt="Mimir_Screenshot" width="643" height="564" />
<p style="text-align: justify;">Many analytics tasks are based on information that is initially incomplete, inconsistent, or simply used incorrectly. Although a variety of strategies exist to help people cope with these sources of uncertainty, they often require users to undertake heavyweight upfront organizational tasks (i.e., tagging, data-cleaning, or modeling) before the data can be used.  Automated techniques exist, but typically introduce their own forms of uncertainty.</p>
<p style="text-align: justify;">Mimir takes a step back and accepts that uncertainty is a fact of life.  Rather than trying to fight it, Mimir embraces uncertainty, and helps users to understand it better.  Combining automated data cleaning and data analysis techniques, Mimir's goal is to help users clean and query uncertain data, and to understand the impact of that uncertainty on the results of their analyses.</p>
[embed]https://www.youtube.com/watch?v=jow4JmDOxPs[/embed]
<hr />
<p style="text-align: justify;">Currently, the Mimir project has two active initiatives:</p>
@ -23,9 +22,9 @@
<li style="text-align: justify;"><b>Lenses: An On-Demand Approach to ETL</b>
<i>VLDB 2015</i> ( <a href="http://odin.cse.buffalo.edu/wp-content/uploads/2015/05/lenses_revised.pdf">paper</a> | <a href="http://odin.cse.buffalo.edu/slides/conference/2015-Ying-VLDB-Mimir.pdf">slides</a> )</li>
<li style="text-align: justify;"><b>Detecting the Temporal Context of Queries</b>
<i>BIRTE 2014</i> ( <a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2014/10/BIRTE2014-context-finalCR.pdf">paper</a> )</li>
<i>BIRTE 2014</i> ( <a href="http://odin.cse.buffalo.edu/wp-content/uploads/2014/10/BIRTE2014-context-finalCR.pdf">paper</a> )</li>
<li style="text-align: justify;"><b>On-Demand Query Result Cleaning</b>
<i>VLDB PhD Workshop 2014</i> ( <a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2014/10/p1283-Yang.pdf">paper</a> )</li>
<i>VLDB PhD Workshop 2014</i> ( <a href="http://odin.cse.buffalo.edu/wp-content/uploads/2014/10/p1283-Yang.pdf">paper</a> )</li>
</ul>
<h4>Presentations</h4>
<ul>

View File

@ -1,24 +1,20 @@
<h1><a href="http://odin.cse.buffalo.edu/wp-content/uploads/2015/10/PocketData.png"><img class="wp-image-708 alignleft" src="http://odin.cse.buffalo.edu/wp-content/uploads/2015/10/PocketData.png" alt="PocketData" width="207" height="207" /></a><span style="text-decoration: underline;">Pocket-Scale Data Management</span></h1>
<div class="page" title="Page 1">
<div class="layoutArea">
<div class="column">
<p style="text-align: justify;"><strong>Students:</strong> Jerry Ajay</p>
<p style="text-align: justify;">The world's 2 billion smartphones represent the most powerful and pervasive distributed system ever built. Open application marketplaces, such as the Google Play Store, have resulted in a vibrant software ecosystem comprising millions of smartphone and tablet apps in hundreds of different categories that both meet existing user needs and provide exciting novel capabilities. As mobile apps and devices become even more central to the personal computing experience, it is increasingly important to understand and improve their performance.  In partnership with <a href="https://phone-lab.org">UB's PhoneLab</a>, we are  analyzing SQLite logs from smartphones deployed in the wild.</p>
<img src="../../assets/logos/PocketData.png" alt="PocketData" width="207" height="207" />
# Pocket-Scale Data Management
</div>
<p style="text-align: justify;">The usage of embedded databases by smartphone apps differs from traditional database access patterns in two important ways. First, mobile app embedded databases support interactive workloads where queries arrive in bursts separated by long periods of idleness. Thus, raw throughput is a poor way to evaluate their performance, and opportunities exist to reorganize data after interactive bursts to prepare for future requests.</p>
<p style="text-align: justify;">Second, mobile app embedded databases support single apps that may issue queries with very specific properties. For example, our <a href="http://odin.cse.buffalo.edu/?attachment_id=619">initial analysis</a> has demonstrated that many Android apps use SQLite purely as a key-value store. In other cases, query patterns indicate that apps are using existing object-relational mapping libraries (<a class="external" href="https://en.wikipedia.org/wiki/Object-relational_mapping" target="_blank">ORMs</a>) to persist objects, which produce their own distinctive access patterns. We have also seen cases where SQLite databases were never updated by apps, indicating that the entire database may serve as a cache of structured app data that is only updated during app upgrades. <span class="pullquote">In all of these cases, the freedom of the embedded database to couple itself tightly to a specific app may open up new optimization opportunities.</span></p>
<p style="text-align: justify;">To explore these ideas we are beginning by building a smartphone embedded app database benchmark—actually a benchmark <em>generator</em>, which will be able to use traces of database activity to synthesize a benchmark for any app. This feature is critical to ensure that we can support the variety of app database usage patterns and update the benchmark suite easily as they change. We are also evaluating the performance and energy consumption of SQLite, the embedded database provided by default to Android apps, against alternatives such as <a class="external" href="https://en.wikipedia.org/wiki/Berkeley_DB" target="_blank">BerkeleyDB</a> and <a class="external" href="https://en.wikipedia.org/wiki/H-Store" target="_blank">H-Store</a>.</p>
__Students:__ Jerry Ajay
<div class="column">
<h2 style="text-align: justify;">Publications</h2>
<ul>
<li><a href="http://odin.cse.buffalo.edu/wp-content/uploads/2015/06/2015-TPCTC-SQLite-submitted.pdf">Pocket Data: The Need for TPC-MOBILE</a> (TPC-TC 2015)</li>
</ul>
<h2>Resources</h2>
<ul>
<li><a href="https://www.phone-lab.org/static/experiment/sample_dataset.tgz">Publicly Available Query Traces</a> ( <a href="https://www.phone-lab.org/experiment/existing/">documentation</a> | <a href="https://www.phone-lab.org/experiment/request/">more info</a> )</li>
</ul>
</div>
</div>
</div>
The world's 2 billion smartphones represent the most powerful and pervasive distributed system ever built. Open application marketplaces, such as the Google Play Store, have resulted in a vibrant software ecosystem comprising millions of smartphone and tablet apps in hundreds of different categories that both meet existing user needs and provide exciting novel capabilities. As mobile apps and devices become even more central to the personal computing experience, it is increasingly important to understand and improve their performance.  In partnership with <a href="https://phone-lab.org">UB's PhoneLab</a>, we are  analyzing SQLite logs from smartphones deployed in the wild.
The usage of embedded databases by smartphone apps differs from traditional database access patterns in two important ways. First, mobile app embedded databases support interactive workloads where queries arrive in bursts separated by long periods of idleness. Thus, raw throughput is a poor way to evaluate their performance, and opportunities exist to reorganize data after interactive bursts to prepare for future requests.
Second, mobile app embedded databases support single apps that may issue queries with very specific properties. For example, our <a href="/papers/2015/TPCTC-sqlite-final.pdf">initial analysis</a> has demonstrated that many Android apps use SQLite purely as a key-value store. In other cases, query patterns indicate that apps are using existing object-relational mapping libraries (<a class="external" href="https://en.wikipedia.org/wiki/Object-relational_mapping" target="_blank">ORMs</a>) to persist objects, which produce their own distinctive access patterns. We have also seen cases where SQLite databases were never updated by apps, indicating that the entire database may serve as a cache of structured app data that is only updated during app upgrades. In all of these cases, the freedom of the embedded database to couple itself tightly to a specific app may open up new optimization opportunities.
To explore these ideas we are beginning by building a smartphone embedded app database benchmark—actually a benchmark <em>generator</em>, which will be able to use traces of database activity to synthesize a benchmark for any app. This feature is critical to ensure that we can support the variety of app database usage patterns and update the benchmark suite easily as they change. We are also evaluating the performance and energy consumption of SQLite, the embedded database provided by default to Android apps, against alternatives such as <a class="external" href="https://en.wikipedia.org/wiki/Berkeley_DB" target="_blank">BerkeleyDB</a> and <a class="external" href="https://en.wikipedia.org/wiki/H-Store" target="_blank">H-Store</a>.
## Publications
* <a href="http://odin.cse.buffalo.edu/wp-content/uploads/2015/06/2015-TPCTC-SQLite-submitted.pdf">Pocket Data: The Need for TPC-MOBILE</a> (TPC-TC 2015)
## Resources
* <a href="https://www.phone-lab.org/static/experiment/sample_dataset.tgz">Publicly Available Query Traces</a> ( <a href="https://www.phone-lab.org/experiment/existing/">documentation</a> | <a href="https://www.phone-lab.org/experiment/request/">more info</a> )

View File

@ -3,33 +3,33 @@
<li><strong>Deadline</strong>: Feb 6</li>
<li><strong>Grade</strong>: 5% of Overall Grade</li>
</ul>
<h2>The Submission System</h2>
<h1>The Submission System</h1>
Let's first get familiar with the submission system.  It's located at
<p style="text-align: center;"><a href="http://mjolnir.cse.buffalo.edu/cse562">http://mjolnir.cse.buffalo.edu/cse562</a></p>
<img class="aligncenter size-full wp-image-233" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/odin_submissions.png" alt="odin_submissions" width="532" height="400" />
<p style="text-align: center;"><a href="http://dubstep.odin.cse.buffalo.edu">http://dubstep.odin.cse.buffalo.edu</a></p>
<img class="aligncenter size-full wp-image-233" src="img/odin_submissions.png" alt="odin_submissions" width="532" height="400" />
Or you can click on the "CSE 562 Submissions" link from the course syllabus.  After loading up the website, you will need to create an account.
<img class=" size-full wp-image-216 aligncenter" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/login.png" alt="login" width="619" height="271" />
<img class=" size-full wp-image-216 aligncenter" src="img/login.png" alt="login" width="619" height="271" />
When creating an account, be sure to use your UB email address.  If you don't have a UB email address, contact the teacher or a TA as soon as possible.
<img class="aligncenter size-full wp-image-217" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/create.png" alt="create" width="278" height="206" />
<img class="aligncenter size-full wp-image-217" src="img/create.png" alt="create" width="278" height="206" />
After you create an account, you'll receive an email with an activation token.  Click on the link in the email, or copy it into your browser's location bar.
<h4>Forming Groups</h4>
<h2>Forming Groups</h2>
Find up to two other students in the class, and elect one member of your group to be the leader.  You'll also need a group name.  Be creative.  This is how you'll show up on the leaderboards.  The group leader should go to the <strong>Manage Group</strong> tab and click "Or Start a Group..."
<a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/invitations.png"><img class="aligncenter size-full wp-image-218" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/invitations.png" alt="invitations" width="615" height="187" /></a><img class="aligncenter size-full wp-image-221" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/groupcreated.png" alt="groupcreated" width="612" height="338" />After creating and naming your group, your leader should click "Invite more..." on the <strong>Manage Group</strong> tab and add all remaining group members by their email addresses.
<a href="img/invitations.png"><img class="aligncenter size-full wp-image-218" src="img/invitations.png" alt="invitations" width="615" height="187" /></a><img class="aligncenter size-full wp-image-221" src="img/groupcreated.png" alt="groupcreated" width="612" height="338" />After creating and naming your group, your leader should click "Invite more..." on the <strong>Manage Group</strong> tab and add all remaining group members by their email addresses.
<a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/invite.png"><img class="aligncenter size-full wp-image-219" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/invite.png" alt="invite" width="357" height="132" /></a>
<a href="img/invite.png"><img class="aligncenter size-full wp-image-219" src="img/invite.png" alt="invite" width="357" height="132" /></a>
&nbsp;
All team members should now be able to accept their invitation by logging in and going to their <strong>Manage Group</strong> tab.
<img class="aligncenter size-full wp-image-220" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/accept.png" alt="accept" width="631" height="220" />
<h4>GIT and Source Code Management</h4>
<img class="aligncenter size-full wp-image-220" src="img/accept.png" alt="accept" width="631" height="220" />
<h2>GIT and Source Code Management</h2>
For submissions, and for your group's convenience, this course provides your group with a GIT repository.  If you don't know how to use GIT, it's easy, and an important skill to have.  Numerous tutorials and reference materials are available, including
<ul>
<li><a href="http://git-scm.com/documentation">http://git-scm.com/documentation</a></li>
@ -37,7 +37,7 @@ For submissions, and for your group's convenience, this course provides your gro
</ul>
If you don't want to dive headfirst into GIT, there's a good GUI available at <a href="http://www.sourcetreeapp.com">http://www.sourcetreeapp.com</a>, and read on below for a quick and dirty intro to GIT for the project.
The upstream URL of your team's GIT repository is available from the <strong>Manage Group</strong> tab.<img class="aligncenter size-full wp-image-222" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/gitrepo.png" alt="gitrepo" width="619" height="292" />
The upstream URL of your team's GIT repository is available from the <strong>Manage Group</strong> tab.<img class="aligncenter size-full wp-image-222" src="img/gitrepo.png" alt="gitrepo" width="619" height="292" />
To access the repository, you'll first need to register your GIT public key.  An overview of public key management can be found <a href="http://git-scm.com/book/en/Git-on-the-Server-Generating-Your-SSH-Public-Key">here</a>.  A public key should look something like this (with no line breaks):
<div class="page" title="Page 2">
@ -48,17 +48,17 @@ To access the repository, you'll first need to register your GIT public key.  A
</div>
</div>
</div>
<h4>Uploading Your Public Key</h4>
<h2>Uploading Your Public Key</h2>
From the <strong>Manage Account</strong> tab, click on "Upload public key..."
<a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/pubkey.png"><img class="aligncenter size-full wp-image-223" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/pubkey.png" alt="pubkey" width="268" height="104" /></a>
<a href="img/pubkey.png"><img class="aligncenter size-full wp-image-223" src="img/pubkey.png" alt="pubkey" width="268" height="104" /></a>
Copy the entire public key into the field provided and add a short description (useful if you have multiple computers).<img class="aligncenter size-full wp-image-224" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/pubkey2.png" alt="pubkey2" width="271" height="376" />
Copy the entire public key into the field provided and add a short description (useful if you have multiple computers).<img class="aligncenter size-full wp-image-224" src="img/pubkey2.png" alt="pubkey2" width="271" height="376" />
You should now be able to clone your team's GIT repository:
<a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/gitclone.png"><img class="aligncenter size-full wp-image-225" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/gitclone.png" alt="gitclone" width="585" height="192" /></a>
<h4>A Quick and Dirty Intro to GIT</h4>
<a href="img/gitclone.png"><img class="aligncenter size-full wp-image-225" src="img/gitclone.png" alt="gitclone" width="585" height="192" /></a>
<h2>A Quick and Dirty Intro to GIT</h2>
Once you have cloned a copy of your repository (a directory called teamX, where X is your group ID), you'll need some organization.  The grading script will attempt to compile all of the java files in the directory 'src' at the root of your git repository.  Create that now.
<pre>$&gt; cd teamX
$&gt; mkdir src
@ -76,10 +76,10 @@ The files are now in your global repository.  Your teammates can now receive yo
<pre>$&gt; cd teamX
$&gt; git pull</pre>
If this works, you should be all set.
<h4>Submitting Code</h4>
<h2>Submitting Code</h2>
To have your project graded click "Create new submission" from the <strong>Manage Group</strong> tab.
<a href="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/project.png"><img class="aligncenter size-full wp-image-226" src="http://mjolnir.cse.buffalo.edu/wp-content/uploads/2015/01/project.png" alt="project" width="626" height="373" /></a>
<a href="img/project.png"><img class="aligncenter size-full wp-image-226" src="img/project.png" alt="project" width="626" height="373" /></a>
A snapshot of your repository will be taken, and your entire group will receive an email notification once your project has been graded. You may only have one submission pending at any given time, but you may resubmit as many times as you like. Note however, that the more times you submit, the lower a priority your project will receive in the grading queue.
@ -93,7 +93,7 @@ A snapshot of your repository will be taken, and your entire group will receive
<li>Validate the output.</li>
</ol>
If these steps fail for any reason, your submission will receive a 0 and you will need to resubmit. A log of the testing process will be made available on the submission page so that you may correct any errors that occur.
<h2>Project: Hello World!</h2>
<h1>Project: Hello World!</h1>
Create a class edu.buffalo.cse562.Main with a main function that prints out the following (with no newlines) and exits.
<pre>We, the members of our team, agree that we will not submit any code that we have not written ourselves, share our code with anyone outside of our group, or use code that we have not written ourselves as a reference.</pre>
Make sure your class compiles, push your (committed) repository, and hit Submit.

View File

@ -12,7 +12,7 @@
In this project, you will implement a simple SQL query evaluator with support for Select, Project, Join, Bag Union, and Aggregate operations.  You will receive a set of data files, schema information, and be expected to evaluate multiple SELECT queries over those data files.
Your code is expected to evaluate the SELECT statements on provided data, and produce output in a standardized form. Your code will be evaluated for both correctness and performance (in comparison to a naive evaluator based on iterators and nested-loop joins).
<h2>Parsing SQL</h2>
<h1>Parsing SQL</h1>
A parser converts a human-readable string into a s<span style="line-height: 1.5;">tructured representation of the program (or query) that the string describes. A fork of the <a href="http://jsqlparser.sourceforge.net">JSQLParser</a> open-source SQL parser (JSQLParser) will be provided for your use.  The JAR may be downloaded from</span>
<p style="text-align: center;"><a href="http://odin.cse.buffalo.edu/resources/jsqlparser/jsqlparser.jar">http://odin.cse.buffalo.edu/resources/jsqlparser/jsqlparser.jar</a></p>
And documentation for the fork is available at
@ -34,9 +34,9 @@ At this point, you'll need to figure out what kind of statement you're dealing w
} else if(statement instanceof CreateTable) {
// and so forth
}</pre>
<h4>Example</h4>
<h2>Example</h2>
<iframe src="https://www.youtube.com/embed/U4TyaHTJ3Zg" width="420" height="315" frameborder="0" allowfullscreen="allowfullscreen"></iframe>
<h2>Expressions</h2>
<h1>Expressions</h1>
JSQLParser includes an object called Expression that represents a primitive-valued expression parse tree.  In addition to the parser, we are providing a collection of classes for manipulating and evaluating Expressions.  The JAR may be downloaded from
<p style="text-align: center;"><a href="http://odin.cse.buffalo.edu/resources/expressionlib/expression.jar"><small>http://odin.cse.buffalo.edu/resources/expressionlib/expression.jar</small></a></p>
<p style="text-align: left;"> Documentation for the library is available at</p>
@ -49,7 +49,7 @@ JSQLParser includes an object called Expression that represents a primitive-valu
}</pre>
<p style="text-align: left;">After doing this, you can use Eval.eval() to evaluate any expression in the context of tuple.</p>
<h2 style="text-align: left;">Source Data</h2>
<h1 style="text-align: left;">Source Data</h1>
Because you are implementing a query evaluator and not a full database engine, there will not be any tables -- at least not in the traditional sense of persistent objects that can be updated and modified. Instead, you will be given a <strong>Table Schema</strong> and a <strong>CSV File</strong> with the instance in it. To keep things simple, we will use the <tt>CREATE TABLE</tt> statement to define a relation's schema. You do not need to allocate any resources for the table in reaction to a <tt>CREATE TABLE</tt> statement -- Simply save the schema that you are given for later use. Sql types (and their corresponding java types) that will be used in this project are as follows:
<table>
<tbody>
@ -90,7 +90,7 @@ That means that the data directory contains a data file called 'R.dat' that migh
1|2|6
2|3|7</pre>
Each line of text (see <tt>java.io.BufferedReader.readLine()</tt>) corresponds to one row of data. Each record is delimited by a vertical pipe '|' character.  Integers and floats are stored in a form recognized by Java's Long.parseLong() and Double.parseDouble() methods. Dates are stored in YYYY-MM-DD form, where YYYY is the 4-digit year, MM is the 2-digit month number, and DD is the 2-digit date. Strings are stored unescaped and unquoted and are guaranteed to contain no vertical pipe characters.
<h2>Queries</h2>
<h1>Queries</h1>
Your code is expected to support both aggregate and non-aggregate queries with the following features.  Keep in mind that this is only a minimum requirement.
<ul>
<li>Non-Aggregate Queries
@ -146,7 +146,7 @@ Your code is expected to support both aggregate and non-aggregate queries with t
</ul>
</li>
</ul>
<h2>Output</h2>
<h1>Output</h1>
Your code is expected to output query results in the same format as the input data:
<ul>
<li>One output row per ('\n'-delimited) line.  If there is no ORDER BY clause, you may emit the rows in any order.</li>
@ -154,7 +154,7 @@ Your code is expected output query results in the same format as the input data
<li>A trailing newline as the last character of the file.</li>
<li>You should not output any header information or other formatting.</li>
</ul>
<h2>Example Queries and Data</h2>
<h1>Example Queries and Data</h1>
These are only examples.  Your code will be expected to handle these queries, as well as others.
<a href="http://odin.cse.buffalo.edu/resources/cse562/Sanity_Check_Examples.tgz">Sanity Check Examples</a>: A thorough suite of test cases covering most simple query features.
@ -187,7 +187,7 @@ Queries that conform to the specifications for this project include: Q1, Q3, Q5,
<li>Q21 uses EXISTS, NOT EXISTS and SubSelects</li>
<li>Q22 requires an implementation of the SUBSTRING function, IN, NOT EXISTS and SubSelects</li>
</ul>
<h2 style="text-align: left;">Code Submission</h2>
<h1 style="text-align: left;">Code Submission</h1>
As before, all .java files in the src directory at the root of your repository will be compiled (and linked against JSQLParser). Also as before, the class
<pre> edu.buffalo.cse562.Main
</pre>
@ -215,7 +215,7 @@ $&gt; java -cp build:jsqlparser.jar edu.buffalo.cse562.Main --data data query.sq
Once again, the data directory contains files named table name.dat where table name is the name used in a CREATE TABLE statement. Notice the effect of CREATE TABLE statements is not to create a new file, but simply to link the given schema to an existing .dat file. These files use vertical-pipe (|) as a field delimiter, and newlines (\n) as record delimiters.
The testing environment is configured with the Sun JDK version 1.8.
<h2>Grading</h2>
<h1>Grading</h1>
Your code will be subjected to a sequence of test cases, most of which are provided in the project code (though different data will be used). Two evaluation phases will be performed. Phase 1 will be performed on small datasets (&lt; 100 rows per input table) and each run will be graded on a per-test-case basis as follows:
<ul>
<li><strong>0/10 (F)</strong>: Your submission does not compile, does not produce correct output, or fails in some other way. Resubmission is highly encouraged.</li>

View File

@ -16,14 +16,14 @@
<hr />
<h2>Join Ordering</h2>
<h1>Join Ordering</h1>
<p style="text-align: justify;">The order in which you join tables together is <strong>incredibly important</strong>, and can change the runtime of your query by <strong>multiple orders of magnitude</strong>.  Picking between different join orderings is incredibly important!  However, to do so, you will need statistics about the data, something that won't really be feasible until the next project.  Instead, here's a present for those of you paying attention.  The tables in each FROM clause are ordered so that you will get our recommended join order by building a <em>left-deep plan</em> going in-order of the relation list (something that many of you are doing already), and (for hybrid hash joins) using the left-hand-side relation to build your hash table.</p>
<h2><span id="LC33" class="line">Blocking Operators and Memory</span></h2>
<h1><span id="LC33" class="line">Blocking Operators and Memory</span></h1>
<p style="text-align: justify;"><span id="LC35" class="line">Blocking operators (e.g., joins other than Merge Join, the Sort operator, etc...) are generally blocking because they need to materialize instances of a relation. For half of this project, you will not have enough memory available to materialize a full relation, to say nothing of join results. To successfully process these queries, you will need to implement out-of-core equivalents of these operators: At least one External Join (e.g., Block-Nested-Loop, Hash, or Sort/Merge Join) and an out-of-core Sort Algorithm (e.g., External Sort).</span></p>
<p style="text-align: justify;"><span id="LC37" class="line">For your reference, the evaluation machines have 2GB of memory.  In phase 2,  Java will be configured for <strong>100 MB of heap space</strong> (see the command line argument -Xmx).  To work with such a small amount of heap space, <strong>you will need to manually invoke Java's garbage collector</strong> by calling <tt>System.gc()</tt>.  How frequently you do this is up to you.  The more you wait, the greater the chance that you'll run out of memory.  The reference implementation calls it in the Two-Phase sort operator, every time it finishes flushing a file out to disk. </span></p>
<h2>Query Rewriting</h2>
<h1>Query Rewriting</h1>
<p style="text-align: justify;">In Project 1, you were encouraged to parse SQL into a relational algebra tree.  Project 2 is where that design choice begins to pay off.  We've discussed expression equivalences in relational algebra, and identified several that are always good (e.g., pushing down selection operators). The reference implementation uses some simple recursion to identify patterns of expressions that can be optimized and rewrite them.  For example, if I wanted to define a new HashJoin operator, I might go through and replace every qualifying Selection operator sitting on top of a CrossProduct operator with a HashJoin.</p>
<pre class="prettyprint">if(o instanceof Selection){
@ -66,7 +66,7 @@ return o;</pre>
ret.add(e);
}
}</pre>
<h2>Interface</h2>
<h1>Interface</h1>
<p style="text-align: justify;">Your code will be evaluated in exactly the same way as Project 1.  Your code will be presented with a 1GB (SF 1) TPC-H dataset.  Grading will proceed in two phases.  In the first phase, you will have an unlimited amount of memory, but very tight time constraints.  In the second phase, you will have slightly looser time constraints, but will be limited to 100 MB of memory, and presented with either a 1GB or a 200 MB (SF 0.2) dataset.</p>
<p style="text-align: justify;">As before, your code will be invoked with the data directory and the relevant SQL files. An additional parameter will be used in Phase 2:</p>
@ -85,7 +85,7 @@ This example uses the following directories and files:
<li><tt>[swap]</tt>: A temporary directory for an individual run. This directory will be emptied after every trial.</li>
<li><tt>[sqlfileX]</tt>: A file containing CREATE TABLE and SELECT statements, defining the schema of the dataset and the query to process</li>
</ul>
<h2>Grading</h2>
<h1>Grading</h1>
<p style="text-align: justify;">Your code will be subjected to a sequence of test cases and evaluated on speed and correctness.  Note that unlike Project 1, you will neither receive a warning about, nor partial credit for out-of-order query results if the outermost query includes an ORDER BY clause.</p>
<p style="text-align: justify;">Phase 1 (big queries) will be graded on a TPC-H SF 1 dataset (1 GB of raw text data).  Phase 2 (limited memory) will be graded on either a TPC-H SF 1 or SF 0.2 (200 MB of raw text data) dataset as listed in the chart below.  Grades are assigned based on per-query thresholds:</p>

View File

@ -16,7 +16,7 @@
<hr />
<h2>BerkeleyDB</h2>
<h1>BerkeleyDB</h1>
<p style="text-align: justify;">For this project, you will get access to a new library: BerkeleyDB (Java Edition).  Don't let the name mislead you, BDB is not actually a full database system.  Rather, BDB implements the indexing and persistence layers of a database system.  Download BDB at:</p>
<p style="text-align: center;"><a href="http://odin.cse.buffalo.edu/resources/berkeleydb/berkeleydb.jar">http://odin.cse.buffalo.edu/resources/berkeleydb/berkeleydb.jar</a></p>
<p style="text-align: justify;">The BerkeleyDB documentation is mirrored at:</p>
@ -27,11 +27,11 @@ And the javadoc at:
<p style="text-align: center;"><a href="http://odin.cse.buffalo.edu/resources/berkeleydb/java/">http://odin.cse.buffalo.edu/resources/berkeleydb/java/</a></p>
<p style="text-align: justify;">BDB can be used in two ways: The Direct Persistence layer, and the Base API.  The <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/dpl.html">Direct Persistence Layer</a> is easier to use at first, as it handles index management and serialization through compiler annotations.  However, this ease comes at the cost of flexibility.  Especially if you plan to use secondary indexes, you may find it substantially easier to work with the <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/baseapi.html">Base API</a>.  For this reason, this summary will focus on the Base API.</p>
<h2 style="text-align: justify;">Environments and Databases</h2>
<h1 style="text-align: justify;">Environments and Databases</h1>
<p style="text-align: justify;">A relation or table is represented in BDB as a <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/databases.html#DBOpen">Database</a>, which is grouped into units of storage called an <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/env.html">Environment</a>.  The first thing that you should do in the pre-computation phase is to <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/databases.html#DBOpen">create an Environment and one or more Databases</a>.  <strong>Be absolutely sure to <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/databases.html#dbclose">close both the environment and the database</a> before you exit</strong>, as not doing so could lead to file corruption.</p>
<p style="text-align: justify;">BDB Databases are in effect clustered indexes, which means that every record stored in one is identified (and sorted by) a key.  A database supports efficient access to records or ranges of records based on their keys.</p>
<h2 style="text-align: justify;">Representing, Storing, and Reading Tuples</h2>
<h1 style="text-align: justify;">Representing, Storing, and Reading Tuples</h1>
<p style="text-align: justify;">Every tuple must be marked with a primary key, and may include one or more secondary keys.  In t<span style="line-height: 1.5;">he Base API, both the value and its key are represented as a string of bytes.  Both key and value must be </span><a style="line-height: 1.5;" href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/DBEntry.html#usingDbEntry">stored as a byte array encapsulated in a DatabaseEntry object</a><span style="line-height: 1.5;">.  Secondary Keys are defined when creating a secondary index.</span></p>
<p style="text-align: justify;">Note that you will need to manually extract the key from the rest of the record and write some code to serialize the record and the key into byte arrays.  You could use <span style="line-height: 1.5;">toString(), but you may find it substantially faster to use Java's native object serialization:</span></p>
<p style="text-align: center;"><a href="http://docs.oracle.com/javase/8/docs/api/java/io/ObjectOutputStream.html">ObjectOutputStream </a> |  <a href="http://docs.oracle.com/javase/8/docs/api/java/io/ObjectInputStream.html">ObjectInputStream</a></p>
@ -39,23 +39,23 @@ And the javadoc at:
<p style="text-align: center;"><a href="http://docs.oracle.com/javase/8/docs/api/java/io/DataOutputStream.html">DataOutputStream</a>  |  <a href="http://docs.oracle.com/javase/8/docs/api/java/io/DataInputStream.html">DataInputStream</a></p>
<p style="text-align: justify;">Like a Hash-Map, BDB supports a <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/usingDbt.html">simple get/put interface</a>.  Tuples can be stored or looked up by their key.  Like your code, BDB also provides an iterator interface called a <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/Cursors.html">Cursor</a>.  Of note, BDB's cursor interface supports <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/Positioning.html#cursorsearch">index lookups</a>.</p>
<h2 style="text-align: justify;">Secondary Indexes</h2>
<h1 style="text-align: justify;">Secondary Indexes</h1>
<p style="text-align: justify;">The Database represents a clustered index.  In addition, BDB has support for unclustered indexes, which it calls <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/indexes.html">SecondaryDatabases</a>. As an unclustered index, a secondary database doesn't dictate how the tuples themselves are laid out, but still allows for (mostly) efficient lookups for secondary "keys".  The term "keys" is in quotation marks, because unlike the primary key used in the primary database, a secondary database allows for multiple records with the same secondary key.</p>
<p style="text-align: justify;">To automate the management process, a secondary index is defined using an implementation of <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/keyCreator.html">SecondaryKeyCreator</a>.  This class should map record DatabaseEntry objects to a (not necessarily unique) DatabaseEntry object that acts as a secondary key.</p>
<h2 style="text-align: justify;">BDB Joins</h2>
<h1 style="text-align: justify;">BDB Joins</h1>
<p style="text-align: justify;">Another misnomer, BDB allows you to define so-called <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide/joins.html">Join Cursors</a>. This is <strong>not</strong> a relational join in the traditional sense.   Rather, a Join Cursor allows you to define multiple <strong>equality</strong> predicates over the base relation and scan over all records that match all of the specified lookup conditions.</p>
<h2 style="text-align: justify;">Performance Tuning</h2>
<h1 style="text-align: justify;">Performance Tuning</h1>
<p style="text-align: justify;">BerkeleyDB can be quite tricky to get performance out of.  There are a number of options, and ways of interacting with it that can help you get the most out of this indexing software.  Since evaluation on the grading boxes takes time due to the end-to-end testing process, I encourage you to evaluate on your own machines.  For best results, be sure to store your database on an HDD (Results from SSDs will not be representative of the grading boxes).  Recall that the grader boxes have 4 GB of RAM.</p>
<h4 style="text-align: justify;">Heap Scans</h4>
<h2 style="text-align: justify;">Heap Scans</h2>
<p style="text-align: justify;">Depending on how you've implemented deserialization of the raw data files, you may find it faster to read directly from the clustered index rather than from the data file.  In the reference implementation, reading from a clustered index is about twice as fast as from a data file, but this performance boost stems from several factors.  If you choose to do this, take a look at <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/java/com/sleepycat/je/DiskOrderedCursor.html">DiskOrderedCursor</a>, which my experiments show is roughly about twice as fast as a regular in-order Cursor on an HDD on a fully compacted relation.</p>
<h4 style="text-align: justify;">Locking Policies</h4>
<h2 style="text-align: justify;">Locking Policies</h2>
<p style="text-align: justify;">Locking is slow.  Consistency is slow.  As long as you're not implementing your code multithreaded or with updates or transactions, you'll find that cursor operations will be faster under <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/java/com/sleepycat/je/LockMode.html">LockMode</a>.<a href="http://odin.cse.buffalo.edu/resources/berkeleydb/java/com/sleepycat/je/LockMode.html#READ_UNCOMMITTED">READ_UNCOMMITTED</a>.  See below for ways to set this parameter globally.</p>
<h4 style="text-align: justify;">Config Options</h4>
<h2 style="text-align: justify;">Config Options</h2>
<p style="text-align: justify;">BDB also has numerous options that will affect the performance of your system.  Several options you may wish to evaluate, both for the load and run phases:</p>
<ul>
@ -91,7 +91,7 @@ And the javadoc at:
<hr />
<h2>Interface</h2>
<h1>Interface</h1>
<p style="text-align: justify;">Your code will be evaluated in exactly the same way as Projects 1 and 2.  Your code will be presented with a 500MB (SF 0.5) TPC-H dataset.  Before grading begins, your code will be run once to preprocess the data.  You will have up to 5 minutes, after which your process will be killed (if it has not yet terminated).  Your code will then be run on the test suite.</p>
<p style="text-align: justify;">As before, your code will be invoked with the data directory and the relevant SQL files. Two additional parameters will be used in the preprocessing stage:</p>
@ -111,7 +111,7 @@ This example uses the following directories and files:
<li><tt>[db]</tt>: A directory for permanent data files.  This directory will be persisted across all runs of the</li>
<li><tt>[sqlfileX]</tt>: A file containing CREATE TABLE and SELECT statements, defining the schema of the dataset and the query to process.  If --load appears on the command line, these files will contain only CREATE TABLE statements.</li>
</ul>
<h2>Grading</h2>
<h1>Grading</h1>
<p style="text-align: justify;">Your code will be subjected to a sequence of test cases and evaluated on speed and correctness.</p>
<ul>

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 22 KiB

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 50 KiB

View File

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 38 KiB

View File

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View File

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 22 KiB

View File

Before

Width:  |  Height:  |  Size: 29 KiB

After

Width:  |  Height:  |  Size: 29 KiB

View File

Before

Width:  |  Height:  |  Size: 33 KiB

After

Width:  |  Height:  |  Size: 33 KiB

View File

Before

Width:  |  Height:  |  Size: 70 KiB

After

Width:  |  Height:  |  Size: 70 KiB

View File

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -1,4 +1,4 @@
<h6 style="text-align: center;">Spring 2015</h6>
<h1 style="text-align: center;">Spring 2015</h1>
<p style="text-align: justify;">Data Management Systems (including Relational Databases, Non-Relational Databases, and NoSQL storage systems) form the basis of the Big Data Economy we now live in.  A data management system is responsible for storing data, enabling efficient access to that data, as well as mediating concurrent modifications.  This class approaches the challenges of designing a data management system from a standpoint that is both principled and practical.  The course revolves around a term-long programming assignment, in which you will build a system that answers SQL queries efficiently.  Course lectures will focus on the conceptual basis for this system, and will discuss how the techniques you learn generalize (e.g., to the use of NoSQL systems)</p>
In this course, you will learn...
<ul>
@ -9,7 +9,7 @@ In this course, you will learn...
<li>... how to recover state after software and hardware failures.</li>
<li>... how to query and update distributed data consistently.</li>
</ul>
<h4>Course Details</h4>
<h2>Course Details</h2>
<ul>
<li><strong>Class</strong>: M/W/F, 12:00-12:50 PM in <a href="http://www.buffalo.edu/buildings/building?id=nsc">NSC 201</a></li>
<li><strong>Class Forum</strong>: <a href="https://piazza.com/class/i4xda6rvshkgk">Piazza</a></li>
@ -21,7 +21,7 @@ In this course, you will learn...
<li>Ning Deng (TA Lounge, Tue 9:00-11:00)</li>
</ul>
</li>
<li><strong>Project Submission</strong>:<a href="http://odin.cse.buffalo.edu/cse562"> http://odin.cse.buffalo.edu/cse562</a></li>
<li><strong>Project Submission</strong>:<a href="http://dubstep.odin.cse.buffalo.edu"> http://dubstep.odin.cse.buffalo.edu</a></li>
<li><strong>Project Groups</strong>: 1-3 people</li>
<li><strong>Grading</strong>:
<ul>
@ -34,22 +34,22 @@ In this course, you will learn...
</li>
<li>50% projects
<ul>
<li>5% <a title="Checkpoint 0" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-0/">Project 0</a> due on Feb. 6</li>
<li>15% <a title="Checkpoint 1" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-1/">Project 1</a> due on Feb. 23 (code-review after)</li>
<li>15% <a title="Checkpoint 2" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-2/">Project 2</a> due on Mar. 30 (code-review after)</li>
<li>15% <a title="Checkpoint 3" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-3/">Project 3</a> due on May 8 (code-review after)</li>
<li>5% <a title="Checkpoint 0" href="checkpoint0.html">Project 0</a> due on Feb. 6</li>
<li>15% <a title="Checkpoint 1" href="checkpoint1.html">Project 1</a> due on Feb. 23 (code-review after)</li>
<li>15% <a title="Checkpoint 2" href="checkpoint2.html">Project 2</a> due on Mar. 30 (code-review after)</li>
<li>15% <a title="Checkpoint 3" href="checkpoint3.html">Project 3</a> due on May 8 (code-review after)</li>
</ul>
</li>
</ul>
</li>
</ul>
<h4>Library Documentation</h4>
<h2>Library Documentation</h2>
<ul>
<li><strong>JSqlParser</strong> (<a href="http://odin.cse.buffalo.edu/resources/jsqlparser">JavaDoc</a>, <a href="https://youtu.be/U4TyaHTJ3Zg">Demo</a>)</li>
<li><strong>ExpressionLib</strong> (<a href="http://odin.cse.buffalo.edu/resources/expressionlib">JavaDoc</a>)</li>
<li><strong>BerkeleyDB</strong> (<a href="http://odin.cse.buffalo.edu/resources/berkeleydb/java/index.html">JavaDoc</a>, <a href="http://odin.cse.buffalo.edu/resources/berkeleydb/GettingStartedGuide">Guide</a>)</li>
</ul>
<h4>Lecture Schedule</h4>
<h2>Lecture Schedule</h2>
<ul>
<li><em>Jan. 26</em>:  Intro and Outline (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/01-IntroAndStructure">Slides</a>, <a href="http://youtu.be/lsw_4p9TSLw">Video</a>)</li>
<li><em>Jan. 28</em>: Relational Algebra 1/2 (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/02-RA">Slides</a>, <a href="http://youtu.be/h9L7j5Q4W_I">Video</a>, <a href="https://piazza.com/class_profile/get_resource/i4xda6rvshkgk/i5xb15qumpn2il">Example DB</a>)</li>
@ -58,7 +58,7 @@ In this course, you will learn...
<li><em>Feb. 4</em>: Translating SQL to Relational Algebra (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/05-TranslatingSQL">Slides</a>)</li>
<li><em>Feb. 6</em>: Evaluating Relational Algebra (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/06-EvaluatingRA">Slides</a>, <a href="http://youtu.be/lee20kp_zPw">Video</a>)</li>
<li><em>Feb. 9</em>: Extended Relational Algebra (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/07-ExtendedRA">Slides</a>, <a href="http://youtu.be/2JxV8-gF-dw">Video</a>)</li>
<li><em>Feb. 11</em>: <a title="Checkpoint 1" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-1/">Project 1</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/08-Project1Review">Slides</a>, <a href="http://youtu.be/y7i4STVgI9k">Video</a>)</li>
<li><em>Feb. 11</em>: <a title="Checkpoint 1" href="checkpoint1.html">Project 1</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/08-Project1Review">Slides</a>, <a href="http://youtu.be/y7i4STVgI9k">Video</a>)</li>
<li><em>Feb. 13</em>: Data Modeling - The E/R Model (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/09-DataModeling">Slides</a>, <a href="http://youtu.be/O233R8_XnZM">Video</a>)</li>
<li><em>Feb 16</em>: Data Modeling - Constraints (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/10-DataModeling">Slides</a>, <a href="http://youtu.be/DXaf0hNmD8s">Video</a>)</li>
<li><em>Feb 18</em>: Query Optimization (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/11-Rewrites">Slides</a>, <a href="http://youtu.be/WNsQB-0r7G4">Video</a>)</li>
@ -68,7 +68,7 @@ In this course, you will learn...
<li><em>Feb 27</em>: Out-of-Core Algorithms (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/15-ExternalSort">Slides</a>, <a href="http://youtu.be/wo9U6qX6-R4">Video</a>)</li>
<li><em>Mar 2</em>: Midterm 1 Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/16-Review">Slides</a>, <span style="text-decoration: underline;">No Video</span>)</li>
<li><em>Mar 4</em>: Midterm 1 (<a href="http://odin.cse.buffalo.edu/resources/cse562/test_sp2015_m1_solutions.pdf">Solutions</a>)</li>
<li><em>Mar 6</em>: <a title="Checkpoint 2" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-2/">Project 2</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/17-Project2Review">Slides</a>, <a href="https://youtu.be/vv38YCSO80g">Video</a>)</li>
<li><em>Mar 6</em>: <a title="Checkpoint 2" href="checkpoint2.html">Project 2</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/17-Project2Review">Slides</a>, <a href="https://youtu.be/vv38YCSO80g">Video</a>)</li>
<li><em>Mar 9</em>: Cost-Based-Optimization (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/18-CostBasedOptimization">Slides</a>, <a href="https://youtu.be/GFKt089eGls">Video</a>)</li>
<li><em>Mar 11</em>: Cost-Based-Optimization (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/19-CostBasedOptimization">Slides</a>, <span style="text-decoration: underline;">No Video</span>)</li>
<li><em>Mar 13</em>: Storage/Serialization (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/20-LowLevelLayout">Slides</a>, <a href="https://youtu.be/uup4TrtmXKg">Video</a>)</li>
@ -77,7 +77,7 @@ In this course, you will learn...
<li><em>Mar 25</em>: Locking (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/22-TransactionLocking">Slides</a>)</li>
<li><em>Mar 27</em>: Deadlock Management (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/23-TransactionDeadlock">Slides</a>, <a href="https://youtu.be/euQ4mwoQLyk">Video</a>)</li>
<li><em>Mar 30</em>: Optimistic Concurrency Control (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/24-TransactionOCC">Slides</a>)</li>
<li><em>Apr 1</em>: <a title="Checkpoint 3" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-3/">Project 3</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/25-Project3Review">Slides</a>)</li>
<li><em>Apr 1</em>: <a title="Checkpoint 3" href="checkpoint3.html">Project 3</a> Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/25-Project3Review">Slides</a>)</li>
<li><em>Apr 3</em>: Logging (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/26-Logging">Slides</a>)</li>
<li><em>Apr 6</em>: Midterm 2 Content Review (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/27-Review">Slides</a>)</li>
<li><em>Apr. 8</em>: Midterm 2 (<a href="http://odin.cse.buffalo.edu/resources/cse562/test_sp2015_m2_solutions.pdf">Solutions</a>)</li>
@ -96,10 +96,10 @@ In this course, you will learn...
<li><em>May 8</em>: Final Review 3 (<a href="http://odin.cse.buffalo.edu/slides/cse562sp2015/40-FinalReview3/">Slides</a>)</li>
<li><em>May 14</em>: Final Exam 4 PM</li>
</ul>
<h4>Content Outline</h4>
<h2>Content Outline</h2>
<ul>
<li><a title="Checkpoint 0" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-0/">Project 0</a> - Basic Setup</li>
<li><a title="Checkpoint 1" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-1/">Project 1</a> - Infrastructure &amp; Evaluation
<li><a title="Checkpoint 0" href="checkpoint0.html">Project 0</a> - Basic Setup</li>
<li><a title="Checkpoint 1" href="checkpoint1.html">Project 1</a> - Infrastructure &amp; Evaluation
<ul>
<li><strong>Relational Algebra</strong> (Ch 2.4, 5.1)</li>
<li><strong>SQL</strong> (Ch 2.3 and 6.1-6.4)</li>
@ -107,7 +107,7 @@ In this course, you will learn...
<li><strong>Data Modeling</strong> (Ch 2.1-2.2)</li>
</ul>
</li>
<li><a title="Checkpoint 2" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-2/">Project 2</a> - Optimization &amp; External Algorithms
<li><a title="Checkpoint 2" href="checkpoint2.html">Project 2</a> - Optimization &amp; External Algorithms
<ul>
<li><strong>Algebraic Query Optimization</strong> (Ch 16.2)</li>
<li><strong>Join Algorithms</strong> (Ch 15.4, 15.5)</li>
@ -116,7 +116,7 @@ In this course, you will learn...
<li><strong>Physical Plans</strong> (Ch 16.7)</li>
</ul>
</li>
<li><a title="Checkpoint 3" href="http://odin.cse.buffalo.edu/teaching/cse-562/checkpoint-3/">Project 3</a> - Indexing &amp; Physical Layout
<li><a title="Checkpoint 3" href="checkpoint3.html">Project 3</a> - Indexing &amp; Physical Layout
<ul>
<li><strong>The Memory Hierarchy</strong> (Ch 13.1-13.3)</li>
<li><strong>Physical Design</strong> (Ch 13.5-13.7)</li>
@ -139,14 +139,14 @@ In this course, you will learn...
</ul>
</li>
</ul>
<h4>Academic Integrity</h4>
<h2>Academic Integrity</h2>
<p style="text-align: justify;">Students may discuss and advise one another on their lab projects, but groups are expected to turn in their own work.  Discussing concepts is permitted.  Referencing another group's code is not.  Cheating on any course deliverable will result in an automatic grade of F in the course.  The University's policy on academic integrity can be reviewed at:</p>
<p style="text-align: center;"><a href="http://grad.buffalo.edu/Academics/Policies-Procedures/Academic-Integrity.html">The Graduate School Academic Integrity Policy</a></p>
<h4>Medical Emergencies</h4>
<h2>Medical Emergencies</h2>
<p style="text-align: justify;">Accommodations for medical emergencies will be made on a case-by-case basis.  Requests for extensions based on medical emergencies must be accompanied by documentation of the emergency from student health services:</p>
<p style="text-align: center;"><a href="http://www.student-affairs.buffalo.edu/shs/student-health/">Student Health Services</a></p>
<h4 style="text-align: left;">Accessibility Resources</h4>
<h2 style="text-align: left;">Accessibility Resources</h2>
<p style="text-align: justify;">If you have a diagnosed disability (physical, learning, or psychological) that will make it difficult for you to carry out the course work as outlined, or that requires accommodations such as recruiting note-takers, readers, or extended time on exams or assignments, please advise the instructor during the first two weeks of the course so that we may review possible arrangements for reasonable accommodations. In addition, if you have not yet done so, contact:</p>
<p style="text-align: center;"><a href="http://www.student-affairs.buffalo.edu/ods/">The Office of Accessibility Resources</a>.</p>

View File

@ -1,16 +0,0 @@
<h2>CSE 562 - Graduate Databases</h2>
<ul>
<li><a title="CSE 562" href="http://odin.cse.buffalo.edu/cse-562/">Spring 2015</a></li>
<li><a href="https://piazza.com/class/hbnssrlgj3o2xl">Spring 2014</a></li>
<li><a href="https://piazza.com/buffalo/spring2013/cse562/home">Spring 2013</a></li>
</ul>
<h2>CSE 662 - Langs. &amp; Runtimes for Big Data</h2>
<ul>
<li><a href="https://piazza.com/buffalo/fall2015/cse662/home">Fall 2015</a></li>
</ul>
<h2>CSE 704 - Seminar</h2>
<ul>
<li><a href="https://piazza.com/buffalo/fall2014/cse704/home">Fall 2014 - DB, PL, and Data Structures</a></li>
<li><a href="https://piazza.com/buffalo/fall2013/cse704/home">Fall 2013 - Streaming, Incrementa, and Online Data Processing</a></li>
<li><a href="http://www.cse.buffalo.edu/~okennedy/courses/cse704fa2012.html">Fall 2012 - Web-Scale Data Management</a></li>
</ul>

15
src/teaching/index.md Normal file
View File

@ -0,0 +1,15 @@
# CSE 562 - Graduate Databases
* <a title="CSE 562" href="cse-562/index.html">Spring 2015</a>
* <a href="https://piazza.com/class/hbnssrlgj3o2xl">Spring 2014</a>
* <a href="https://piazza.com/buffalo/spring2013/cse562/home">Spring 2013</a>
# CSE 662 - Langs. &amp; Runtimes for Big Data
* <a href="https://piazza.com/buffalo/fall2015/cse662/home">Fall 2015</a>
# CSE 704 - Seminar
* <a href="https://piazza.com/buffalo/fall2014/cse704/home">Fall 2014 - DB, PL, and Data Structures</a>
* <a href="https://piazza.com/buffalo/fall2013/cse704/home">Fall 2013 - Streaming, Incremental, and Online Data Processing</a>
* <a href="http://www.cse.buffalo.edu/~okennedy/courses/cse704fa2012.html">Fall 2012 - Web-Scale Data Management</a>

65
stages/build-pubs.js Normal file
View File

@ -0,0 +1,65 @@
// Export the plugin factory. `plugin` is a function declaration further down
// in this file, so it is hoisted and already bound when this line runs.
module.exports = plugin;
/**
 * Metalsmith plugin factory that groups publications by year.
 *
 * The returned plugin reads the following Metalsmith metadata:
 *   - odinLab.members / odinLab.alumni: author names to highlight
 *   - okennedy.data.publications and altPubs: publication records
 *   - okennedy.venues: venue descriptors keyed by venue name
 *
 * Patents are skipped, as are tech reports whose venue is not "ArXiv".
 * The result is stored under the "allpubs" metadata key as an array of
 * { year, pubs } groups in reverse key order (newest first for numeric
 * years). `year` is the object key, i.e. a string.
 *
 * @returns {function(Object, Object, function)} plugin(files, smith, done)
 * @throws {Error} from the returned plugin when a publication has no year
 */
function plugin() {
  // Make every space non-breaking so that a name never wraps mid-way,
  // including names with more than two parts.
  function nonBreaking(text) {
    return text.replace(/ /g, "&nbsp;");
  }

  return function (files, smith, done) {
    var metadata = smith.metadata();
    var lab = metadata.odinLab.members.concat(metadata.odinLab.alumni);
    var pubs = metadata.okennedy.data.publications.concat(metadata.altPubs);
    var venues = metadata.okennedy.venues;
    var pubsByYear = {};

    pubs.forEach(function (pub) {
      var venue = venues[pub.venue] || {};
      if (pub.type === "patent") { return; }
      if (venue.type === "techreport" && pub.venue !== "ArXiv") { return; }

      var authorFormat = pub.authors
        .map(function (author) {
          var name = nonBreaking(author);
          return lab.indexOf(author) >= 0
            ? "<span class='lab_member'>" + name + "</span>"
            : name;
        })
        .join(", ");

      if (typeof pub.year === "undefined") {
        // Serialize the record: concatenating the object itself would only
        // produce "[object Object]".
        throw new Error("Unknown year for " + JSON.stringify(pub));
      }
      if (typeof pubsByYear[pub.year] === "undefined") {
        pubsByYear[pub.year] = [];
      }

      var resourcesFormat = "";
      // typeof null is "object", so exclude it explicitly.
      if (pub.urls !== null && typeof pub.urls === "object") {
        var resources = Object.keys(pub.urls).map(function (category) {
          // Quote the attribute so URLs containing '=', spaces, etc. stay intact.
          return '<a href="' + pub.urls[category] + '">' + category + "</a>";
        });
        // Skip the parenthesized list entirely when there is nothing to link.
        if (resources.length > 0) {
          resourcesFormat =
            "(&nbsp;" + resources.join("&nbsp;|&nbsp;") + "&nbsp;)";
        }
      }

      pubsByYear[pub.year].push({
        title: pub.title,
        authorFormat: authorFormat,
        authors: pub.authors,
        venue: pub.venue + " " + pub.year,
        resourcesFormat: resourcesFormat
      });
    });

    // Integer-like keys enumerate in ascending order; reversing puts the
    // most recent year first.
    var out = Object.keys(pubsByYear).map(function (year) {
      return { year: year, pubs: pubsByYear[year] };
    });
    metadata.allpubs = out.reverse();
    done();
  };
}

37
stages/render-tex.js Normal file
View File

@ -0,0 +1,37 @@
// Export the plugin factory. `plugin` is a function declaration further down
// in this file, so it is hoisted and already bound when this line runs.
module.exports = plugin;
/**
 * Metalsmith plugin factory that runs every file through mathjax-node's
 * page typesetter (TeX input, "NativeMML" renderer) and replaces the file's
 * contents with the typeset HTML.
 *
 * Calling the factory loads and starts mathjax-node; the returned plugin
 * invokes `done` once every file's typeset callback has fired (or
 * immediately when there are no files).
 *
 * @returns {function(Object, Object, function)} renderTeX(files, smith, done)
 */
function plugin() {
  "use strict";
  var mjAPI = require("mathjax-node/lib/mj-page.js");

  // The Buffer constructor is deprecated in favour of Buffer.from(). Very old
  // Node releases only inherit an unrelated `from` from Uint8Array, so fall
  // back to the constructor there.
  function toBuffer(text) {
    if (typeof Buffer.from === "function" && Buffer.from !== Uint8Array.from) {
      return Buffer.from(text);
    }
    return new Buffer(text);
  }

  // Typeset one file in place, then report completion through `finished`.
  function renderOneTex(file, finished) {
    mjAPI.typeset({
      html: file.contents,
      renderer: "NativeMML",
      inputs: ["TeX"],
      xmlns: "mml",
      singleDollars: true // presumably enables $...$ delimiters; see mathjax-node docs
    }, function (result) {
      file.contents = toBuffer(result.html);
      finished();
    });
  }

  mjAPI.start();

  return function renderTeX(files, smith, done) {
    var paths = Object.keys(files);
    var pending = paths.length;

    // Nothing to typeset: finish right away.
    if (pending === 0) {
      done();
      return;
    }

    paths.forEach(function (path) {
      renderOneTex(files[path], function () {
        pending -= 1;
        // The last callback to fire completes the stage.
        if (pending === 0) { done(); }
      });
    });
  };
}