Merge branch 'master' of gitlab.odin.cse.buffalo.edu:odin-lab/Website

pull/1/head
Oliver Kennedy 2017-08-25 10:10:51 -04:00
commit 247d4933d1
21 changed files with 18546 additions and 117 deletions

View File

@ -193,32 +193,52 @@ task :collab => "artifacts/okennedy_collaborators.txt"
task :conflicts => "artifacts/okennedy_collaborators.txt"
file "artifacts/oracle_papers.txt" => [ $db.files, "artifacts", "Rakefile" ].flatten do
File.open("artifacts/oracle_papers.txt", "w+") do |fh|
fh.puts $db["publications"].
where { |pub| pub.fetch("projects", []).include? "mimir" }.
map { |pub|
pub = LabMetadata::complete_venue(pub)
case pub["type"]
when "conference", "journal", "workshop", "abstract" then
[ pub["title"],
pub["authors"].join(", "),
"#{LabMetadata::venue_name(pub, size: :full_parens)} #{pub["year"]}",
pub["urls"].map { |k,v| "#{k.capitalize}: #{v}" },
LabMetadata.bibtex_for(pub).split("\n")
].flatten
when "thesis" then
[ pub["title"],
pub["authors"].join(", "),
"Thesis #{pub["degree"]} #{pub["year"]}",
pub["urls"].map { |k,v| "#{k.capitalize}: #{v}" },
LabMetadata.bibtex_for(pub).split("\n")
].flatten
else nil
end
}.
compact.
map { |record| "> #{record.join("\n ")}" }.
join("\n\n")
# Declare one "artifacts/<project>_papers.txt" file task per listed project and
# register each of them as a prerequisite of the :projectreport task.
# Each file holds the report-formatted publications tagged with that project,
# separated by blank lines.
%w[mimir insider-threats astral].each do |project_name|
  report_path = "artifacts/#{project_name}_papers.txt"

  file report_path => [ $db.files, "artifacts", "Rakefile" ].flatten do
    File.open(report_path, "w+") do |out|
      # Publications without a "projects" key are treated as belonging to none.
      project_pubs = $db["publications"].where { |entry|
        entry.fetch("projects", []).include? project_name
      }
      # format_pub_for_report yields nil for unsupported types; compact drops those.
      formatted = project_pubs.map { |entry| LabMetadata::format_pub_for_report(entry) }
      out.puts formatted.compact.join("\n\n")
    end
  end

  task :projectreport => report_path
end
task :report => :projectreport
# Builds artifacts/grant_report.txt: for every grant cited by a publication,
# an "<agency>/<id>" header followed by the report-formatted publications
# that cite it.
#
# The prerequisite list mirrors the other artifact tasks in this Rakefile.
# Without it Rake only runs this task while the target file is missing, so the
# report would never be refreshed after the database files or the Rakefile
# change, and the "artifacts" prerequisite would not be requested before the
# file is opened.
#
# NOTE(review): the argument-less, block-less `reduce` in the chain below is
# not the stock Enumerable#reduce call shape; presumably a project extension
# that groups [key, value] pairs by key -- confirm before touching it.
file "artifacts/grant_report.txt" => [ $db.files, "artifacts", "Rakefile" ].flatten do
  File.open("artifacts/grant_report.txt", "w+") do |fh|
    $db["publications"]
      .map { |pub|
        # One [ [agency, id], pub ] pair per grant cited by this publication;
        # publications without "grants_cited" contribute nothing.
        pub.fetch("grants_cited", [])
          .map { |grant|
            [ [
                grant["agency"],
                # Prefer the agency's id, then the project name, then a placeholder.
                grant.fetch("agency_id", grant.fetch("project_name", "???"))
              ], pub
            ]
          }
      }.flatten(1)
      .reduce
      .each { |grant, pubs|
        # Interpolation (rather than String#+) keeps a non-String id from raising.
        fh.puts "#{grant[0]}/#{grant[1]}"
        fh.puts "-----------"
        pubs.each { |pub|
          entry = LabMetadata::format_pub_for_report(pub)
          # nil means the publication type is not one the report formats.
          fh.puts "\n"+entry unless entry.nil?
        }
        fh.puts "\n\n"
      }
  end
end
task :report => "artifacts/grant_report.txt"
# Prints the title of every publication that cites at least one grant whose
# "agency" field matches /NSF/. Output currently goes to stdout; the code that
# would write it to a file is still commented out.
file "artifacts/nsf_merit_blurb.txt" => [ $db.files, "artifacts", "Rakefile" ].flatten do
  # File.open("artifacts/nsf_related_blurb.txt", "w+") do |fh|
  nsf_funded = $db["publications"].where { |entry|
    # index is nil when no cited grant matches, otherwise a (truthy) position.
    entry.fetch("grants_cited", []).index { |cited|
      /NSF/ =~ cited.fetch("agency", "")
    }
  }
  nsf_funded.each do |entry|
    puts entry["title"]
  end
  # end
end

File diff suppressed because one or more lines are too long

BIN
assets/people/carl.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

BIN
assets/people/missing.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

BIN
assets/people/shivang.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
assets/people/ting.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

View File

@ -102,22 +102,29 @@
],
"honors" : [
{ "description" :
"The pVLDB 2012 paper titled \"DBToaster: Higher-order Delta Processing for Dynamic, Frequently Fresh Views\" was accepted to the \"Best of VLDB 2012\" issue of the VLDB Journal",
"The pVLDB 2012 paper titled \"DBToaster: Higher-order Delta Processing for Dynamic, Frequently Fresh Views\" was accepted to the \"Best of VLDB 2012\" issue of the VLDB Journal.",
"year" : 2012,
"type" : "best-paper",
"source" : "VLDB",
"individual" : "NO"
},
{ "description" :
"Oliver received a UB CSE Outstanding Funding Award for attracting over $3m of funding to the University at Buffalo in 2016",
"Oliver received a UB CSE Outstanding Funding Award for attracting over $3m of funding to the University at Buffalo in 2016.",
"year" : 2016,
"source" : "UB-CSE",
"type" : "award",
"individual" : "YES"
},
{ "description" :
"The SIGMOD 2017 paper titled \"Beta Probabilistic Databases: A Scalable Approach to Belief Updating and Parameter Learning\" was invited to submit a Best-Of-SIGMOD extended article to the ACM Transactions on Database Systems Journal.",
"year" : 2017,
"type" : "best-paper",
"source" : "SIGMOD",
"individual" : "NO"
}
],
"reviewer" : [
{ "venue" : "VLDBJ", "years" : [ 2013 ] },
{ "venue" : "VLDBJ", "years" : [ 2013, 2017 ] },
{ "venue" : "TKDE", "years" : [ 2013, 2014 ] },
{ "venue" : "TODS", "years" : [ 2015 ] },
{ "venue" : "CSE", "years" : [ 2015 ] },
@ -181,14 +188,30 @@
"class" : "software",
"description" : "An index structure that dynamically adapts to variable workloads.",
"released" : "07/2014",
"url" : "http://github.com/UBOdin/jitd"
"url" : "http://github.com/UBOdin/jitd",
"metrics" : {
"github" : {
"watchers" : 5,
"stars" : 5,
"forks" : 16
}
},
"metricsasof" : "July 27, 2017"
},
{
"name" : "Mimir",
"class" : "software",
"released" : "01/2015",
"description" : "A probabilistic overlay for database systems",
"url" : "http://mimirdb.info"
"url" : "http://mimirdb.info",
"metrics" : {
"github" : {
"watchers" : 11,
"stars" : 8,
"forks" : 8
}
},
"metricsasof" : "July 27, 2017"
},
{
"name" : "DBToaster",

View File

@ -28,7 +28,7 @@
"role" : "Co-I",
"amount" : 114781,
"effort" : "50%",
"status" : "submitted",
"status" : "rejected",
"start" : "12/2017", "end" : "05/2021",
"type" : "grant",
"copis" : ["Barry Smith"],

View File

@ -1,17 +1,38 @@
{
"members" : {
"Oliver Kennedy" : {
"status" : "Faculty",
"ubit" : "okennedy"
"status" : "Assistant Professor",
"ubit" : "okennedy",
"pic" : {
"file" : "http://odin.cse.buffalo.edu/assets/people/oliver.jpg",
"width" : 230,
"height" : 346
},
"github" : "okennedy",
"twitter" : "xthemage",
"scholar" : "9Q9tiCsAAAAJ",
"dblp" : "hd/k/Kennedy:Oliver",
"cv" : "http://odin.cse.buffalo.edu/artifacts/okennedy.pdf"
},
"Mike Brachman" : {
"status" : "Senior Research Developer",
"ubit" : "mrb24"
"Mike Brachmann" : {
"status" : "Senior Scientific Programmer",
"ubit" : "mrb24",
"pic" : {
"file" : "https://avatars1.githubusercontent.com/u/25045473?v=4&s=460",
"width" : 200,
"height" : 200
},
"github" : "mrb24"
},
"Ting Xie" : {
"status" : "PhD",
"projects" : ["insider-threats"],
"ubit" : "tingxie"
"ubit" : "tingxie",
"pic" : {
"file" : "http://odin.cse.buffalo.edu/assets/people/ting.jpg",
"width" : 270,
"height" : 360
}
},
"Gökhan Kul" : {
"status" : "PhD",
@ -19,14 +40,24 @@
"ubit" : "gokhanku",
"advisor" : ["Shambhu Upadhyaya"],
"link" : "http://www.cse.buffalo.edu/~gokhanku/",
"joint_advisor": true
"joint_advisor": true,
"pic" : {
"file" : "https://www.cse.buffalo.edu/~gokhanku/gokhankul.png",
"width" : 150,
"height" : 150
}
},
"Duc Thanh Luong" : {
"status" : "PhD",
"projects" : ["insider-threats"],
"ubit" : "ducthanh",
"advisor" : ["Varun Chandola"],
"link" : "http://www.cse.buffalo.edu/~ducthanh/"
"link" : "http://www.cse.buffalo.edu/~ducthanh/",
"pic" : {
"file" : "https://www.cse.buffalo.edu//~ducthanh/img/profile_pic.jpg",
"width" : 160,
"height" : 220
}
},
"Poonam Kumari" : {
"status" : "PhD",
@ -38,25 +69,11 @@
"projects" : ["mimir"],
"ubit" : "wmspoth"
},
"Jon Logan" : {
"status" : "PhD",
"projects" : ["mimir"],
"email" : "jonathan.logan@cubrc.org",
"ubit" : "jmlogan",
"joint_advisor" : true,
"advisor" : ["Moises Sudit"]
},
"Aaron Huber" : {
"status" : "PhD",
"projects" : ["mimir"],
"ubit" : "ahuber"
},
"Hank Lin" : {
"status" : "BS",
"projects" : ["astral"],
"advisor" : ["Luke Ziarek"],
"joint_advisor": true
},
"Saurav Singhi" : {
"status" : "PhD",
"projects" : ["astral"],
@ -64,44 +81,77 @@
"joint_advisor": true,
"ubit" : "sauravsi"
},
"Grant Wrazen" : {
"status" : "BS",
"projects" : ["pocketdata"],
"advisor" : ["Luke Ziarek", "Geoff Challen"],
"joint_advisor": true
},
"Lisa Lu" : {
"status" : "BS",
"projects" : ["mimir"],
"ubit" : "lisalu"
"ubit" : "lisalu",
"joint_advisor" : true,
"advisor" : ["Luke Ziarek"]
},
"Gourab Mitra" : {
"status" : "MS",
"projects" : ["astral"],
"joint_advisor" : true,
"advisor" : ["Luke Ziarek"],
"ubit" : "gourabmi"
"ubit" : "gourabmi",
"link" : "https://about.me/gourabmitra",
"pic" : {
"file" : "https://aboutme.imgix.net/background/users/g/o/u/gourabmitra_1493138987_082.jpg?q=40&dpr=2&auto=format&fit=max&w=200&h=267",
"width" : 200,
"height" : 267
}
},
"Shivang Aggarwal" : {
"status" : "MS",
"projects" : ["mimir"],
"ubit" : "shivanga"
"status" : "MS",
"projects" : ["mimir"],
"ubit" : "shivanga",
"link" : "http://about.me/shivangaggarwal",
"pic" : {
"file" : "http://odin.cse.buffalo.edu/assets/people/shivang.jpg",
"width" : 216,
"height" : 199
}
},
"Darshana Balakrishnan" : {
"status" : "MS",
"projects" : ["astral"],
"ubit" : "dbalakri"
"status" : "MS",
"projects" : ["astral"],
"ubit" : "dbalakri",
"link": "https://about.me/darshanabalakrishnan",
"joint_advisor" : true,
"advisor" : ["Luke Ziarek"],
"link" : "https://about.me/darshanabalakrishnan",
"pic" : {
"file" : "https://aboutme.imgix.net/background/users/d/a/r/darshanabalakrishnan_1502635565_956.jpg?q=40&dpr=2&auto=format&fit=max&w=620&h=413.3333333333333&rect=200,319,500,640",
"width" : 200,
"height" : 256
}
},
"Olivia Alphonce" : {
"status" : "BS",
"projects" : ["mimir"],
"ubit" : "oalphonc"
},
"Carl Nuessle" : {
"status" : "PhD",
"projects" : ["pocketdata"],
"joint_advisor" : true,
"advisor" : ["Luke Ziarek"],
"pic" : {
"file" : "http://odin.cse.buffalo.edu/assets/people/carl.jpg",
"width" : 200,
"height" : 200
}
}
},
"temp-leave" : {
"Michael Kulbacki" : {
"status" : "BS",
"projects" : ["mimir"]
"Jon Logan" : {
"status" : "PhD",
"projects" : ["mimir"],
"email" : "jonathan.logan@cubrc.org",
"ubit" : "jmlogan",
"joint_advisor" : true,
"advisor" : ["Moises Sudit"]
},
"Lakshmi Prasanna Ethiraj" : {
"status" : "BS",
@ -137,7 +187,8 @@
"company" : "Microsoft",
"projects" : ["astral"],
"joint_advisor" : true,
"advisor" : ["Lukasz Ziarek"]
"advisor" : ["Lukasz Ziarek"],
"status_updated" : 2017
},
"Sumit Agarwal" : {
"degree" : "BS, MS",
@ -146,7 +197,8 @@
"position" : "Developer",
"projects" : ["astral"],
"joint_advisor" : true,
"advisor" : ["Lukasz Ziarek"]
"advisor" : ["Lukasz Ziarek"],
"status_updated" : 2014
},
"Daniel Bellinger" : {
"company" : "Global Foundries",
@ -155,25 +207,29 @@
"year" : 2014,
"projects" : ["astral"],
"joint_advisor" : true,
"advisor" : ["Lukasz Ziarek"]
"advisor" : ["Lukasz Ziarek"],
"status_updated" : 2014
},
"Palaniappan Meiyappan" : {
"company" : "NetSuite",
"position" : "Software Engineer",
"degree" : "MS",
"year" : 2014
"year" : 2014,
"status_updated" : 2014
},
"Nikhil Kataria" : {
"degree" : "MS",
"year" : 2014,
"position" : "Performance Engineer",
"company" : "Salesforce"
"company" : "Salesforce",
"status_updated" : 2014
},
"Saket Adusumilli" : {
"degree" : "MS",
"year" : 2015,
"position" : "Software Engineer",
"company" : "Amazon Web Services"
"company" : "Amazon Web Services",
"status_updated" : 2015
},
"Thomas Mitchell" : {
"degree" : "BS",
@ -182,7 +238,8 @@
"company" : "Stark and Wayne",
"year" : 2015,
"joint_advisor" : true,
"advisor" : ["Varun Chandola", "Shambhu Upadhyaya"]
"advisor" : ["Varun Chandola", "Shambhu Upadhyaya"],
"status_updated" : 2016
},
"Vinayak Karuppasamy" : {
"degree" : "MS",
@ -190,7 +247,8 @@
"ubit" : "vinayakk",
"position" : "Software Engineer",
"company" : "Bloomberg",
"year" : 2015
"year" : 2015,
"status_updated" : 2015
},
"Arindam Nandi" : {
"degree" : "MS",
@ -199,7 +257,8 @@
"link" : "http://legacy25.github.io/about.html",
"position" : "Software Engineer",
"company" : "HPE/Vertica",
"year" : 2016
"year" : 2016,
"status_updated" : 2017
},
"Niccolò Meneghetti" : {
"degree" : "PhD",
@ -210,7 +269,8 @@
"position" : "Software Engineer",
"company" : "HPE/Vertica",
"joint_advisor" : true,
"year" : 2016
"year" : 2016,
"status_updated" : 2017
},
"Ying Yang" : {
"degree" : "PhD",
@ -219,15 +279,37 @@
"ubit" : "yyang25",
"year" : 2017,
"position" : "Software Engineer",
"company" : "Oracle"
"company" : "Oracle",
"status_updated" : 2017
},
"Patrick Coonan" : {
"status" : "BS/MS",
"degree" : "BS/MS",
"projects" : ["insider-threats"],
"ubit" : "pcoonan",
"year" : 2017,
"position" : "Software Engineer",
"company" : "Roswell Park Cancer Institute"
"company" : "Roswell Park Cancer Institute",
"status_updated" : 2017
},
"Grant Wrazen" : {
"degree" : "BS",
"year" : 2017,
"projects" : ["pocketdata"],
"advisor" : ["Luke Ziarek", "Geoff Challen"],
"joint_advisor": true,
"position" : "Software Engineer",
"company" : "ACV Auctions",
"status_updated" : 2017
},
"Hank Lin" : {
"degree" : "BS",
"year" : 2017,
"projects" : ["astral"],
"advisor" : ["Luke Ziarek"],
"joint_advisor": true,
"position" : "Software Engineer",
"company" : "FactSet",
"status_updated" : 2017
}
}
}

View File

@ -1,4 +1,14 @@
[
{
"title" : "Interactive Data Management and Data Analysis",
"authors" : ["Ying Yang"],
"venue" : "UB-PhD",
"year" : 2017,
"projects" : ["mimir"],
"urls" : {
"thesis" : "https://search.proquest.com/docview/1925945636"
}
},
{
"title" : "Beta Probabilistic Databases: A Scalable Approach to Belief Updating and Parameter Learning",
"authors" : [
@ -15,7 +25,12 @@
"video" : "https://youtu.be/CbTaDRdTG7s?t=2m56s",
"slides" : "http://odin.cse.buffalo.edu/slides/conference/2017-Niccolo-BetaPDBs.pptx",
"poster" : "http://odin.cse.buffalo.edu/slides/conference/2017-Niccolo-BetaPDBs-poster.ppt"
}
},
"grants_cited" : [
{ "agency" : "NSF: CISE: SaTC", "agency_id" : "1409551" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016", "project_name" : "Intuitive Data Interpretation" },
{ "agency" : "The US Naval Postgraduate School", "agency_id" : "N00244-16-1-0022" }
]
},
{
"title" : "Small Data (Panel Discussion)",
@ -66,8 +81,9 @@
"slides" : "http://odin.cse.buffalo.edu/slides/talks/2017-1-EDBT-Inference/"
},
"grants_cited" : [
{ "agency" : "NSF: CISE: SaTC", "agency_id" : "1409551" },
{ "agency" : "NSF: ACI: DIBBS", "agency_id" : "1640864" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016" }
{ "agency" : "Oracle University Relations", "start_date" : "05/2016", "project_name" : "Intuitive Data Interpretation" }
]
},
{
@ -87,7 +103,7 @@
},
"grants_cited" : [
{ "agency" : "NSF: ACI: DIBBS", "agency_id" : "1640864" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016", "project_name" : "Intuitive Data Interpretation" },
{ "agency" : "The US Naval Postgraduate School", "agency_id" : "N00244-16-1-0022" }
]
},
@ -142,7 +158,7 @@
},
"grants_cited" : [
{ "agency" : "NSF: ACI: DIBBS", "agency_id" : "1640864" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016" },
{ "agency" : "Oracle University Relations", "start_date" : "05/2016", "project_name" : "Intuitive Data Interpretation" },
{ "agency" : "The US Naval Postgraduate School", "agency_id" : "N00244-16-1-0022" }
]
},
@ -160,7 +176,7 @@
"slides" : "http://odin.cse.buffalo.edu/slides/talks/2016-4-HILDA/"
},
"grants_cited" : [
{ "agency" : "Oracle University Relations", "start_date" : "05/2016" }
{ "agency" : "Oracle University Relations", "start_date" : "05/2016", "project_name" : "Intuitive Data Interpretation" }
]
},
{ "title": "Provenance-aware Versioned Dataworkspaces",

View File

@ -111,7 +111,7 @@ class CV < Latex::Builder
textbf { huge; puts @title }
hfill
textbf { huge; puts @data["name"] }
vspace "0.05in"
endl "0.05in"
hrule
end
@ -378,7 +378,7 @@ class CV < Latex::Builder
noindent
tabular("rl") do
tabular("p{0.15\\textwidth}p{0.8\\textwidth}") do
# For each field being displayed
fields_to_display.each do |k|
@ -435,7 +435,7 @@ class CV < Latex::Builder
else # case k
puts r[k]
end # case k
endl
endl("0.02in")
end # unless r[k].nil?
end # fields_to_display.each
end # block("tabular") (field table)
@ -743,9 +743,10 @@ class CV < Latex::Builder
case m
when "sitevisits" then "#{safe_text v} unique website visitors"
when "downloads" then "#{safe_text v} unique downloads"
when "github" then v.to_a.map { |gh_m, gh_v| "#{gh_v.to_i} GitHub #{gh_m}" }
else puts "#{safe_text v} #{safe_text m}"
end
end
end.flatten
case metrics.length
when 0 then raise "Empty Metrics Field for #{r["name"]}"

View File

@ -256,4 +256,28 @@ module LabMetadata
return { "name" => name }.merge(info)
end
# Render one publication record as a plain-text entry for the report artifacts.
#
# The record is first passed through LabMetadata::complete_venue; the "type"
# of the result selects the layout. Read keys: "type", "title", "authors",
# "year", the optional "urls" and, for theses, "degree".
#
# @param pub [Hash] a publication record
# @return [String, nil] a "> "-prefixed entry whose fields are joined with
#   "\n ", or nil when the type is not one of conference, journal, workshop,
#   abstract or thesis
def LabMetadata.format_pub_for_report(pub)
  pub = LabMetadata::complete_venue(pub)
  venue_line =
    case pub["type"]
    when "conference", "journal", "workshop", "abstract"
      "#{LabMetadata::venue_name(pub, size: :full_parens)} #{pub["year"]}"
    when "thesis"
      "Thesis #{pub["degree"]} #{pub["year"]}"
    end
  # Unsupported types are skipped by callers via compact / nil checks.
  return nil if venue_line.nil?

  # "urls" is optional for every type. The non-thesis branch used to call
  # pub["urls"].map directly and raised NoMethodError on records without it.
  links = pub.fetch("urls", {}).map { |k,v| "#{k.capitalize}: #{v}" }
  fields = [
    pub["title"],
    pub["authors"].join(", "),
    venue_line,
    links,
    LabMetadata.bibtex_for(pub).split("\n")
  ].flatten
  "> " + fields.join("\n ")
end
end

View File

@ -4,9 +4,27 @@ title: Members and Affiliates
<div class="person_list">
<h1>Lab Members</h1>
<% $db["lab/members"].each do |member, data| %>
<div class="person">
<div class="person" style="vertical-align: middle;">
<%
pic = data.fetch("pic", { "file" => "../assets/people/missing.png", "width" => 275, "height" => 438 })
pic_scale = 50.0 / [ pic["width"].to_i, pic["height"].to_i ].max
padding_width = [ (50.0 - pic["width"].to_i * pic_scale) / 2, 0 ].max
padding_height = [ (50.0 - pic["height"].to_i * pic_scale) / 2, 0 ].max
%>
<img
src="<%= pic["file"] %>"
width="<%= pic["width"].to_i * pic_scale %>"
height="<%= pic["height"].to_i * pic_scale %>"
style="
margin-left: <%= padding_width %>px;
margin-right: <%= padding_width + 20 %>px;
margin-top: <%= padding_height %>px;
margin-bottom: <%= padding_height %>px;
"
/>
<a href="<%= if data["link_relative"] then File.join(root_path, data["link"]) else data["link"] end %>"><%= member %></a>
<span class="status">
<span class="status" style="padding-top: 15px;">
<%= data["status"] %>
<% if data.has_key? "advisor" %>;
<%= if data.has_key? "joint_advisor" then "Jointly " else "" end %>

View File

@ -1,14 +1,5 @@
---
title: Oliver Kennedy
github: okennedy
twitter: xthemage
scholar: 9Q9tiCsAAAAJ
dblp: hd/k/Kennedy:Oliver
cv: http://odin.cse.buffalo.edu/artifacts/okennedy.pdf
pic:
file: ../assets/people/oliver.jpg
width: 230
height: 346
---
Oliver Kennedy is an Assistant Professor at the University at Buffalo. Oliver's primary area of research is Databases, although his research interests frequently cross over into Programming Languages and Datastructures. His work focuses on self-service analytics, making messy data, schema design, and physical layout decisions more approachable. Through real-world usage metrics gathered from industry collaborations and the use of real-world testbeds, Oliver's work aims to address the practical problems faced by data consumers everywhere. His projects include a UI for ad-hoc cleaning and analytics called <a href="http://www.mimirdb.info">Mimir</a> and a universal, <a href="/research/astral/index.html">Just In-Time Datastructure</a>.

View File

@ -0,0 +1,25 @@
<div id="disqus_thread"></div>
<script>
/**
* RECOMMENDED CONFIGURATION VARIABLES: EDIT AND UNCOMMENT THE SECTION BELOW TO INSERT DYNAMIC VALUES FROM YOUR PLATFORM OR CMS.
* LEARN WHY DEFINING THESE VARIABLES IS IMPORTANT: https://disqus.com/admin/universalcode/#configuration-variables*/
/*
var disqus_config = function () {
// Replace PAGE_URL with your page's canonical URL variable
this.page.url = "http://odin.cse.buffalo.edu/teaching/cse-662/2017fa/disqus_test.html";
// Replace PAGE_IDENTIFIER with your page's unique identifier variable
this.page.identifier = "cse662.2017fa.test";
};
*/
// Immediately-invoked loader: creates a <script> element pointing at the
// embed script on ubodin.disqus.com and appends it to the document.
(function() { // DON'T EDIT BELOW THIS LINE
var d = document, s = d.createElement('script');
s.src = 'https://ubodin.disqus.com/embed.js';
// Unary + coerces the Date to its numeric millisecond timestamp.
s.setAttribute('data-timestamp', +new Date());
// Append to <head> when it exists, otherwise fall back to <body>.
(d.head || d.body).appendChild(s);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>

View File

@ -0,0 +1,215 @@
---
title: CSE 662 - Languages and Runtimes for Big Data - Fall 2017
---
# CSE 662 - Fall 2017
Addressing the challenges of big data requires a combination of human intuition
and automation. Rather than tackling these challenges head-on with
build-from-scratch solutions, or through general-purpose database systems,
developer and analyst communities are turning to building blocks: Specialized
languages, runtimes, data-structures, services, compilers, and frameworks that
simplify the task of creating systems that are powerful enough to handle
terabytes of data or more or efficient enough to run on your smartphone. In
this class, we will explore these fundamental building blocks and how they
relate to the constraints imposed by workloads and the platforms they run on.
Coursework consists of lectures and a multi-stage final project. Students are
expected to attend all lectures. Projects may be performed individually or in
groups. Projects will be evaluated in three stages through code deliverables,
reports, and group meetings with either or both of the instructors. During
these meetings, instructors will question the entire group extensively about
the group's report, deliverables, and any related tools and technology.
1. At the initial stage, students are expected to demonstrate a high level of
proficiency with the tools, techniques, data structures, algorithms and
source code that will form the basis of their project. The group is expected
to submit and defend a roughly 5-page report surveying the space in which
their project will be performed. This report and presentation constitute 15%
of the final grade.
2. At the second stage, students are expected to provide a detailed design for
their final project. A roughly 5-page report should outline the group's
proposed design, any algorithms or data structures being introduced, as well
as a strategy for evaluating the resulting project against the current state
of the art. This report and presentation constitute 35% of the final grade.
3. At the final stage, students are expected to provide a roughly 5-page report
detailing their project, any algorithms or data structures developed, and
evaluating their project against any comparable state of the art systems and
techniques. Groups will also be expected to demonstrate their project and
present their findings in-class, or in a meeting with both instructors if
necessitated by time constraints. This report and presentation constitute
50% of the final grade.
-----
## Information
* **Instructors**
* Oliver Kennedy (Davis 338H; Office Hours Weds 1:00-3:00)
* **Time**: MWF 12:00-12:50
* **Location**: Knox 04
------
## Course Objectives
After taking the course, students should be able to:
* **Design domain specific query languages**, by first developing an understanding of
the common tropes of a target domain, exploring ways of allowing users to
efficiently express those tropes, and developing ways of mapping the resulting
programs to an efficient evaluation strategy.
* **Identify concurrency challenges in data-intensive computing tasks**, and
address them through locking, code associativity, and correctness analysis.
* **Understand a variety of index data structures**, as well as their application
and use in data management systems for high velocity, volume, veracity,
and/or variety data.
* **Understand query and program compilation techniques**, including the design of
intermediate representations, subexpression equivalence, cost estimation, and
the construction of target-representation code.
-----
## Course Schedule
* **Aug. 28** : Introduction ([overview](2017-08-28-Introduction.html))
* **Aug. 30** : Project Seeds - Mimir
* **Sept. 01** : Project Seeds - JITDs &amp; PocketData
* **Sept. 04** : Database Cracking ( [Cracking](http://stratos.seas.harvard.edu/files/IKM_CIDR07.pdf) )
* **Sept. 06** : Functional Data Structures
* **Sept. 12** : Just-in-Time Data Structures ( [JITDs](http://odin.cse.buffalo.edu/papers/2015/CIDR-jitd-final.pdf) )
* **Sept. 8** : Incomplete Databases 1
* **Sept. 11** : Incomplete Databases 2
* **Sept. 13** : Incomplete Databases 3
* **Sept. 15** : Mimir ( [Mimir](http://odin.cse.buffalo.edu/papers/2015/VLDB-lenses-final.pdf) )
* **Sept. 18** : MayBMS ( [MayBMS](http://maybms.sourceforge.net/download/INFOSYS-TR-2007-2.pdf) )
* **Sept. 20** : Sampling From Probabilistic Queries ( [MCDB](http://dl.acm.org/citation.cfm?id=1376686) )
* **Sept. 22** : Probabilistic Constraint Repair ( [Sampling from Repairs](https://cs.uwaterloo.ca/~ilyas/papers/BeskalesVLDBJ2014.pdf) )
* **Sept. 25** : R-Trees and Multidimensional Indexing
* **Sept. 27 - Sept. 29** : Student Project Presentations
* **Oct. 2** : BloomL ( [Bloom/Bud](http://cidrdb.org/cidr2011/Papers/CIDR11_Paper35.pdf), [BloomL](http://dl.acm.org/citation.cfm?id=2391230) )
* **Oct. 4 - Oct. 6** : *Oliver Away* (Content TBD)
* **Oct. 9** : NoDB ( [NoDB](http://www.vldb.org/pvldb/vol7/p1119-karpathiotakis.pdf) )
* **Oct. 11 - Oct. 13** : Student Project Presentations
* **Oct. 16** : Lazy Transactions ( [Stickies](http://dl.acm.org/citation.cfm?id=2610529) )
* **Oct. 18** : Streaming ( [Cayuga](http://www.cs.cornell.edu/johannes/papers/2007/2007-CIDR-Cayuga.pdf) )
* **Oct. 20** : Scan Sharing ( [Crescando](http://dl.acm.org/citation.cfm?id=1807326) )
* **Oct. 23 - Oct. 27** : Checkpoint 2 Reviews
* **Oct. 30** : Declarative Games ( [SGL](https://infoscience.epfl.ch/record/166858/files/31-sigmod2007_games.pdf) )
* **Nov. 1 - Nov. 3** : Student Project Presentations
* **Nov. 6 - Nov. 10** : *Oliver Away* (Content TBD)
* **Nov. 13** : *Buffer*
* **Nov. 15 - Nov. 17** : Student Project Presentations
* **Nov. 20** : *Buffer*
* **Nov. 22 - Nov. 24** : Student Project Presentations
* **Nov. 27** : *Buffer*
* **Nov. 29 - Dec. 1** : Student Project Presentations
* **Dec. 4 - Dec. 8** : Checkpoint 3 Reviews
-----
## Project Seeds
#### Deferred Constraint-Based Data Validation
There are a number of reasons that data might go bad: sensor errors, data entry errors, lag spikes, filesystem corruption, and more. One thing is certain though: you don't want to make decisions based on bad data. What people will often do is do basic sanity checks. For example, if we have a record of someone checking _out_ of a hospital, we should have a record of them checking _in_ at some point earlier. Declaring these sanity checks is hard, but fixing violations is even harder. In this project, you will explore ways to safely defer repairing the data. Challenges include:
1. Deciding what types of sanity constraints you want to be able to support.
2. Interfacing with an existing database (Spark, SQLite, or Oracle) to identify sets of tuples that come together to violate a sanity constraint.
3. Using Mimir to warn users when a query result depends on a tuple that participates in a violation
4. Suggesting and ranking modifications that repair violations
###### Background material:
* [Sampling from Repairs](https://cs.uwaterloo.ca/~ilyas/papers/BeskalesVLDBJ2014.pdf)
* [Qualitative Data Cleaning](http://dl.acm.org/citation.cfm?id=3007320)
* [Mimir Website](http://mimirdb.info)
* [Mimir on GitHub](https://github.com/UBOdin/mimir)
* [Mimir Concepts](https://github.com/ubodin/mimir/wiki/Concepts)
#### Query Sampling Optimizer
Most probabilistic database systems aim to produce all possible results. A few, most notably [MCDB](http://dl.acm.org/citation.cfm?id=1376686), instead generate samples of possible results. The basic idea is to split the database into a fixed number (N) of _possible worlds_, and run the query on all N possible worlds in parallel. There are actually a few different ways to do this. Three relatively common examples include:
* **Naive**: Literally run N copies of the query and union the results at the end.
* **Interleave**: Tag each tuple with the possible world that it comes from, and then just run one query. Make sure the query ensures that tuples from different possible worlds can't interact (i.e., Joins always happen between tuples from the same world and the world becomes another group-by column)
* **Tuple Bundle**: Create mega-tuples, that represent alternative versions of the same tuple in different possible worlds. If an attribute value is the same in all possible worlds store only one copy of it. (See [MCDB](http://dl.acm.org/citation.cfm?id=1376686))
Perhaps counterintuitively, our preliminary implementations of the [Interleave](https://github.com/UBOdin/mimir/blob/master/src/main/scala/mimir/exec/mode/SampleRows.scala) and [Tuple Bundle](https://github.com/UBOdin/mimir/blob/master/src/main/scala/mimir/exec/mode/TupleBundle.scala) algorithms suggest that none of these approaches will be the best in all cases. For example, in a simple select-aggregate query, tuple-bundles are the most efficient. Conversely, if you're joining on an attribute with different values in each possible world, interleave will be faster. We suspect that there are some cases where Naive will win out as well. The aim of this project is to implement a query optimizer for sampling-based probabilistic database queries. If I hand you a query, you tell me which strategy is fastest for that query. As an optional extension, you may be able to interleave different strategies, each evaluating a different part of the query.
###### Background material:
* [MCDB](http://dl.acm.org/citation.cfm?id=1376686)
* [BlinkDB](http://blinkdb.org/)
* [Mimir Website](http://mimirdb.info)
* [Mimir on GitHub](https://github.com/UBOdin/mimir)
* [Probabilistic Databases](https://github.com/ubodin/mimir/wiki/Concepts-CTables)
#### Explaining Offset-Outliers
When looking at queries, a common question is "why is this result the way it is?" While a broad question, database researchers have been hard at work isolating and addressing specific cases. For this particular project, we'd like to explore one specific category of explanation, where users have provided us with points of stability: Group-by aggregates that are supposed to remain stable over time. For example, consider
```
SELECT author, STDDEV(cnt) FROM (
  SELECT author, year, COUNT(*) AS cnt FROM Publications
  GROUP BY author, year
) GROUP BY author;
```
This query gives the variation per user in terms of number of publications per year. We might use a query like this to define a constraint that says "For any author, the number of publications per year stays roughly constant". Constraints like this can help us to explain aggregate values. For example let's say you run the following query and the result is lower than expected.
```
SELECT COUNT(*) FROM Publications WHERE author = 'Alice' AND venue = 'ICDE' AND year = 2017;
```
If you ask "Why is this result so low", the system can look at the above constraint and figure out that there's another aggregate query that is higher than usual (to preserve the stability of the publications/year constraint defined above)
```
SELECT COUNT(*) FROM Publications WHERE author = 'Alice' AND venue = 'ICDE' AND year = 2017;
```
The aim of this project would be to implement a simple frontend to an existing database system (Spark, SQLite, or Oracle) that accepts a set of constraints and answers questions like this. This project is part of ongoing joint work with Boris Glavic and Sudeepa Roy.
###### Background material:
* [Causality and Explanations in Databases](https://users.cs.duke.edu/~sudeepa/vldb2014-Tutorial-causality-explanations.pdf)
* [DBExplain](https://cudbg.github.io/lab/dbexplain)
* [Scorpion](http://sirrice.github.io/files/papers/scorpion-vldb13.pdf)
#### Adaptive Multidimensional Indexing
(Summary In Progress)
#### Mimir on SparkSQL
(Summary In Progress)
#### Garbage Collection in Embedded Databases
(Summary In Progress)
-----
## Academic Content
The course will involve lectures and readings drawn from an assortment of
academic papers selected by the instructors. There is no textbook for the
course.
-----
## Academic Integrity
Students may discuss and advise one another on their lab projects, but groups
are expected to turn in their own work. Cheating on any course deliverable will
result in automatic failure of the course. The University policy on academic
integrity can be reviewed at:
http://academicintegrity.buffalo.edu/policies/index.php
-----
## Accessibility Resources
If you have a diagnosed disability (physical, learning, or psychological) that
will make it difficult for you to carry out the course work as outlined, or
that requires accommodations such as recruiting note-takers, readers, or
extended time on exams or assignments, please advise the instructor during the
first two weeks of the course so that we may review possible arrangements for
reasonable accommodations. In addition, if you have not yet done so, contact
the Office of Accessibility Resources (formerly the Office of Disability
Services).

View File

@ -0,0 +1,6 @@
---
redirect: 2017fa/index.html
title: CSE 662 - Languages and Runtimes for Big Data
---
Redirecting you <a href="2017fa/index.html">here</a>...

View File

@ -2,15 +2,12 @@
title: Courses
---
<h2>Database Seminar</h2>
<ul>
<li><a href="<%=root_path("seminar/index.html")%>">Spring 2016</a></li>
<li><a href="<%=root_path("seminar/2015fa.html")%>">Fall 2015</a></li>
</ul>
<h2>CSE 4/562 - Database Systems</h2>
<p>
<b>Database Systems</b> teaches the inner workings of data management systems. Focus areas include organizational data structures (physical layouts, indexes, materialized views), data processing algorithms (join, sort), query optimization (relational algebra equivalences, query planning, cost modeling), transactional semantics (X-serializability, locking, OCC, MVCC), and recovery (WAL, Undo Logging, ARIES). The course involves a term-long project where students build a query processing system.
</p>
<ul>
<li><a href="cse-562/2017sp/index.html">Spring 2017</a> (Graduate Only)</li>
<li><a href="cse-462/index.html">Spring 2016</a> (Undergraduate Only)</li>
@ -19,17 +16,36 @@ title: Courses
<li><a href="https://piazza.com/buffalo/spring2013/cse562/home">Spring 2013</a> (Graduate Only)</li>
</ul>
<h2>CSE 662 - Langs &amp; Runtimes for Big Data</h2>
<h2>CSE 662 - Languages &amp; Runtimes for Big Data</h2>
<p>
<b>Languages &amp; Runtimes for Big Data</b> is a project-based course exploring topics at the intersection of Data Management and Programming Languages. Focus areas include indexing, databases on new hardware, uncertain data management, and concurrency. In addition to reading papers from ongoing research in these areas, students are expected to complete a term-long project based on one of several seed ideas provided by the instructors.
</p>
<ul>
<li><a href="cse-662/index.html">Fall 2016</a></li>
<li><a href="cse-662/2017fa/index.html">Fall 2017</a></li>
<li><a href="cse-662/2016fa/index.html">Fall 2016</a></li>
<li><a href="https://piazza.com/buffalo/fall2015/cse662/home">Fall 2015</a></li>
</ul>
<h2>UBDB Seminar</h2>
<p>We run a yearly seminar with invited speakers from the database community each spring.</p>
<ul>
<li><a href="<%=root_path("seminar/index.html")%>">UBDB 2016</a></li>
<li><a href="<%=root_path("seminar/2015fa.html")%>">UBDB 2015</a></li>
</ul>
<h2>CSE 704 - Seminar</h2>
<p>
Seminar topics vary from term to term. At present, no further official seminars will be held.
</p>
<ul>
<li><a href="https://piazza.com/buffalo/fall2014/cse704/home">Fall 2014 - DB, PL, and Data Structures</a></li>
<li><a href="https://piazza.com/buffalo/fall2013/cse704/home">Fall 2013 - Streaming, Incremental, and Online Data Processing</a></li>
<li><a href="http://www.cse.buffalo.edu/~okennedy/courses/cse704fa2012.html">Fall 2012 - Web-Scale Data Management</a></li>
</ul>
</ul>

View File

@ -11,6 +11,14 @@ name = if defined? name then name else "A Mysterious Stranger" end
<% end %>
<% end %>
<h1><%= name %></h1>
<% if defined? status %>
<h5><%=
case status
when "PhD", "MS", "BS" then status + " student"
else status
end
%></h5>
<% end %>
<div class="links">
<% if defined? github %><a href="http://github.com/<%=github%>">GitHub</a><% end %>
@ -22,7 +30,10 @@ name = if defined? name then name else "A Mysterious Stranger" end
<%= body %>
<h2>Publications</h2>
<%= LabMetadata::render_pubs(LabMetadata::publications_for(name)) %>
<% pubs = LabMetadata::publications_for(name) %>
<% unless pubs.empty? %>
<h2>Publications</h2>
<%= LabMetadata::render_pubs(pubs) %>
<% end %>
</div>