Updated justification
commit
2ee6edd726
|
@ -1,5 +1,7 @@
|
|||
ABSTRACT
|
||||
/.DS_Store
|
||||
/badwords
|
||||
/biosketches
|
||||
/shortproposal.pdf
|
||||
/fullproposal.pdf
|
||||
/.xxxnote
|
||||
|
@ -16,7 +18,9 @@ ABSTRACT
|
|||
*.fls
|
||||
*.log
|
||||
*.synctex.gz
|
||||
*.fdb_latexmk
|
||||
.deps
|
||||
.~lock*
|
||||
Bios/*.pdf
|
||||
/fullproposal.fdb_latexmk
|
||||
/fullproposal-body.pdf
|
||||
/fullproposal-refs.pdf
|
||||
|
|
Binary file not shown.
15
Makefile
15
Makefile
|
@ -1,16 +1,21 @@
|
|||
TEX_FILES=fullproposal.tex main.bib geoffreychallen.bib $(wildcard sections/*)
|
||||
TARGET=fullproposal
|
||||
TEX_FILES=$(TARGET).tex main.bib geoffreychallen.bib $(wildcard sections/*)
|
||||
|
||||
|
||||
all: fullproposal.pdf todo
|
||||
@if [ `uname` = "Darwin" ] ; then open fullproposal.pdf; fi
|
||||
@if [ `uname` = "Darwin" ] ; then open $(TARGET).pdf; fi
|
||||
|
||||
graphs:
|
||||
@cd graphs; rake
|
||||
|
||||
fullproposal.pdf: $(TEX_FILES)
|
||||
latexmk -pdf fullproposal.tex
|
||||
$(TARGET).pdf: $(TEX_FILES)
|
||||
latexmk -pdf $(TARGET).tex
|
||||
|
||||
open: fullproposal.pdf todo
|
||||
open: $(TARGET).pdf todo
|
||||
open $<
|
||||
|
||||
split: $(TARGET).pdf SplitProposal.workflow
|
||||
automator -i `pwd`/$(TARGET).pdf SplitProposal.workflow 2>&1 | grep -v "Type1 font data"
|
||||
|
||||
clean:
|
||||
latexmk -CA -bibtex
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleName</key>
|
||||
<string>SplitProposal</string>
|
||||
</dict>
|
||||
</plist>
|
Binary file not shown.
After Width: | Height: | Size: 171 KiB |
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
Sharad Agarwal (Microsoft Research)
|
||||
Nilanjan Banerjee (University of Maryland)
|
||||
Milind Buddhikot (Bell Labs)
|
||||
Yih-Farn Chen (AT\&T Labs Research)
|
||||
Murat Demirbas (University at Buffalo)
|
||||
Prabal Dutta (University of Michigan)
|
||||
Wen Dong (University at Buffalo)
|
||||
Carla Schlatter Ellis (Duke University)
|
||||
Shyamnath Gollakota (University of Washington)
|
||||
Michelle Gong (Google)
|
||||
Marco Gruteser (Rutgers University)
|
||||
Mark Hempstead (Drexel University)
|
||||
Oliver Kennedy (University at Buffalo)
|
||||
Robin Kravets (University of Illinois, Urbana-Champaign)
|
||||
Steven Y. Ko (University at Buffalo)
|
||||
Tevfik Kosar (University at Buffalo)
|
||||
Dimitrios Koutsonikolas (University at Buffalo)
|
||||
Branislav Kusy (CSIRO)
|
||||
Eyal de Lara (University of Toronto)
|
||||
James Martin (Clemson University)
|
||||
Tommaso Melodia (Northeastern University)
|
||||
Emiliano Miluzzo (Apio Systems)
|
||||
Iqbal Mohomed (IBM Research)
|
||||
James Pepin (Clemson University)
|
||||
Matthai Philipose (Microsoft Research)
|
||||
Sami Rollins (University of San Francisco)
|
||||
Margo Seltzer (Harvard University)
|
||||
Ivan Seskar (Rutgers University)
|
||||
Jacob Sorber (Clemson University)
|
||||
Aaron Striegel (Notre Dame)
|
||||
Khai N. Truong (University of Toronto)
|
||||
Chunming Qiao (University at Buffalo)
|
||||
Kuangching Wang (Clemson University)
|
||||
Lin Zhong (Rice University)
|
||||
Lukasz Ziarek (University at Buffalo)
|
|
@ -0,0 +1,23 @@
|
|||
Umut Acar; CMU
|
||||
Ali-Reza Adl-Tabatabai; Intel
|
||||
Ethan Blanton; Fiji Systems Inc.
|
||||
Patrick Eugster; Purdue University
|
||||
Matthew Fluet; Rochester
|
||||
Christoph Hoffman; Purdue University
|
||||
Antony Hosking; Purdue University
|
||||
Suresh Jagannathan; Purdue University
|
||||
Bharat Jayaraman; University at Buffalo
|
||||
Oliver Kennedy; University at Buffalo
|
||||
Steve Ko; University at Buffalo
|
||||
Sree Harsha Konduri; Amazon
|
||||
Amit Kulkarni; University at Buffalo
|
||||
Zihuan Li; Purdue University
|
||||
Vijay Menon; Google
|
||||
Filip Pizlo; Apple Inc.
|
||||
Jennifer Sartor; Ghent University
|
||||
Tatiana Shpeisman; Intel
|
||||
KC Sivaramakrishnan; Purdue University
|
||||
Sam Tobin-Hochstadt; Indiana University
|
||||
Jan Vitek; Purdue University, Fiji Systems Inc.
|
||||
Adam Welc; Oracle
|
||||
Yin Yan; University at Buffalo
|
|
@ -1,9 +0,0 @@
|
|||
*-separate.pdf
|
||||
.xxxnote
|
||||
*.swp
|
||||
*.aux
|
||||
*.log
|
||||
*.out
|
||||
*.bbl
|
||||
*.blg
|
||||
.deps
|
Binary file not shown.
|
@ -68,6 +68,8 @@ Lukasz Ziarek (Univ. of Buffalo, Dept. of Comp. Sci. and Eng.)}
|
|||
\input{sections/5-priorresults}
|
||||
|
||||
\pagebreak
|
||||
\setcounter{page}{1}
|
||||
|
||||
{
|
||||
\bibliographystyle{nsf}
|
||||
\bibliography{main,geoffreychallen}
|
||||
|
|
Binary file not shown.
265
main.bib
265
main.bib
|
@ -1,20 +1,254 @@
|
|||
%% This BibTeX bibliography file was created using BibDesk.
|
||||
%% http://bibdesk.sourceforge.net/
|
||||
|
||||
%% Created for Oliver Kennedy at 2016-01-18 23:35:08 -0500
|
||||
%% Created for Oliver Kennedy at 2016-01-19 22:40:45 -0500
|
||||
|
||||
|
||||
%% Saved with string encoding Unicode (UTF-8)
|
||||
|
||||
|
||||
|
||||
@incollection{ashok2015benchmarking,
|
||||
Author = {Joshi, Ashok and Nambiar, Raghunath and Brey, Michael},
|
||||
Booktitle = {Big Data Benchmarking},
|
||||
Date-Added = {2016-01-20 02:41:38 +0000},
|
||||
Date-Modified = {2016-01-20 02:41:38 +0000},
|
||||
Doi = {10.1007/978-3-319-20233-4_4},
|
||||
Editor = {Rabl, Tilmann and Sachs, Kai and Poess, Meikel and Baru, Chaitanya and Jacobson, Hans-Arno},
|
||||
Isbn = {978-3-319-20232-7},
|
||||
Language = {English},
|
||||
Pages = {29-36},
|
||||
Publisher = {Springer International Publishing},
|
||||
Series = {Lecture Notes in Computer Science},
|
||||
Title = {Benchmarking Internet of Things Solutions},
|
||||
Url = {http://dx.doi.org/10.1007/978-3-319-20233-4_4},
|
||||
Volume = {8991},
|
||||
Year = {2015},
|
||||
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-319-20233-4_4}}
|
||||
|
||||
@inproceedings{Pizlo:2010:SFR:1806596.1806615,
|
||||
Acmid = {1806615},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Pizlo, Filip and Ziarek, Lukasz and Maj, Petr and Hosking, Antony L. and Blanton, Ethan and Vitek, Jan},
|
||||
Booktitle = {Proceedings of the 2010 ACM SIGPLAN Conference on Programming Language Design and Implementation},
|
||||
Date-Added = {2016-01-19 22:46:55 +0000},
|
||||
Date-Modified = {2016-01-19 22:46:55 +0000},
|
||||
Doi = {10.1145/1806596.1806615},
|
||||
Isbn = {978-1-4503-0019-3},
|
||||
Keywords = {fragmentation, mark-region, mark-sweep, real-time, replication-copying},
|
||||
Location = {Toronto, Ontario, Canada},
|
||||
Numpages = {14},
|
||||
Pages = {146--159},
|
||||
Publisher = {ACM},
|
||||
Series = {PLDI '10},
|
||||
Title = {Schism: Fragmentation-tolerant Real-time Garbage Collection},
|
||||
Url = {http://doi.acm.org/10.1145/1806596.1806615},
|
||||
Year = {2010},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1806596.1806615},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1806596.1806615}}
|
||||
|
||||
@misc{dbworld,
|
||||
Author = {{ACM SIGMOD}},
|
||||
Date-Added = {2016-01-19 04:34:31 +0000},
|
||||
Date-Modified = {2016-01-19 04:35:06 +0000},
|
||||
Date-Added = {2016-01-19 22:45:54 +0000},
|
||||
Date-Modified = {2016-01-19 22:45:54 +0000},
|
||||
Howpublished = {https://research.cs.wisc.edu/dbworld/},
|
||||
Title = {DBWorld}}
|
||||
|
||||
@inproceedings{nomadlog-sigcomm14,
|
||||
Acmid = {2626333},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Gao, Zhaoyu and Venkataramani, Arun and Kurose, James F. and Heimlicher, Simon},
|
||||
Booktitle = {Proceedings of the 2014 ACM Conference on SIGCOMM},
|
||||
Date-Added = {2016-01-19 22:44:45 +0000},
|
||||
Date-Modified = {2016-01-19 22:44:45 +0000},
|
||||
Doi = {10.1145/2619239.2626333},
|
||||
Isbn = {978-1-4503-2836-4},
|
||||
Keywords = {location-independence, mobility, network architecture},
|
||||
Location = {Chicago, Illinois, USA},
|
||||
Numpages = {12},
|
||||
Pages = {259--270},
|
||||
Publisher = {ACM},
|
||||
Series = {SIGCOMM '14},
|
||||
Title = {Towards a Quantitative Comparison of Location-independent Network Architectures},
|
||||
Url = {http://doi.acm.org/10.1145/2619239.2626333},
|
||||
Year = {2014},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2619239.2626333},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2619239.2626333}}
|
||||
|
||||
@inproceedings{locking-chi2016,
|
||||
Author = {Marian Harbach and Alexander De Luca and Serge Egelman},
|
||||
Booktitle = {Proceedings of the 2016 ACM Conference on Human Factors in Computing Systems (CHI'2016)},
|
||||
Date-Added = {2016-01-19 22:44:45 +0000},
|
||||
Date-Modified = {2016-01-19 22:44:45 +0000},
|
||||
Title = {The Anatomy of Smartphone Unlocking: A Field Study of Android Lock Screens},
|
||||
Year = {2016}}
|
||||
|
||||
@article{Ziarek:2008:FTS:1466762.1466777,
|
||||
Acmid = {1466777},
|
||||
Address = {Hingham, MA, USA},
|
||||
Author = {Ziarek, Lukasz and Weeks, Stephen and Jagannathan, Suresh},
|
||||
Date-Added = {2016-01-19 22:42:22 +0000},
|
||||
Date-Modified = {2016-01-19 22:42:22 +0000},
|
||||
Doi = {10.1007/s10990-008-9035-3},
|
||||
Issn = {1388-3690},
|
||||
Issue_Date = {September 2008},
|
||||
Journal = {Higher Order Symbol. Comput.},
|
||||
Keywords = {Compilation, Flattening, Optimization, SSA, Tuples, Unboxing},
|
||||
Month = sep,
|
||||
Number = {3},
|
||||
Numpages = {26},
|
||||
Pages = {333--358},
|
||||
Publisher = {Kluwer Academic Publishers},
|
||||
Title = {Flattening Tuples in an SSA Intermediate Representation},
|
||||
Url = {http://dx.doi.org/10.1007/s10990-008-9035-3},
|
||||
Volume = {21},
|
||||
Year = {2008},
|
||||
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10990-008-9035-3}}
|
||||
|
||||
@inproceedings{Sivaramakrishnan:2012:ERB:2258996.2259005,
|
||||
Acmid = {2259005},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Sivaramakrishnan, KC and Ziarek, Lukasz and Jagannathan, Suresh},
|
||||
Booktitle = {Proceedings of the 2012 International Symposium on Memory Management},
|
||||
Date-Added = {2016-01-19 22:42:09 +0000},
|
||||
Date-Modified = {2016-01-19 22:42:09 +0000},
|
||||
Doi = {10.1145/2258996.2259005},
|
||||
Isbn = {978-1-4503-1350-6},
|
||||
Keywords = {barrier elimination, cleanliness, concurrent programming, functional languages, parallel and concurrent collection, private heaps},
|
||||
Location = {Beijing, China},
|
||||
Numpages = {12},
|
||||
Pages = {49--60},
|
||||
Publisher = {ACM},
|
||||
Series = {ISMM '12},
|
||||
Title = {Eliminating Read Barriers Through Procrastination and Cleanliness},
|
||||
Url = {http://doi.acm.org/10.1145/2258996.2259005},
|
||||
Year = {2012},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2258996.2259005},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2258996.2259005}}
|
||||
|
||||
@inproceedings{Pizlo:2010:HPE:1755913.1755922,
|
||||
Acmid = {1755922},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Pizlo, Filip and Ziarek, Lukasz and Blanton, Ethan and Maj, Petr and Vitek, Jan},
|
||||
Booktitle = {Proceedings of the 5th European Conference on Computer Systems},
|
||||
Date-Added = {2016-01-19 22:42:00 +0000},
|
||||
Date-Modified = {2016-01-19 22:42:00 +0000},
|
||||
Doi = {10.1145/1755913.1755922},
|
||||
Isbn = {978-1-60558-577-2},
|
||||
Keywords = {java virtual machine, memory management, real-time systems},
|
||||
Location = {Paris, France},
|
||||
Numpages = {14},
|
||||
Pages = {69--82},
|
||||
Publisher = {ACM},
|
||||
Series = {EuroSys '10},
|
||||
Title = {High-level Programming of Embedded Hard Real-time Devices},
|
||||
Url = {http://doi.acm.org/10.1145/1755913.1755922},
|
||||
Year = {2010},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1755913.1755922},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1755913.1755922}}
|
||||
|
||||
@inproceedings{Yan:2013:RDR:2512989.2512990,
|
||||
Acmid = {2512990},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Yan, Yin and Konduri, Sree Harsha and Kulkarni, Amit and Anand, Varun and Ko, Steven Y. and Ziarek, Lukasz},
|
||||
Booktitle = {Proceedings of the 11th International Workshop on Java Technologies for Real-time and Embedded Systems},
|
||||
Date-Added = {2016-01-19 22:41:50 +0000},
|
||||
Date-Modified = {2016-01-19 22:41:50 +0000},
|
||||
Doi = {10.1145/2512989.2512990},
|
||||
Isbn = {978-1-4503-2166-2},
|
||||
Location = {Karlsruhe, Germany},
|
||||
Numpages = {10},
|
||||
Pages = {98--107},
|
||||
Publisher = {ACM},
|
||||
Series = {JTRES '13},
|
||||
Title = {RTDroid: A Design for Real-time Android},
|
||||
Url = {http://doi.acm.org/10.1145/2512989.2512990},
|
||||
Year = {2013},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2512989.2512990},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2512989.2512990}}
|
||||
|
||||
@inproceedings{Blanton:2013:NIC:2512989.2512994,
|
||||
Acmid = {2512994},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Blanton, Ethan and Ziarek, Lukasz},
|
||||
Booktitle = {Proceedings of the 11th International Workshop on Java Technologies for Real-time and Embedded Systems},
|
||||
Date-Added = {2016-01-19 22:41:44 +0000},
|
||||
Date-Modified = {2016-01-19 22:41:44 +0000},
|
||||
Doi = {10.1145/2512989.2512994},
|
||||
Isbn = {978-1-4503-2166-2},
|
||||
Location = {Karlsruhe, Germany},
|
||||
Numpages = {10},
|
||||
Pages = {58--67},
|
||||
Publisher = {ACM},
|
||||
Series = {JTRES '13},
|
||||
Title = {Non-blocking Inter-partition Communication with Wait-free Pair Transactions},
|
||||
Url = {http://doi.acm.org/10.1145/2512989.2512994},
|
||||
Year = {2013},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2512989.2512994},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2512989.2512994}}
|
||||
|
||||
@inproceedings{Ziarek:2011:CAE:1993498.1993572,
|
||||
Acmid = {1993572},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Ziarek, Lukasz and Sivaramakrishnan, KC and Jagannathan, Suresh},
|
||||
Booktitle = {Proceedings of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation},
|
||||
Date-Added = {2016-01-19 22:41:35 +0000},
|
||||
Date-Modified = {2016-01-19 22:41:35 +0000},
|
||||
Doi = {10.1145/1993498.1993572},
|
||||
Isbn = {978-1-4503-0663-8},
|
||||
Keywords = {asynchrony, composability, concurrent ml, first-class events, message-passing},
|
||||
Location = {San Jose, California, USA},
|
||||
Numpages = {12},
|
||||
Pages = {628--639},
|
||||
Publisher = {ACM},
|
||||
Series = {PLDI '11},
|
||||
Title = {Composable Asynchronous Events},
|
||||
Url = {http://doi.acm.org/10.1145/1993498.1993572},
|
||||
Year = {2011},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1993498.1993572},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1993498.1993572}}
|
||||
|
||||
@article{Ziarek:2010:LCC:1852977.1852979,
|
||||
Acmid = {1852979},
|
||||
Address = {New York, NY, USA},
|
||||
Author = {Ziarek, Lukasz and Jagannathan, Suresh},
|
||||
Date-Added = {2016-01-19 22:41:35 +0000},
|
||||
Date-Modified = {2016-01-19 22:41:35 +0000},
|
||||
Doi = {10.1017/S0956796810000067},
|
||||
Issn = {0956-7968},
|
||||
Issue_Date = {March 2010},
|
||||
Journal = {J. Funct. Program.},
|
||||
Month = mar,
|
||||
Number = {2},
|
||||
Numpages = {37},
|
||||
Pages = {137--173},
|
||||
Publisher = {Cambridge University Press},
|
||||
Title = {Lightweight Checkpointing for Concurrent {ML}},
|
||||
Url = {http://dx.doi.org/10.1017/S0956796810000067},
|
||||
Volume = {20},
|
||||
Year = {2010},
|
||||
Bdsk-Url-1 = {http://dx.doi.org/10.1017/S0956796810000067}}
|
||||
|
||||
@article{Yang:2015:LOA:2824032.2824055,
|
||||
Acmid = {2824055},
|
||||
Author = {Yang, Ying and Meneghetti, Niccol\`{o} and Fehling, Ronny and Liu, Zhen Hua and Kennedy, Oliver},
|
||||
Date-Added = {2016-01-19 22:40:55 +0000},
|
||||
Date-Modified = {2016-01-19 22:40:55 +0000},
|
||||
Doi = {10.14778/2824032.2824055},
|
||||
Issn = {2150-8097},
|
||||
Issue_Date = {August 2015},
|
||||
Journal = {Proc. VLDB Endow.},
|
||||
Month = aug,
|
||||
Number = {12},
|
||||
Numpages = {12},
|
||||
Pages = {1578--1589},
|
||||
Publisher = {VLDB Endowment},
|
||||
Title = {Lenses: An On-demand Approach to ETL},
|
||||
Url = {http://dx.doi.org/10.14778/2824032.2824055},
|
||||
Volume = {8},
|
||||
Year = {2015},
|
||||
Bdsk-Url-1 = {http://dx.doi.org/10.14778/2824032.2824055}}
|
||||
|
||||
@misc{ramamurthy2015pocketdata,
|
||||
Author = {Naveen Kumar Ramamurthy and Sankara Vadivel Dhandapani and Saravanan Adaikkalavan and Sathish Kumar Deivasigamani},
|
||||
Date-Added = {2016-01-18 20:07:55 +0000},
|
||||
|
@ -2738,28 +2972,3 @@
|
|||
Year = {2012},
|
||||
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2384716.2384723},
|
||||
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2384716.2384723}}
|
||||
@inproceedings{nomadlog-sigcomm14,
|
||||
author = {Gao, Zhaoyu and Venkataramani, Arun and Kurose, James F. and Heimlicher, Simon},
|
||||
title = {Towards a Quantitative Comparison of Location-independent Network Architectures},
|
||||
booktitle = {Proceedings of the 2014 ACM Conference on SIGCOMM},
|
||||
series = {SIGCOMM '14},
|
||||
year = {2014},
|
||||
isbn = {978-1-4503-2836-4},
|
||||
location = {Chicago, Illinois, USA},
|
||||
pages = {259--270},
|
||||
numpages = {12},
|
||||
url = {http://doi.acm.org/10.1145/2619239.2626333},
|
||||
doi = {10.1145/2619239.2626333},
|
||||
acmid = {2626333},
|
||||
publisher = {ACM},
|
||||
address = {New York, NY, USA},
|
||||
keywords = {location-independence, mobility, network architecture},
|
||||
}
|
||||
@inproceedings{locking-chi2016,
|
||||
author = {Marian Harbach and Alexander De Luca and Serge Egelman},
|
||||
title = {The Anatomy of Smartphone Unlocking: A Field Study of Android Lock
|
||||
Screens},
|
||||
booktitle = {Proceedings of the 2016 ACM Conference on Human Factors in
|
||||
Computing Systems (CHI'2016)},
|
||||
Year = {2016},
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
pytex/cls/nsfcnsproposal.cls
|
|
@ -0,0 +1,285 @@
|
|||
\NeedsTeXFormat{LaTeX2e}
|
||||
|
||||
% 22 Oct 2010 : GWA : New proposal class. Originally based on the
|
||||
% proposalnsf.cls file downloaded from here:
|
||||
% http://www-pord.ucsd.edu/~sgille/how_to/proposal_prep.html
|
||||
|
||||
\RequirePackage{color}
|
||||
\RequirePackage{calc}
|
||||
\RequirePackage{mathpazo}
|
||||
\RequirePackage{ulem}
|
||||
\ProvidesClass{nsfcnsproposal}[2010/10/22 GWA NSF CNS Proposal Class]
|
||||
\DeclareOption*{\PassOptionsToClass{\CurrentOption}{memoir}}
|
||||
\ProcessOptions
|
||||
\LoadClass[onecolumn,oneside,final]{memoir}
|
||||
\RequirePackage{colortbl}
|
||||
\RequirePackage{threeparttable}
|
||||
|
||||
% 22 Oct 2010 : GWA : Set up simple chapter headings.
|
||||
|
||||
\chapterstyle{article}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\paperheight}{11in}
|
||||
% \setlength{\paperwidth}{8.5in}
|
||||
|
||||
\setstocksize{11in}{8.5in}
|
||||
\settrimmedsize{11in}{8.5in}{*}
|
||||
\settrims{0pt}{0pt}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\textwidth}{\paperwidth - 2in}
|
||||
% \setlength{\textheight}{\paperheight - 2in}
|
||||
|
||||
%\settypeblocksize{9in}{6.5in}{*}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\oddsidemargin}{1in}
|
||||
% \setlength{\evensidemargin}{1in}
|
||||
|
||||
\setlrmarginsandblock{1in}{1in}{*}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\topmargin}{1in}
|
||||
% \setlength{\headheight}{0pt}
|
||||
% \setlength{\headsep}{0pt}
|
||||
|
||||
\setulmarginsandblock{1in}{1in}{*}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\footskip}{36pt}
|
||||
% \setlength{\headheight}{0pt}
|
||||
|
||||
\setheadfoot{0.1pt}{36pt}
|
||||
|
||||
% 22 Oct 2010 : GWA : Used to be:
|
||||
% \setlength{\marginparsep}{0.5cm}
|
||||
% \setlength{\marginparwidth}{1.5cm}
|
||||
|
||||
\setmarginnotes{0.5cm}{1.5cm}{0.1cm}
|
||||
|
||||
% 22 Oct 2010 : GWA : memoir command to actually apply the layout.
|
||||
|
||||
\checkandfixthelayout
|
||||
|
||||
% 22 Oct 2010 : GWA : Proposal page style.
|
||||
|
||||
\let\@internalcite\cite
|
||||
\def\fullcite{\def\citeauthoryear##1##2##3{##1, ##3}\@internalcite}
|
||||
\def\fullciteA{\def\citeauthoryear##1##2##3{##1}\@internalcite}
|
||||
\def\shortcite{\def\citeauthoryear##1##2##3{##2, ##3}\@internalcite}
|
||||
\def\shortciteA{\def\citeauthoryear##1##2##3{##2}\@internalcite}
|
||||
\def\citeyear{\def\citeauthoryear##1##2##3{##3}\@internalcite}
|
||||
|
||||
\newlength{\lefttitle}
|
||||
\newlength{\righttitle}
|
||||
\setlength{\lefttitle}{0.8\textwidth}
|
||||
\setlength{\righttitle}{0.18\textwidth}
|
||||
|
||||
\makechapterstyle{proposal}{%
|
||||
\renewcommand{\chapterheadstart}{}
|
||||
\renewcommand{\printchaptername}{}
|
||||
\renewcommand{\chapternamenum}{}
|
||||
\renewcommand{\printchapternum}{}
|
||||
\renewcommand{\afterchapternum}{}
|
||||
\renewcommand{\printchaptertitle}[1]{%
|
||||
\vspace{0.5\onelineskip}
|
||||
\Large\scshape\MakeUppercase{##1}
|
||||
}
|
||||
\renewcommand{\afterchaptertitle}{\vspace{0.5\onelineskip} \hrule}
|
||||
}
|
||||
\makechapterstyle{summary}{%
|
||||
\renewcommand{\chapterheadstart}{}
|
||||
\renewcommand{\printchaptername}{}
|
||||
\renewcommand{\chapternamenum}{}
|
||||
\renewcommand{\printchapternum}{}
|
||||
\renewcommand{\afterchapternum}{}
|
||||
\renewcommand{\printchaptertitle}[1]{
|
||||
\noindent\begin{minipage}[t]{\lefttitle}
|
||||
\vspace{0pt}
|
||||
\raggedright
|
||||
\large\scshape{##1}
|
||||
\vspace*{0.10in}
|
||||
\end{minipage}
|
||||
\hfill
|
||||
\begin{minipage}[t]{\righttitle}
|
||||
\vspace{0pt}
|
||||
\raggedleft
|
||||
{\small \scshape NSF Proposal\\
|
||||
\submissiondate}
|
||||
\end{minipage}
|
||||
\small
|
||||
\theauthors\\
|
||||
Type: CI-P; CISE Core Division: IIS; Keywords: databases, smartphones,
|
||||
benchmarking
|
||||
}
|
||||
\renewcommand{\afterchaptertitle}{\vspace{0.5\onelineskip} \hrule \vspace{0.3\onelineskip}}
|
||||
}
|
||||
\makechapterstyle{letter}{%
|
||||
\renewcommand{\chapterheadstart}{}
|
||||
\renewcommand{\printchaptername}{}
|
||||
\renewcommand{\chapternamenum}{}
|
||||
\renewcommand{\printchapternum}{}
|
||||
\renewcommand{\afterchapternum}{}
|
||||
\renewcommand{\printchaptertitle}[1]{}
|
||||
\renewcommand{\afterchaptertitle}{}
|
||||
}
|
||||
\definecolor{shadecolor}{gray}{0.9}
|
||||
\newcommand{\proposalsec}[1]{%
|
||||
\large\bfseries\raggedright #1
|
||||
}
|
||||
\setsecheadstyle{\proposalsec}
|
||||
\renewcommand{\thesection}{\arabic{section}}
|
||||
\chapterstyle{proposal}
|
||||
|
||||
% 26 Oct 2010 : GWA : Section styles.
|
||||
\setsecnumformat{\csname the#1\endcsname\space---\space}
|
||||
\setbeforesecskip{-1.0ex plus -0.5ex minus -0.2ex}
|
||||
\setaftersecskip{1.0ex plus 0.2ex minus 0.1ex}
|
||||
|
||||
% 26 Oct 2010 : GWA : Subsection styles.
|
||||
\setcounter{secnumdepth}{2}
|
||||
\setsubsecheadstyle{\bfseries\raggedright}
|
||||
\setbeforesubsecskip{1ex plus -0.2ex minus -0.2ex}
|
||||
\setaftersubsecskip{0.3ex plus -0.2ex minus -0.2ex}
|
||||
\setbeforesubsubsecskip{-1.0ex plus -0.2ex minus -0.2ex}
|
||||
\setaftersubsubsecskip{-0.3ex plus -0.2ex minus -0.2ex}
|
||||
|
||||
\newcounter{flushenumbfenum}
|
||||
\newenvironment{flushenumbf}{
|
||||
\begin{list}{\textbf{\arabic{flushenumbfenum}.}}
|
||||
{\setlength{\leftmargin}{0pt}}%
|
||||
\setlength{\labelwidth}{0pt}
|
||||
\setlength{\itemindent}{0.5em}
|
||||
\setlength{\labelsep}{0.5em}
|
||||
\usecounter{flushenumbfenum}}
|
||||
{\end{list}}
|
||||
|
||||
% 22 Nov 2010 : GWA : Research questions environment. Produces a running list
|
||||
% (i.e., counters do not reset) prefaced by "Q1", "Q2", etc.
|
||||
|
||||
\newcounter{researchquestionenum}
|
||||
\newcounter{researchquestionenumtmp}
|
||||
\newenvironment{researchquestions}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\vspace{0.5em}
|
||||
\begin{framed}
|
||||
\vspace*{-0.5em}
|
||||
\begin{list}{\textbf{Q\arabic{researchquestionenum}.}}
|
||||
{\setlength{\leftmargin}{2em}}%
|
||||
\setlength{\labelwidth}{4em}
|
||||
\setlength{\itemindent}{0pt}
|
||||
\setlength{\labelsep}{0.5em}
|
||||
\setlength{\topsep}{0pt}
|
||||
\setlength{\partopsep}{0pt}
|
||||
\setlength{\parskip}{0pt}
|
||||
\usecounter{researchquestionenum}
|
||||
\setcounter{researchquestionenum}{\value{researchquestionenumtmp}}}
|
||||
{\end{list}
|
||||
\end{framed}
|
||||
\setcounter{researchquestionenumtmp}{\value{researchquestionenum}}
|
||||
\restoretrivseps}
|
||||
|
||||
\newcounter{researchtaskenum}
|
||||
\newenvironment{researchtasks}{
|
||||
\begin{list}{\textbf{R\arabic{researchtaskenum}.}}
|
||||
{\setlength{\leftmargin}{2em}}%
|
||||
\setlength{\labelwidth}{4em}
|
||||
\setlength{\itemindent}{0pt}
|
||||
\setlength{\labelsep}{0.5em}
|
||||
\usecounter{researchtaskenum}}
|
||||
{\end{list}}
|
||||
|
||||
\newcounter{researchmethodsenum}
|
||||
\newenvironment{researchmethods}{
|
||||
\begin{list}{\alph{researchmethodsenum}.}
|
||||
{\setlength{\leftmargin}{0pt}}%
|
||||
\setlength{\labelwidth}{0pt}
|
||||
\setlength{\itemindent}{0.5em}
|
||||
\setlength{\labelsep}{0.5em}
|
||||
\usecounter{researchmethodsenum}}
|
||||
{\end{list}}
|
||||
|
||||
\newcounter{broaderimpactenum}
|
||||
\newcounter{broaderimpactenumtmp}
|
||||
\newenvironment{broaderimpacts}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\vspace{0.2em}
|
||||
\begin{framed}
|
||||
\vspace*{-0.5em}
|
||||
\begin{list}{\textbf{B\arabic{broaderimpactenum}.}}
|
||||
{\setlength{\leftmargin}{2em}}%
|
||||
\setlength{\labelwidth}{4em}
|
||||
\setlength{\itemindent}{0pt}
|
||||
\setlength{\labelsep}{0.5em}
|
||||
\setlength{\topsep}{0pt}
|
||||
\setlength{\partopsep}{0pt}
|
||||
\setlength{\parskip}{0pt}
|
||||
\usecounter{broaderimpactenum}
|
||||
\setcounter{broaderimpactenum}{\value{broaderimpactenumtmp}}}
|
||||
{\end{list}
|
||||
\end{framed}
|
||||
\setcounter{broaderimpactenumtmp}{\value{broaderimpactenum}}
|
||||
\restoretrivseps}
|
||||
|
||||
\newcounter{timeenumcounter}
|
||||
\newenvironment{timeenum}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\begin{list}{\textbf{t = \arabic{timeenumcounter}}}
|
||||
{\setlength{\leftmargin}{3em}}%
|
||||
\setlength{\labelwidth}{3em}
|
||||
\setlength{\itemindent}{0pt}
|
||||
\setlength{\labelsep}{1em}
|
||||
\usecounter{timeenumcounter}
|
||||
\setcounter{timeenumcounter}{-1}}
|
||||
{\end{list}
|
||||
\restoretrivseps}
|
||||
|
||||
\newenvironment{conclusion}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\vspace{0.2em}
|
||||
\begin{framed}
|
||||
\vspace*{-0.5em}}
|
||||
{\end{framed}
|
||||
\restoretrivseps}
|
||||
|
||||
\newenvironment{tightcenter}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\begin{center}}
|
||||
{\end{center}
|
||||
\restoretrivseps}
|
||||
|
||||
\newenvironment{tightcentertitle}{
|
||||
\savetrivseps
|
||||
\zerotrivseps
|
||||
\vspace*{0.1in}
|
||||
\begin{center}}
|
||||
{\end{center}
|
||||
\vspace*{0.1in}
|
||||
\restoretrivseps}
|
||||
|
||||
\tightlists
|
||||
\firmlists
|
||||
\renewcommand{\bibname}{References}
|
||||
\renewcommand{\thetable}{\arabic{table}}
|
||||
\newenvironment{indentpar}[1]{
|
||||
\begin{list}{}%
|
||||
{\setlength{\leftmargin}{#1}}%
|
||||
\setlength{\itemindent}{0em}
|
||||
\setlength{\parskip}{0pt}
|
||||
\setlength{\parsep}{0pt}
|
||||
\setlength{\labelsep}{0em}}
|
||||
{\end{list}}
|
||||
|
||||
% 06 Dec 2010 : GWA : Figure and captioning commands.
|
||||
\renewcommand{\thefigure}{\arabic{figure}}
|
||||
\captiondelim{ --- }
|
||||
\captionnamefont{\small\bfseries}
|
||||
\captiontitlefont{\small}
|
||||
\nonzeroparskip
|
||||
\setlength{\parindent}{0pt}
|
|
@ -1,8 +1,8 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
In a preliminary study~\cite{pocketdata}, we instrumented Android smartphones being used as the primary device of 11 UB students, faculty and staff for a period of one month.
|
||||
The SQLite embedded database included as part of the Android platform was modified to log a trace of all queries executed, along with metadata such as the number of rows returned, time taken, and the application process executing the query.
|
||||
The SQLite embedded database included as part of the Android platform was modified to log a trace of all SQL statements executed, along with metadata such as the number of rows returned, time taken, and the application process that issued the statement.
|
||||
To protect participant privacy, our instrumentation removed as much personally-identifying information as possible and recorded prepared statement arguments only as hash values.
|
||||
With participant permission, we have made these traces publicly available.
|
||||
With participant permission, we have made these traces publicly available~\cite{pocketdata}.
|
||||
|
||||
We conducted a preliminary analysis of these traces, the key findings of which we summarize here to provide a sense of the type of information that we will make available to the \PocketData{} community.
|
||||
We captured approximately 45 million statements executed by SQLite over the one-month period.
|
||||
|
@ -31,7 +31,7 @@ Figure~\ref{fig:coarseSelectComplexity} shows the distribution of \texttt{SELECT
|
|||
Even at this coarse-grained view of query complexity, the read-only portion of the embedded workload distinguishes itself from existing TPC benchmarks.
|
||||
Like TPC-C~\cite{tpcc}, the vast majority of the workload involves simple, small requests for data that touch a small number of tables.
|
||||
29.15 million, or about 87\%, of the \texttt{SELECT} queries were simple select-project-join queries. Of those, 28.72 million, or about 86\% of all queries, were simple single-table scans or look-ups. In these queries, which form the bulk of SQLite's read workload, the query engine exists simply to provide an iterator over the relationally structured data it is being used to store.
|
||||
Conversely, the workload also has a tail that consists of complex, TPC-H-like~\cite{tpch} queries. Several hundred thousand queries involve at least 2 levels of nesting, and over a hundred thousand queries access 5 or more tables. As an extreme example, our trace includes 10 similar \texttt{SELECT} queries issued by the Google Play Games Service, each of which accesses up to 8 distinct tables to combine developer-provided game state, user preferences, device profile meta-data, and historical game-play results from the user.
|
||||
Conversely, the workload also has a tail that consists of complex, TPC-H-like~\cite{tpch} queries. Several hundred thousand queries involve at least 2 levels of nesting, and over a hundred thousand queries access 5 or more tables. As an extreme example, our trace includes 10 similar \texttt{SELECT} queries issued by the Google Play Games Service, each of which accesses up to 8 distinct tables to combine and summarize developer-provided game state, user preferences, device profile meta-data, and historical game-play results from the user.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
|
@ -47,10 +47,48 @@ This query would have a join width of 2 (\texttt{R}, \texttt{S}) and 2 conjuncti
|
|||
% For uniformity, \texttt{NATURAL JOIN} and \texttt{JOIN ON} (\textit{e.g.}, \texttt{SELECT R.A from R JOIN S ON B}) expressions appearing in the \texttt{FROM} clause are rewritten into equivalent expressions in the \texttt{WHERE} clause.
|
||||
The first column of this table indicates queries to a single relation. Just over 1 million queries were full table scans (0 where clauses), and just under 27 million queries involved only a single conjunctive term. This latter class constitutes the bulk of the simple query workload, at just over 87\% of the simple look-up queries. Single-clause queries appear to be the norm.
|
||||
|
||||
\begin{figure*}
|
||||
\begin{subfigure}[t]{0.5\textwidth}
|
||||
\centering
|
||||
\includegraphics[width=0.9\textwidth]{graphs/select_count_cdf_by_app}
|
||||
\caption{}
|
||||
\label{fig:selectByApp:all}
|
||||
\end{subfigure}%
|
||||
\begin{subfigure}[t]{0.5\textwidth}
|
||||
\centering
|
||||
\includegraphics[width=0.9\textwidth]{graphs/select_percent_simple_cdf_by_app}
|
||||
\caption{}
|
||||
\label{fig:selectByApp:simple}
|
||||
\end{subfigure}%
|
||||
\caption{\textbf{Breakdown of \texttt{SELECT} queries by app. (a) Cumulative distribution of applications by the number of \texttt{SELECT} queries issued (note the logarithmic scale). (b) Cumulative distribution of applications by the percent of the app's \texttt{SELECT} queries that are key-value queries (full table scans or exact key look-ups).}}
|
||||
\label{fig:selectByApp}
|
||||
\end{figure*}
|
||||
Over the course of the one-month trace we observed 179 distinct apps, varying from built-in Android applications such as \textit{Gmail} or \textit{YouTube}, to video players such as \textit{VLC}, to games such as \textit{3 Kingdoms}. Figure~\ref{fig:selectByApp:all} shows the cumulative distribution of apps sorted by the number of queries that the app performs. The results are extremely skewed, with the top 10\% of apps each posing more than 100 thousand queries over the one month trace. The most query-intensive system service, \textit{Media Storage}, was responsible for 13.57 million queries or just shy of 40 queries per minute per phone. The most query-intensive user-facing app was \textit{Google+}, which performed 1.94 million queries over the course of the month or 5 queries per minute.
|
||||
At the other end of the spectrum, the bottom 10\% of apps posed as few as 30 queries over the entire month.
|
||||
|
||||
We noted above that a large proportion of \texttt{SELECT} queries were exact look-ups; Indeed many applications running on the device are using SQLite as a simple key-value store. For 24 apps (13.4\%), we observed \emph{only} key-value queries during the entire, month-long trace.
|
||||
We noted above that a large proportion of \texttt{SELECT} queries were exact look-ups; indeed, many applications running on the device are using SQLite as a simple key-value store. As seen in Figure~\ref{fig:selectByApp:simple}, for 24 apps (13.4\%), we observed \emph{only} queries that would have been supported by a trivial key-value API for the full span of the month-long trace.
|
||||
|
||||
\begin{figure}
|
||||
\begin{subfigure}[t]{0.5\textwidth}
|
||||
\centering
|
||||
\includegraphics[width=0.9\textwidth]{graphs/data_mod_ops_cdf_by_app}
|
||||
\caption{}
|
||||
\label{fig:updateByApp:modOps}
|
||||
\end{subfigure}%
|
||||
\begin{subfigure}[t]{0.5\textwidth}
|
||||
\centering
|
||||
\includegraphics[width=0.9\textwidth]{graphs/read_write_ratio_cdf_by_app}
|
||||
\caption{}
|
||||
\label{fig:updateByApp:writeRatio}
|
||||
\end{subfigure}%
|
||||
\caption{\textbf{App-level write behavior. (a) Cumulative distribution of applications by number of data manipulation statements performed (note the logarithmic scale). (b) Cumulative distribution of applications by read/write ratio. }}
|
||||
\label{fig:updateByApp}
|
||||
\end{figure}
|
||||
|
||||
Figure~\ref{fig:updateByApp:modOps} illustrates app-level write workloads, sorting applications by the number of \texttt{INSERT}, \texttt{UPSERT}, \texttt{UPDATE}, and \texttt{DELETE} operations that could be attributed to each. The CDF is almost perfectly exponential, suggesting that the number of write statements performed by any given app follows a long-tailed distribution, a feature to be considered in the design of a pocket data benchmark.
|
||||
|
||||
Figure~\ref{fig:updateByApp:writeRatio} breaks apps down by their read/write ratio. Surprisingly, 25 apps (14\% of the apps seen) did not perform a single write over the course of the entire trace. Manual examination of these apps suggested two possible explanations. Several apps have reason to store state that is updated only infrequently. For example, \textit{JuiceSSH} or \textit{Key Chain} appear to use SQLite as a credential store. A second, far more interesting class of apps includes apps like \textit{Google Play Newsstand}, \textit{Eventbrite}, \textit{Wifi Analyzer}, and \textit{TuneIn Radio Pro}, all of which have components that query data stored in the cloud. We suspect that the cloud data is being encapsulated into a pre-constructed SQLite database and being pushed to, or downloaded by, the client applications.
|
||||
This type of behavior might be compared to a bulk ETL process or log shipment in a server-class database workload, except that here, the database has already been constructed. Pre-caching through database encapsulation is a unique feature of embedded databases, and one that is already being used in a substantial number of apps.
|
||||
|
||||
\begin{figure*}[t]
|
||||
\centering
|
||||
|
@ -70,14 +108,14 @@ We noted above that a large proportion of \texttt{SELECT} queries were exact loo
|
|||
\caption{}
|
||||
\label{fig:app:rowcount}
|
||||
\end{subfigure}%
|
||||
\caption{Per-App Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}
|
||||
\caption{\textbf{Per-App Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}}
|
||||
\label{fig:app}
|
||||
\end{figure*}
|
||||
|
||||
Figure~\ref{fig:app} shows query interarrival times, runtimes, and returned row
|
||||
counts for ten of the most active SQLite clients. As seen in
|
||||
Figure~\ref{fig:app:interarrival}, the 0.01Hz periodicity is not unique to any one
|
||||
application, further suggesting filesystem locking as a culprit. Two of the most
|
||||
Figure~\ref{fig:app:interarrival}, a 0.01Hz periodicity in arrival times is common to all
|
||||
applications, suggesting filesystem locking as a culprit. Two of the most
|
||||
prolific SQLite clients, \textit{Google Play services} and \textit{Media Storage}
|
||||
appear to be very bursty: 70\% of all statements for these applications are issued
|
||||
within 0.1ms of the previous statement. Also interesting is the curve for queries
|
||||
|
@ -85,6 +123,7 @@ issued by the \textit{Android System} itself. The interarrival time CDF appears
|
|||
to be almost precisely logarithmic for rates above 10$\mu$s, but has a notable lack
|
||||
of interarrival times in the 1ms to 10ms range. This could suggest caching
|
||||
effects, with the cache expiring after 1ms.
|
||||
|
||||
As seen in Figure~\ref{fig:app:runtime}, most apps hold to the average runtime of
|
||||
100$\mu$s, with several notable exceptions. Over 50\% of the
|
||||
\textit{Android System}'s statements take on the order of 1ms. Just under 20\% of
|
||||
|
@ -99,7 +138,7 @@ the number of rows returned in general varies much more widely. Many of these
|
|||
apps' user interfaces have both a list and a search view that show multiple records
|
||||
at a time, suggesting that these views are backed directly by SQLite. Although all
|
||||
apps have long tails, two apps in particular, \textit{Gmail} and \textit{Google+}, are
|
||||
notable for regularly issuing queries that return on the order of 100 rows.
|
||||
notable for regularly issuing queries that return on the order of hundreds of rows.
|
||||
|
||||
\begin{figure*}
|
||||
\centering
|
||||
|
@ -111,22 +150,23 @@ notable for regularly issuing queries that return on the order of 100 rows.
|
|||
\includegraphics[width=\textwidth]{graphs/facebook-minified.pdf}
|
||||
\caption{}
|
||||
\end{subfigure}%
|
||||
\caption{Variations in bursty data access patterns~\cite{ramamurthy2015pocketdata} for WhatsApp (a) and Facebook (b).}
|
||||
\caption{\textbf{Variations in bursty data access patterns~\cite{ramamurthy2015pocketdata} for WhatsApp (a) and Facebook (b).}}
|
||||
\label{fig:burstiness}
|
||||
\end{figure*}
|
||||
|
||||
Figure~\ref{fig:burstiness} shows variations in query burstiness across multiple apps and users\footnote{The PIs have already incorporated material from this proposal into their coursework. Figure \ref{fig:burstiness} is from a student report~\cite{ramamurthy2015pocketdata} from UB's CSE-662, jointly instructed by PIs Kennedy and Ziarek.}.
|
||||
Figure~\ref{fig:burstiness} shows variations in query burstiness across multiple apps and users\footnote{The PIs have already incorporated material from this proposal into their coursework. Figure \ref{fig:burstiness} is from a student report~\cite{ramamurthy2015pocketdata} from UB's CSE-662, jointly instructed by PIs Kennedy and Ziarek. The student group performed an app-centric analysis of the query traces.}.
|
||||
Two features immediately emerge from this data.
|
||||
First, \PocketData{} workloads are extremely bursty; the default steady state is completely idle, with infrequent bursts of hundreds of operations per second.
|
||||
Second, the nature of these bursts varies significantly by the calling app; in this trace, Facebook generates a read-only workload, while WhatsApp produces two bursts, each with a distinct mix of updates, inserts, deletes, and selects.
|
||||
|
||||
\medskip
|
||||
|
||||
We plan to freely releasing aggregate metrics about database usage patterns in
|
||||
We will freely release aggregate metrics about database usage patterns in
|
||||
embedded smartphone databases.
|
||||
We also plan to make our source traces available under IRB-approved conditions.
|
||||
We also plan to make our source traces available to researchers with approval
|
||||
from their institution's IRB.
|
||||
By doing this, we will enable other researchers to begin exploring the bottlenecks
|
||||
in and practical limitations of existing embedded databases and abstraction layers
|
||||
like object-relational mappers developed over them.
|
||||
in and practical limitations of existing embedded databases, as well as in abstraction layers
|
||||
like object-relational mappers.
|
||||
Better understanding the space will help to identify new research challenges, and
|
||||
help to encourage researchers to join the \PocketData{} community.
|
|
@ -3,7 +3,8 @@
|
|||
We will provide an instrumentation toolkit for the \PocketData{} community. The goal of this toolkit is twofold: (1) Gathering usage traces and metrics from phones deployed in real-world settings, and (2) Reliably measuring system performance on simulated and replayed \PocketData{} workloads.
|
||||
There are several challenges unique to the \PocketData{} setting that make instrumenting smartphone embedded databases difficult.
|
||||
The simplest of these is that smartphones rely on specialized operating systems, hardware, and virtualization that can make it difficult to deploy existing measurement tools designed for desktops.
|
||||
Many of these tools are easily portable, but there are several more subtle and difficult challenges involved in instrumentation.
|
||||
Many of these tools can be ported and we will endeavor to supplement existing community efforts in doing so.
|
||||
There are also several more subtle challenges specific to instrumenting \PocketData{}.
|
||||
|
||||
A key challenge is the types of bottlenecks that \PocketData{} workloads encounter.
|
||||
Typical metrics for enterprise benchmarks include throughput at saturation, joules per unit of throughput, and throughput vs latency curves.
|
||||
|
@ -18,7 +19,7 @@ For example, when a CPU is spending time idling, reliably attributing CPU cycles
|
|||
A further concern that makes \PocketData{} instrumentation difficult is that embedded databases are typically stored in self-contained files.
|
||||
Replicating an embedded database can be as simple as initiating a file transfer over HTTP.
|
||||
We observed many apps exploiting this feature in practice~\cite{pocketdata}.
|
||||
Because file transfers bypass the normal embedded database library, fully capturing an app's interactions with an embedded database requires jointly instrumenting other aspects of the OS, including the filesystem and network layers.
|
||||
File transfers bypass the normal embedded database library, so fully capturing an app's interactions with an embedded database requires jointly instrumenting other aspects of the OS, including the filesystem and network layers.
|
||||
|
||||
As part of the proposed work, we will develop an instrumentation toolkit that can be used to reliably track an app's embedded database activities and to reliably measure embedded database performance.
|
||||
As part of the proposed work, we will develop an instrumentation toolkit that can reliably track an app's embedded database activities and measure all facets of an embedded database's performance.
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ suite, which will serve three roles for the \PocketData{} community.
|
|||
First, a benchmark will foster research on embedded databases by
|
||||
creating a realistic standard for evaluation, allowing for fair comparisons
|
||||
across competing research efforts.
|
||||
Second, by providing a precise set of metrics to optimize, a benchmark
|
||||
Second, by providing a precise set of metrics to optimize for, a benchmark
|
||||
will serve to guide the research community's efforts towards pertinent
|
||||
real-world challenges faced by smartphone applications.
|
||||
|
||||
|
@ -16,9 +16,9 @@ effort to track changes in app usage behaviors and bottlenecks.
|
|||
We will develop a modular benchmark along the lines of
|
||||
PolePosition~\cite{poleposition}, driven by modules that
|
||||
capture the semantics and behavior of a class of applications.
|
||||
Using the metrics data that we gather and release, we will lead a
|
||||
an effort to continually monitor for changes in app usage patterns,
|
||||
and how phone users engage with data-driven apps.
|
||||
Based on the metrics gathering efforts discussed above, we will lead
|
||||
an effort to continually monitor apps' data usage patterns for changes,
|
||||
as well as for changes in how phone users engage with data-driven apps.
|
||||
As new patterns are discovered by the \PocketData{} community,
|
||||
we will maintain \textit{a repository of modules describing these
|
||||
behaviors}.
|
||||
|
@ -31,8 +31,8 @@ Ideally, we will be able to link individual queries to triggering events (user i
|
|||
Although we hope to automate this process eventually, our initial approach will be to focus on one app at a time.
|
||||
This will not only help us to better understand the space, but also to generate realistic datasets by being able to analyze the specific app's schema and updates/inserts.
|
||||
The Application tier of the full benchmark will consist of a representative cover of the 179 apps that we encountered in our preliminary analysis, as well as apps that we encounter in subsequent data gathering efforts.
|
||||
The User tier will simulate the complete phone environment; Statistics for single user include of a cluster of app modules, and patterns of charging (when is the phone plugged in?), network access (when is the phone on the internet, and with what quality?), and other behavioral traits that impact app data access patterns.
|
||||
To simulate users, we will use standard clustering techniques on our trace data to create both canonical user profiles, and to identify natural variation around those profiles.
|
||||
The User tier will simulate the complete phone environment. Statistics for a single user include a cluster of app modules, patterns of charging behavior (when is the phone plugged in?), network access (when is the phone on the internet, and with what quality?), and other behavioral traits that impact app data access patterns.
|
||||
To simulate users, we will use standard clustering techniques on our trace data first to create canonical user profiles, and then to identify natural variation around those profiles.
|
||||
|
||||
|
||||
It is reasonable to ask why a specialized \PocketData{} database
|
||||
|
@ -45,12 +45,12 @@ and mobile software.
|
|||
Although AndroBench does include a component for simulating
|
||||
the filesystem access patterns of SQLite, neither of these
|
||||
benchmarks explicitly generates the structured data access patterns
|
||||
necessary to evaluate a data management system.
|
||||
necessary to evaluate a complete data management system.
|
||||
Previous research efforts~\cite{jeong2013iostack} have used mobility
|
||||
traces generated by MobiGen, fed to a virtual machine running
|
||||
standard apps to generate semi-realistic traces of embedded
|
||||
common apps such as Facebook to generate semi-realistic traces of embedded
|
||||
database access patterns.
|
||||
Although our approach follows a more principled approach based
|
||||
Although \PocketData{} follows a more principled approach based on
|
||||
real-world traces, the metrics we release could be used to validate
|
||||
and standardize data generation tricks of this sort.
|
||||
|
||||
|
@ -68,17 +68,17 @@ The most intensive database user in our preliminary study,
|
|||
\textit{Google Play services} had 14.8 million statements attributed
|
||||
to it, just under half of which were writes.
|
||||
This equates to about one write every 3 seconds, which is substantial
|
||||
from a power management and latency perspective, but for concurrency.
|
||||
from a power management and latency perspective, but which is unlikely
|
||||
to create a concurrency bottleneck.
|
||||
Second, many OLTP benchmarks focus on comparatively simple
|
||||
queries.
|
||||
This is reasonably descriptive of a notable portion of the workload we
|
||||
observed in our preliminary study:
|
||||
A notable portion of the workload we observed in our preliminary study can indeed be described as simple:
|
||||
13\% of the applications we observed had a read workload that
|
||||
consisted exclusively of key/value queries, and over half of the applications
|
||||
we observed had a workload that consisted of at least 80\% key/value queries.
|
||||
|
||||
However, the remaining queries are not as simple.
|
||||
The more complex queries we observed in our preliminary study include
|
||||
However, the trace also exhibited a long tail of extremely complex queries.
|
||||
A small but significant number of queries we observed in our preliminary study include
|
||||
multiple levels of query nesting, wide joins, and extensive use of aggregation.
|
||||
As such, they more closely resemble analytics (OLAP) workload benchmarks
|
||||
such as TPC-H~\cite{tpch}, The Star-Schema Benchmark~\cite{ssb}, and
|
||||
|
@ -98,11 +98,11 @@ PolePosition simulates the behavior of specific data structure abstractions
|
|||
that need to be backed by a data management system.
|
||||
Because data structures are defined using higher-level operational
|
||||
semantics rather than through a fixed database API, databases are
|
||||
allowed to specialize for specific access patterns that the database may
|
||||
allowed to specialize benchmarks to specific access patterns that the database may
|
||||
be optimized for.
|
||||
The fundamental goals of PolePosition and the \PocketData{} benchmark
|
||||
are similar, but \PocketData{} will operate at a higher level of abstraction,
|
||||
capturing the behavior of entire apps and users engaging with those apps.
|
||||
capturing the behavior of entire apps, as well as users that engage with those apps.
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
|
||||
Even the short, month-long query trace with only 11 users on which our preliminary study was based included over 45 million SQL statements.
|
||||
As the experiment is scaled up, analyzing these query traces will become increasingly difficult.
|
||||
Compounding the issue, the comparatively high complexity of many of the queries makes it difficult to flatten the SQL parse trees into a simple relational format for analysis.
|
||||
Our preliminary analysis required repeated iterations of our feature extraction process: We would define a procedure for extracting interesting features of a SQL statement's parse tree, construct a visualization from the extracted feature, and then identify a new feature of interest.
|
||||
|
||||
|
||||
\begin{itemize}
|
||||
\item SQL parsing is heavyweight
|
||||
\item
|
||||
\end{itemize}
|
||||
As part of the proposed work, we will release tools for analyzing query logs that streamline this iterative process by making it easy to define new feature extractors.
|
||||
As feature extraction is an embarrassingly parallel task, simple optimizations like caching, parallelism, and incremental computation~\cite{kennedy2011dbtoaster} can be used to make these tools extremely efficient\footnote{As a comment on the utility of specialized tools for log analysis, we return to the CSE-662 project involving analyzing \PocketData{} logs. The four students began with a naive analysis tool (written by the students in Java) that took multiple hours to complete one iteration of the analytics cycle. By the end of the course, they had optimized the tool to run in under 10 seconds~\cite{ramamurthy2015pocketdata}.}.
|
||||
Source code for all visualizations that we release as part of our summary metrics will be released to the public to further encourage community participation in \PocketData{}.
|
|
@ -1,6 +1,6 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
|
||||
We will build an initial \PocketData{} community and facilitate engagement with the broader CISE community through outreach efforts including attending poster sessions and hosting workshops and tutorials
|
||||
We will build an initial \PocketData{} community and facilitate engagement with the broader CISE community through outreach efforts including attending poster and demo sessions and hosting workshops and tutorials
|
||||
co-located with major conferences in databases (VLDB, SIGMOD, ICDE), mobile and real-time systems (MobiSys, OSDI, RTSS, RTAS), and programming languages (POPL, PLDI, OOPSLA).
|
||||
Poster sessions provide an ideal opportunity to meet researchers in related areas, to advertise the resources we plan to offer, and to gather feedback about the needs of potential \PocketData{} community members.
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
The world's 2~billion smartphones and 4~million apps have become a large part
|
||||
of most people's computing experiences.
|
||||
%
|
||||
A common requirement of apps is persisting structured data, a task frequently
|
||||
Most apps need to persist structured data, a task frequently
|
||||
performed using an \textit{embedded database} such as SQLite.
|
||||
%
|
||||
These are heavily used, with Android smartphones generating an average of
|
||||
|
@ -38,7 +38,7 @@ understood.
|
|||
|
||||
%%%%%%%%%%%%%%%%
|
||||
|
||||
To date, there have been some initial explorations of small-scale data
|
||||
To date, there have been some initial explorations of small-, personal-, or pocket-scale data
|
||||
management, both in academia and by industry:
|
||||
%
|
||||
\begin{itemize}
|
||||
|
@ -46,9 +46,9 @@ management, both in academia and by industry:
|
|||
database responsiveness, and database performance on smartphones and tablets.
|
||||
\item Saarland University's Janiform Document project explores interactive
|
||||
manuscripts that include embedded, query-able research data and visualizations.
|
||||
\item Oracle, SAP Labs, Facebook, LMDB, SQLite, and WiredTiger are all actively
|
||||
engaged in research and development of embedded database software.
|
||||
\item Atanas Rountev's group at Ohio State is exploring responsiveness issues
|
||||
\item Oracle, SAP Labs, Facebook, LMDB, SQLite, and MongoDB are all actively
|
||||
engaged in research on and development of embedded database software.
|
||||
\item The Presto group at Ohio State is exploring responsiveness issues
|
||||
in Android caused by data-flow limitations.
|
||||
\item The DAS Lab at Harvard's work on adaptive data management considers
|
||||
the challenges of specializing databases for small data.
|
||||
|
@ -56,44 +56,46 @@ the challenges of specializing databases for small data.
|
|||
smartphone apps interact with embedded-databases.
|
||||
\end{itemize}
|
||||
%
|
||||
There is clearly interest in data management challenges that arise at the small- and
|
||||
pocket-scales.
|
||||
There is clearly interest in data management challenges that arise in small-scale data management.
|
||||
%
|
||||
Unfortunately, unlike the largely homogeneous workloads and platforms that
|
||||
common to research on classical monolithic enterprise databases, \PocketData{}
|
||||
is far more diverse.
|
||||
Unfortunately, unlike the largely homogeneous workloads and platforms that are
|
||||
standard in research on classical enterprise databases, this new \PocketData{}
|
||||
setting is far more diverse.
|
||||
%
|
||||
Data access patterns vary wildly by user, time of day, mix of installed apps,
|
||||
Data access patterns are extremely bursty and can vary wildly by user, time of day, mix of installed apps,
|
||||
network accessibility, and many other factors.
|
||||
%
|
||||
Platform properties such as RAM, persistent storage, CPU performance, and network
|
||||
bandwidth vary wildly, sometimes by multiple orders of magnitude.
|
||||
bandwidth also exhibit extreme variations across phones, sometimes by multiple orders of magnitude.
|
||||
%
|
||||
Resource availability can also vary: some users keep their phones constantly
|
||||
charged, while others go multiple days without charging.
|
||||
charged, while others go multiple days without plugging their phones in.
|
||||
|
||||
%%%%%%%%%%%%%%%%
|
||||
|
||||
The heterogeneity of the \PocketData{} setting, make it challenging for researchers to
|
||||
The heterogeneity of the \PocketData{} setting makes it challenging for researchers to
|
||||
understand the tradeoffs and requirements of the setting.
|
||||
%
|
||||
This lack of clear high-level goals, in turn, makes it difficult to clearly identify successful
|
||||
research contributions and creates a daunting environment for new research efforts.
|
||||
research contributions.
|
||||
%
|
||||
Lacking the resources necessary to better understand and adapt to \PocketData{} scale,
|
||||
Unfortunately, pinning down specific goals first requires a concerted effort to gather (and analyze)
|
||||
traces of data usage patterns from real-world settings, creating a high barrier to entry for new
|
||||
researchers.
|
||||
%
|
||||
Lacking the resources necessary to better understand and adapt to the \PocketData{} scale,
|
||||
research efforts in the area are presently limited.
|
||||
|
||||
%%%%%%%%%%%%%%%%
|
||||
|
||||
\textbf{Target Community}
|
||||
Research on mobile devices and the internet of things is cross cutting, intersecting communities
|
||||
Research on mobile devices and the more general space of the internet of things (IoT) is cross-cutting, intersecting communities
|
||||
that work on data management systems, real-time and embedded devices, programming languages,
|
||||
and operating and mobile systems. We believe research involving \PocketData{} also lies at
|
||||
the intersection of these communities. Specialized databases systems for embedded devices is
|
||||
a growing topic in the database community. As embedded processors become more capable, with
|
||||
larger amounts of main memory available (e.g. Intel Galileo), there is a growing push from the embedded
|
||||
and also from the real-time communities to explore larger software capabilities, including database
|
||||
systems and query processing systems, in embedded deployments. The programming language
|
||||
the intersection of these communities. Specialized database systems for embedded devices are re-emerging as an interesting topic in the database community. As embedded processors become more capable, with
|
||||
larger amounts of main memory available (e.g. Intel's Edison platform), there is a growing push from the embedded systems
|
||||
and the real-time communities to explore larger software capabilities, including database
|
||||
systems and query processing systems in embedded hardware deployments. The programming language
|
||||
community is exploring domain specific languages for specialized query processing.
|
||||
The mobile community is continually exploring how to push the envelope on smartphone-based computing,
|
||||
whether via power-aware mechanisms, or through more adaptive systems. Many of these solutions use mobile databases
|
||||
|
@ -101,12 +103,12 @@ as their fundamental computation engine (e.g. the `\texttt{maybe}' system develo
|
|||
consider the performance characteristics of database systems (e.g. power modeling).
|
||||
|
||||
This proposal aims to create a community research infrastructure around our \PocketData{} toolchain
|
||||
to enable a myriad of research activities for above mentioned communities. Additionally,
|
||||
to enable a myriad of research activities for the above-mentioned communities. Additionally,
|
||||
in this planning grant, we will explore the precise needs of these communities to ensure an
|
||||
infrastructure that has broad applicability.
|
||||
We will reach out to researchers in closely related areas including Internet of Things,
|
||||
Adaptive Data Management, and Sensor Networks, and help them to explore how \PocketData{}
|
||||
can impact their research.
|
||||
can help to improve their research.
|
||||
As part of these outreach efforts, we will provide resources that will simultaneously support
|
||||
researchers' existing projects, while also helping to enable new projects with a focus on
|
||||
\PocketData{}.
|
||||
|
@ -117,10 +119,20 @@ During this planning grant we will focus our efforts in three key areas:
|
|||
|
||||
\begin{enumerate}
|
||||
|
||||
\item \textbf{Expansion to IoT}: Our current efforts have focused primarily exploring questions
|
||||
\item \textbf{Growth of the Mobile Embedded Database community}: We have established an
|
||||
initial community of interested CISE researchers for \PocketData{} from both
|
||||
academia and industry. We believe that this community
|
||||
shows that there is sufficient interest within CISE to pursue our proposed \PocketData{} infrastructure.
|
||||
However, for long term success we would like to expand this community to ensure that the infrastructure
|
||||
meets the needs of the broader community and not just a specific research niche.
|
||||
|
||||
\item \textbf{Expansion to IoT}: Our preliminary efforts have focused on questions
|
||||
relating to \PocketData{} in the mobile domain, specifically Android. Although a \PocketData{}
|
||||
infrastructure based solely in Android is valuable, we believe a more comprehensive infrastructure
|
||||
must take into account recent developments in IoT.
|
||||
must take into account recent developments in IoT. There are similarities between how mobile
|
||||
applications leverage embedded databases and how proposed IoT applications would use embedded
|
||||
databases, specifically in the areas of personal health care devices that aggregate and summarize a user's personal data and smart city deployments where small devices process data before sending \emph{relevant} data for more centralized big data analytics.
|
||||
We propose to expand and modify our \PocketData{} infrastructure to meet the needs of the IoT community.
|
||||
|
||||
\item \textbf{Workshops and Tutorials}:
|
||||
To facilitate engagement with the broader CISE community and to develop an initial
|
||||
|
@ -129,14 +141,14 @@ major conferences in the database, systems, real-time systems, and programming l
|
|||
communities.
|
||||
Our budget includes funding for travel to such conferences to host workshops and
|
||||
tutorials. This will also enable the PIs to receive valuable feedback on the needs of the community in
|
||||
structuring the \PocketData{} infrastructure.
|
||||
designing and building out the \PocketData{} infrastructure.
|
||||
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
A successful planning grant will enable us to proceed with the development of a full
|
||||
\PocketData{} infrastructure.
|
||||
\PocketData{} infrastructure proposal.
|
||||
Concretely, the following three resources will be developed as part of the full infrastructure
|
||||
proposal:
|
||||
\begin{enumerate}
|
||||
|
@ -154,30 +166,55 @@ summarizing those datasets.
|
|||
\item \textbf{Standards and Benchmarks}:
|
||||
We will create a toolkit to establish a set of standards for evaluating research efforts on
|
||||
\PocketData{} for both Android and IoT.
|
||||
First, significant parts of the Android platform have been locked down for reasons of
|
||||
security and intellectual property, making properties like process scheduling and power usage
|
||||
difficult to measure reliably.
|
||||
The \PocketData{} setting requires unique metrics that can be difficult to reliably measure on the Android platform.
|
||||
The toolkit will include instrumentation for Android that will make it easier for researchers
|
||||
to measure the performance of their \PocketData{} tools.
|
||||
to measure performance through rarely used metrics like availability of idle time, thread scheduling, power consumption, and other measures that can be hard to gather reliably on the Android platform like CPU and memory usage for specific libraries.
|
||||
Second, to standardize comparisons across different research efforts, the toolkit will
|
||||
include a benchmark suite.
|
||||
This benchmark will create clearly defined metrics for evaluating success. Moreover, by
|
||||
This benchmark will establish clearly defined metrics for evaluating data management solutions. Moreover, by
|
||||
making it extensible, the benchmark will act as a clearinghouse for app behaviors discovered
|
||||
in the wild and changing database requirements.
|
||||
\item \textbf{Visualization}: We will create a data visualization tool and associated queries
|
||||
to help researchers understand and navigate the data. The raw traces gather are very
|
||||
larger and the bulk of the data may not be useful for answering a specific question a
|
||||
given researcher may which to explore. Through visualization, filtering, data navigation,
|
||||
and specialized queries, we will enable researchers to more quickly and accurately explore
|
||||
relevant characteristics of \PocketData{}.
|
||||
to help researchers understand and navigate the data. The raw traces we plan to offer researchers are very
|
||||
large. Moreover, the rich structure and variability of SQL queries generated by smartphone apps do not admit traditional indexing strategies often used for analytics. By providing database-driven tools that aid in the analysis and visualization of the resulting queries, we will enable researchers to more quickly and accurately explore
|
||||
relevant characteristics of real-world \PocketData{} workloads.
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
\textbf{Qualifications of the PIs:} Much like the cross cutting nature of \PocketData{}, the PIs bring
|
||||
expertise from three of the main communities our proposed \PocketData{} infrastructure would impact.
|
||||
PI Kennedy works in databases, PI Challen has expertise in mobile systems, and PI Ziarek works at the
|
||||
intersection of programming languages and real-time embedded systems.
|
||||
\textbf{Qualifications of the PIs:}
|
||||
%
|
||||
The PIs bring cross-cutting expertise from three of the main communities our
|
||||
proposed \PocketData{} infrastructure would impact.
|
||||
%
|
||||
All three have a record of successful
|
||||
collaboration~\cite{pocketdata,Challen:2015:MWE:2699343.2699361}, and PIs
|
||||
Kennedy and Ziarek have been working together for the last three and a half
|
||||
years on adaptive
|
||||
indexing~\cite{techreport,agarwal2013monadic,kennedy2015just}.
|
||||
%
|
||||
PI Kennedy's expertise covers databases, incremental
|
||||
computation~\cite{Ahmad:2012:DHD:2336664.2336670,kennedy2011dbtoaster,koch2013dbtoaster},
|
||||
uncertain data
|
||||
management~\cite{Kennedy:2011:JEO:1989323.1989410,5447879,Yang:2015:LOA:2824032.2824055},
|
||||
online aggregation~\cite{4812533,Kennedy:2011:FPP:1989323.1989482}, and
|
||||
compiler
|
||||
design~\cite{kennedy2011dbtoaster,Ahmad:2012:DHD:2336664.2336670,koch2013dbtoaster}.
|
||||
%
|
||||
PI Ziarek's expertise covers programming
|
||||
languages~\cite{Ziarek:2011:CAE:1993498.1993572,Ziarek:2010:LCC:1852977.1852979},
|
||||
real-time
|
||||
systems~\cite{Blanton:2013:NIC:2512989.2512994,Yan:2013:RDR:2512989.2512990},
|
||||
virtual
|
||||
machines~\cite{Pizlo:2010:HPE:1755913.1755922,Pizlo:2010:SFR:1806596.1806615},
|
||||
and compiler
|
||||
design~\cite{Sivaramakrishnan:2012:ERB:2258996.2259005,Ziarek:2008:FTS:1466762.1466777}.
|
||||
%
|
||||
PI Challen's expertise comprises smartphone systems, including
|
||||
networking~\cite{hotwireless2015-sharing,infocom2016-scans,hotnets2014-pocketsniffer},
|
||||
architectural~\cite{iiswc2015-agility}, energy
|
||||
management~\cite{mobicase2015-jouler,hotmobile2015-numerator} and
|
||||
security~\cite{mobicase2014-pocketmocker} aspects.
|
||||
|
||||
\subsection{Datasets}
|
||||
\input{sections/1-1-metrics.tex}
|
||||
|
|
|
@ -7,23 +7,23 @@
|
|||
%\item Existing related resources along with a justification that the proposed research cannot be accomplished with these resources at the institution or elsewhere
|
||||
%\end{itemize}}
|
||||
|
||||
In this section we present a few concrete projects that would benefit from \PocketData{} and then describe how the proposed infrastructure will enable
|
||||
reach for the PIs and the broader CISE community.
|
||||
In this section we present a few concrete projects that would benefit from \PocketData{} and describe how the proposed infrastructure will enable
|
||||
research for the PIs and the broader CISE community.
|
||||
|
||||
\subsection{Adaptive Indexes}
|
||||
\subsection{Adaptive Data Management}
|
||||
Selecting the correct physical structure for a database under a given workload is an extremely challenging~\cite{Chaudhuri:1997:ECI:645923.673646,Chaudhuri:1998:ALI:276304.276337,Chaudhuri:2007:SDS:1325851.1325856,Agrawal:2000:ASM:645926.671701} part of database management.
|
||||
The index selection problem becomes even harder when workload characteristics fluctuate rapidly or are not known in advance.
|
||||
There is currently substantial interest in a breed of self-adapting, adaptive index structures~\cite{idreos2007database,Idreos:2011:MWC:2002938.2002944} that address dynamic index selection by facilitating \textit{incremental, online} changes to the index.
|
||||
There is currently substantial interest in a breed of self-adjusting, adaptive index structures~\cite{idreos2007database,Idreos:2011:MWC:2002938.2002944} that address dynamic index selection by facilitating \textit{incremental, online} changes to the index.
|
||||
Examples of adaptive indexes include Cracker Indexes~\cite{Idreos:2012:AIM:2247596.2247667,Idreos:2007:UCD:1247480.1247527,Halim:2012:SDC:2168651.2168652}, Adaptive Merge Trees~\cite{Graefe:2010:SSI:1739041.1739087,Graefe:2012:CCA:2180912.2180918}, SMIX~\cite{Voigt:2013:SSI:2484838.2484862}, H2O~\cite{163421}, and Just-in-Time Data Structures~\cite{kennedy2015just}.
|
||||
Adaptive indexes automatically optimize their physical representation in response to incoming queries, reusing work used to answer the query to also improve subsequent queries. Given enough time, a stable workload, and queries that touch all data objects, an adaptive index eventually converges to a data representation similar to that of a static index.
|
||||
|
||||
\textbf{Infrastructure Justification:} Although there have been several efforts~\cite{Graefe:2010:BAI:1946050.1946063,schuhknecht2013uncracked} to develop benchmarks for adaptive indexes, these benchmarks rely on purely synthetic data and unit-tests rather than real-world scenarios.
|
||||
This is in part because the typical enterprise workloads that rarely exhibit the type of drastic shifts that adaptive indexes target.
|
||||
This is in part because typical enterprise workloads rarely exhibit the type of drastic shifts that adaptive indexes target.
|
||||
As a result most data management benchmarks evaluate systems under stable, steady-state workloads.
|
||||
By contrast, \PocketData{} workloads often show extreme variation in both application demands and resource availability.
|
||||
As a trivial example, an app might demand low-latency, low-power access to data when a user is actively using the phone, while admitting high-latency high-power organizational tasks when the phone is plugged in~\cite{Challen:2015:MWE:2699343.2699361}.
|
||||
|
||||
\textbf{Community Interest:} \textit{Stratos Idreos} from the DAS lab at Harvard will use the \PocketData{} metrics and benchmark workloads to evaluate his group's work on adaptive data systems.
|
||||
\textbf{Community Interest:} \textit{Stratos Idreos}'s DAS lab at Harvard will use the \PocketData{} metrics and benchmark workloads to evaluate their work on adaptive data systems.
|
||||
\citedquote{Stratos Idreos (Harvard)}{I think work on adaptive data systems could benefit. I assume Pocket Data will capture diverse workloads (from various apps) and so this would be a perfect environment to test adaptive data systems.
|
||||
I have a new project on easy to design systems out of modules that can be synthesized. The input is workloads. Perhaps PocketData can provide a testing framework for such work for designing data systems for mobile environments.
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ The relatively limited compute and memory resources available on tablets and sma
|
|||
|
||||
|
||||
\textbf{Infrastructure Justification:} Small-data analytics efforts are presently siloed, with most research efforts targeting entire software stacks, from the user interface front-end to the back-end database.
|
||||
The standard evaluation tools offered by the \PocketData{} benchmar would help to that decouple the research challenges involved in small-data analytics, and allow a broader community of researchers to contribute.
|
||||
The standard evaluation tools offered by the \PocketData{} benchmark would help to decouple the research challenges involved in small-data analytics and allow a broader community of researchers to contribute.
|
||||
For example, an embedded database benchmark simulating a visual query interface workload would serve as a standard for evaluating novel algorithms, indexes, and data management tools.
|
||||
|
||||
\textbf{Community Interest:}
|
||||
|
@ -53,11 +53,11 @@ In this respect, data-driven smartphone apps are similar to data-driven enterpri
|
|||
However, enterprise software is typically supported by experienced database administrators who can carefully fine-tune the database to efficiently support the application.
|
||||
This is not the case for smartphone apps, which instead rely on compiler tools and software libraries to efficiently mediate access to persistent data.
|
||||
Consequently, \PocketData{} offers new research opportunities at the interface between imperative programming languages like C, C\#, or Java, and back-end data management tools.
|
||||
Forms of inline SQL like LinQ~\cite{box2007linq,Meijer:2006:LRO:1142473.1142552} have existed for nearly a decade, but are not frequently used in the design smartphone apps.
|
||||
Forms of inline SQL like LinQ~\cite{box2007linq,Meijer:2006:LRO:1142473.1142552} have existed for nearly a decade, but are not frequently used in the development of smartphone apps.
|
||||
Instead, app developers frequently rely on higher level primitives including object-relational mappers~\cite{Melnik:2007:CMB:1247480.1247532} (ORMs) like Hibernate~\cite{hibernate} to mediate access to the database.
|
||||
Unfortunately, at present, most ORMs are implemented as libraries and lack the ability to introspect the invoking program.
|
||||
This creates an impedance mismatch between the available information and SQL's declarative syntax, forcing ORMs to misuse SQL, or to rely on optional hints provided by the app developer to provide efficient data access.
|
||||
In our preliminary exploration~\cite{pocketdata}, we found significant anti-patterns emerging in data access patterns.
|
||||
In our preliminary exploration~\cite{pocketdata}, we found significant anti-patterns emerging in queries to SQLite.
|
||||
Examples include the use of expensive \texttt{UPSERT} operations when \texttt{UPDATE}s would be sufficient, the use of multiple \texttt{SELECT} queries to dereference foreign-keys instead of using an outer-join query, and the use of separate read-then-write queries rather than in-place updates.
|
||||
Several research efforts, including StatusQuo~\cite{StatusQuo}, Sloth~\cite{Cheung:2014:SLV:2588555.2593672}, and Truffle/Graal~\cite{wimmer2012truffle} have addressed similar problems in enterprise data-driven applications and could find new challenges in the \PocketData{} space.
|
||||
Other research efforts explore data-flow in smartphones for performance optimization~\cite{yang-phd15,yang-icse15,rountev-cgo14} and correctness~\cite{yan-cgo14}, and would benefit from more detailed tools for introspection and measurement.
|
||||
|
@ -65,34 +65,35 @@ Other research efforts explore data-flow in smartphones for performance optimiza
|
|||
\textbf{Infrastructure Justification:} Research on data-driven app development requires a detailed understanding of application requirements, and programming language research needs real-world workloads to demonstrate its viability.
|
||||
The metrics that we propose to gather and the benchmark suite we propose to develop are critical for driving research in this space.
|
||||
|
||||
\textbf{Community Interest:} \textit{Nasko Rountev} of Ohio State will use \PocketData{} as part of the Presto group's work on data-flow analysis to debug of GUI responsiveness issues and as part of his LeakDroid project.
|
||||
\textbf{Community Interest:} \textit{Nasko Rountev} of Ohio State will use \PocketData{} as part of his work on data-flow analysis to debug GUI responsiveness issues and as part of his LeakDroid project.
|
||||
|
||||
\subsection{Database-App Coupling}
|
||||
Smartphone apps are integrated with the data management tools they use to a far greater degree than enterprise applications.
|
||||
Embedded databases are libraries that operate within the app's memory space, and not external tools.
|
||||
Apps generate virtually all queries procedurally, making it possible to specify their data management requirements extremely precisely at compile time.
|
||||
Moreover, access to data often occurs through higher-level primitives that are supported by their own library wrappers.
|
||||
Moreover, access to data often occurs through higher-level primitives like ORMs.
|
||||
In short, although embedded databases are in principle capable of emulating stand-alone database engines, in practice they are used more as toolkits of data management building blocks.
|
||||
The tight coupling between app and database promises to offer numerous avenues for workload-driven database optimization.
|
||||
A leader in this area is BerkeleyDB.
|
||||
Although BerkeleyDB does provide a SQL emulation front-end, its core functionality is to provide simple database building blocks like primary and secondary indexing, foreign-key consistency primitives, and transactional access to data.
|
||||
Similar efforts are taking place across multiple industrial research labs and startup companies, as numerous organizations have begun to invest into embedded databases, including MongoDB's WiredTiger~\cite{shakuntalagupta2015practical}, SAP's SqlAnywhere~\cite{4401024}, and Facebook's RocksDB, as well as open-source efforts including the H2 Database~\cite{mueller2006h2} and SQLite~\cite{sqlite}.
|
||||
Similar efforts are taking place across multiple industrial research labs and startup companies, as numerous organizations have begun to invest into embedded databases. Corporate investment in embedded databases includes MongoDB's WiredTiger~\cite{shakuntalagupta2015practical}, SAP's SqlAnywhere~\cite{4401024}, and Facebook's RocksDB, as well as open-source efforts including the H2 Database~\cite{mueller2006h2} and SQLite~\cite{sqlite}.
|
||||
The tight coupling between database and the invoking application also admits possibilities for more aggressive database specialization.
|
||||
Database compilers like DBToaster~\cite{kennedy2011dbtoaster,koch2013dbtoaster,Ahmad:2012:DHD:2336664.2336670}, HyPer/LLVM~\cite{Neumann:2011:ECE:2002938.2002940}, and Legorithmics~\cite{Klonatos:2013:ASO:2463676.2465334,Klonatos:2014:BEQ:2732951.2732959} use aggressive compilation to create a database uniquely specialized for a specific application's query and update workload.
|
||||
Database compilers like DBToaster~\cite{kennedy2011dbtoaster,koch2013dbtoaster,Ahmad:2012:DHD:2336664.2336670}, HyPer/LLVM~\cite{Neumann:2011:ECE:2002938.2002940}, and Legorithmics~\cite{Klonatos:2013:ASO:2463676.2465334,Klonatos:2014:BEQ:2732951.2732959} aggressively compile and optimize database engines that are uniquely specialized for a specific application's query and update workload, as well as its underlying hardware.
|
||||
As already noted above, many of these statistics are available at compile time, making the \PocketData{} setting an ideal candidate for deploying these applications.
|
||||
|
||||
\textbf{Infrastructure Justification:} Realistic evaluation of embedded databases and database compilers requires realistic workloads. Moreover, smartphones are one of the most prolific examples of embedded databases deployed in the wild. Given the variation in smartphone apps' data management requirements, even limited data releases by a single app developer will not be representative. The metrics we will gather and the benchmark we are proposing will be key to helping researchers evaluate new embedded database tools.
|
||||
|
||||
\textbf{Community Interest:} \textit{Michael Brey} of Oracle is interested in participating in the \PocketData{} community to advance research on embedded databases.
|
||||
\citedquote{Michael Brey (Oracle's BerkeleyDB Team)}{Within Oracle, we are always looking at how the industry both consumer and enterprise is using data in mobile applications. Things like db size, access patterns, single/multi user (multiple apps accessing same db), speed of access required, record size/structure etc. are all important to understand. We are also very interested in the movement of data from the device to some backend repository.}
|
||||
\textbf{Community Interest:} \textit{Christoph Koch}'s DATA lab at EPFL is interested in using the \PocketData{} benchmark to evaluate their work on database compilers. \textit{Ashok Joshi} and \textit{Michael Brey} of Oracle are interested in participating in the \PocketData{} community to advance research on embedded databases.
|
||||
\citedquote{Ashok Joshi (Senior Director at Oracle)}{I got some feedback from one of my colleagues on this topic. Yes, the real-world traces of embedded data usage would be useful; so would the benchmarking toolkit.}
|
||||
\citedquote{Michael Brey (Oracle)}{Within Oracle, we are always looking at how the industry both consumer and enterprise is using data in mobile applications. Things like db size, access patterns, single/multi user (multiple apps accessing same db), speed of access required, record size/structure etc. are all important to understand. We are also very interested in the movement of data from the device to some backend repository.}
|
||||
%Additionally, PI Kennedy will make use of the same resources in his efforts on incremental computation.
|
||||
|
||||
|
||||
\subsection{Enabled Research For the PIs}
|
||||
The PIs have a joint research project aimed at exposing \emph{uncertainty} in mobile computing~\cite{Challen:2015:MWE:2699343.2699361}. The project focuses on exposing new language primitives to the programmer to specify multiple implementation for
|
||||
a given functionality allowing the system to pick which implementation to use at runtime. This allows the system to specialize software to a given hardware platform and more importantly to a given set of external
|
||||
considerations (e.g. network connectivity, available sensors, etc.). Our proposed infrastructure will enable us to study two key aspects of uncertainty: (1) almost all mobile applications store user data and configuration parameters in
|
||||
mobile databases, access to this data can have a profound impact on the behavior of an application, \PocketData{} will allow us to more readily study this aspect of mobile uncertainty; (2) the infrastructure powering our
|
||||
The PIs have a joint research project aimed at exposing \emph{uncertainty} in mobile computing~\cite{Challen:2015:MWE:2699343.2699361}. The project focuses on exposing new language primitives to the programmer to specify multiple implementations of
|
||||
system functionality allowing the system to pick which implementation to use at runtime. This allows the system to specialize software to a given hardware platform and more importantly to a given set of external
|
||||
considerations (e.g. network connectivity, available sensors, etc.). Our proposed infrastructure will enable us to study two key aspects of uncertainty: (1) Almost all mobile applications store user data and configuration parameters in
|
||||
mobile databases, and access to this data can have a profound impact on the behavior of an application. \PocketData{} will allow us to more readily study this aspect of mobile uncertainty; (2) The infrastructure powering our
|
||||
runtime system for exposing uncertainty is built around a mobile database that stores possible choices the software system can make. \PocketData{} will allow us to optimize this database to reduce choice latency.
|
||||
|
||||
PIs Kennedy and Ziarek have a joint research project, Just-in-Time Data Structures (JITDs), focusing on adaptive indexing~\cite{kennedy2015just}.
|
||||
|
@ -101,11 +102,7 @@ The level of variation in load and resource availability that occurs in \PocketD
|
|||
As noted above, our proposed infrastructure will provide us with a benchmark workload that will help us to evaluate adaptive indexes under real-world conditions, rather than through purely synthetic workloads.
|
||||
|
||||
PI Kennedy is part of a collaborative research project with \textit{Shambhu Upadhyaya} (UB), \textit{Varun Chandola} (UB), \textit{Hung Ngo} (UB), and \textit{Long Nguyen} (UMich) that explores techniques for identifying insider attacks on databases (NSF-CNS-1409551).
|
||||
Although the threat of insider attacks on mobile devices is minimal, the specific methodology behind the work involves summarizing query logs by clustering queries into groups of queries with similar ``intent.''
|
||||
Although the threat of insider attacks on mobile devices may be minimal, the specific methodology behind the work involves summarizing query logs by creating clusters of queries with similar ``intent.''
|
||||
The approach is showing promise for summarizing query logs from a corporate (banking) setting.
|
||||
Having query logs from other settings like \PocketData{} would show that the approach can be generalized and may have applications beyond Insider Threat detection (for example to the design of index selection tools).
|
||||
Having query logs from other settings like \PocketData{} would show that the approach can be generalized to domains other than Insider Threat detection (for example to the design of index selection tools).
|
||||
If successful, these efforts could also contribute back to the \PocketData{} project, as a tool for quickly summarizing and clustering query logs would help to build out the visualization and benchmark design components of the proposed infrastructure.
|
||||
|
||||
\subsection{Enabled Research for the Broader Community}
|
||||
|
||||
|
||||
|
|
|
@ -1,22 +1,24 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
|
||||
The PIs have already reached out to the database and mobile systems communities for feedback on the current infrastructure, providing the PIs with an initial community and a preliminary source of
|
||||
feedback on design, APIs, and features (details presented in Section~\ref{sec:research}).
|
||||
feedback on design, APIs, and features. A detailed description can be found above, in Section~\ref{sec:research}. In summary, there is interest from researchers working on embedded databases, small-scale data management, personal sensing, query interfaces, and several closely related areas.
|
||||
The \PocketData{} benchmark will serve as a focal point for the community's involvement by providing the community with a way to explore, discuss, and disseminate new data management use cases, and by offering a standard way to evaluate systems on those use cases.
|
||||
|
||||
|
||||
Preliminary work on characterizing differences between \PocketData{} and traditional benchmarking infrastructures
|
||||
was presented at the TPCTC symposium, which has allowed the PIs to solicit industrial feedback. The PIs are currently in first stage discussions with researchers from: VMware, Cisco, Google, Samsung, and Oracle.
|
||||
was presented at the TPC-TC symposium, which has allowed the PIs to solicit industrial feedback. The PIs are currently in first stage discussions with researchers from VMware, Cisco, Google, Samsung, and Oracle regarding TPC involvement in the \PocketData{} benchmark.
|
||||
From this starting point the PIs will also broaden their target communities to include researchers from programming languages as well as real-time and embedded systems.
|
||||
|
||||
The PIs believe that an expansions to expand the pervue of \PocketData{} to also include IoT, will broaden the utility of the proposed infrastructure.
|
||||
Designing and optimizing specialized databases systems (typically stream databases) for IoT that are able to execute on embedded devices are
|
||||
growing topics in the database community.
|
||||
The PIs believe that expanding the purview of \PocketData{} to also include IoT will broaden the utility of the proposed infrastructure.
|
||||
IoT has recently renewed interest in database systems that are specialized for IoT (stream databases, in-network query processors) and/or are capable of running on embedded devices (\textit{e.g.}, TinyDB~\cite{madden2005tinydb}).
|
||||
As embedded processors become more capable, with
|
||||
larger amounts of main memory available (e.g. Intel Galileo), there is a growing push from the embedded
|
||||
and also from the real-time communities to explore including database
|
||||
systems and query processing systems in small scale embedded systems. The PIs believe that emerging research in smart cities and personalized
|
||||
larger amounts of main memory available (e.g. Intel's Edison platform), there is a growing push from the embedded
|
||||
and also from the real-time communities to explore including databases and query processing in small scale embedded systems. The PIs believe that emerging research in smart cities and personalized
|
||||
medical devices that aggregate and process biometric data would benefit from \PocketData{}.
|
||||
Domains specific languages (DSLs) are becoming more pervasive as solutions proposed by the programming language community as mechanisms
|
||||
to both easy programmer effort for specialized systems, but to also greatly improve performance in time, space, and even energy consumption.
|
||||
The PIs believe that \PocketData{} will be of interested to programming language researchers who work on DSLs for IoT solutions.
|
||||
Domain specific languages (DSLs) are becoming more pervasive as mechanisms
|
||||
to both amplify programmer effort for specialized systems and to greatly improve performance in time, space, and even energy consumption.
|
||||
The PIs believe that \PocketData{} will be of interest to both database and programming language researchers in the IoT space.
|
||||
\citedquote{Ashok Joshi (Senior Director; Oracle NoSQL Database, Berkeley DB, Database Mobile Server)}{I think synchronizing device data with server data is a very common occurrence in this space. As a simple example, you should be able to synchronize your `contacts' database on your cell phone with a server repository. Recently, Mike Brey, Raghu Nambiar and I proposed a ``strawman'' IoT benchmark~\cite{ashok2015benchmarking} --- I think extending your work to include large-scale data synchronization would be worth considering.}
|
||||
|
||||
|
||||
\textbf{Evidence of Support}
|
||||
|
@ -25,21 +27,23 @@ our current efforts toward building a community and the community's support for
|
|||
|
||||
\begin{figure}[th]
|
||||
\begin{center}
|
||||
\begin{tabular}{rl||c|l}
|
||||
\begin{tabular}{rl||c|c}
|
||||
\hline
|
||||
researcher & affiliation & research area & enabled research \\ \hline
|
||||
Stratos Idreos & \emph{Harvard} & Databases & Adaptive Indexes\\
|
||||
Arnab Nandi & \emph{Ohio State} & Databases/HCI & Data Analytics for IoT\\
|
||||
\textbf{researcher} & \textbf{affiliation} & \textbf{research area} & \textbf{enabled research} \\ \hline
|
||||
Stratos Idreos & \emph{Harvard} & Databases & Adaptive indexes\\
|
||||
Arnab Nandi & \emph{Ohio State} & Databases/HCI & Interactive analytics\\
|
||||
Nasko Rountev & \emph{Ohio State}& Programming Languages & Mobile data flow analysis\\
|
||||
Michael Brey & \emph{Oracle } & Databases/Mobile Systems & Embedded DB performance\\
|
||||
Meikel Poess & \emph{Oracle} & Databases & Performance analytics \\
|
||||
Raghunath Nambiar & \emph{Cisco} & Databases & Performance analytics \\
|
||||
Reza Taheri & \emph{VMWare} & Databases & Performance analytics \\
|
||||
Jens Dittrich & \emph{Saarland University}& Databases/Mobile Systems & Small data analytics \\
|
||||
Sharad Agarwal & \emph{Microsoft}& Mobile Systems/Sensing & Mobile systems privacy \\ \hline
|
||||
Christoph Koch & \emph{EPFL} & Databases/Theory & Database compilers\\
|
||||
Ashok Joshi & \emph{Oracle} & Databases/IoT & IoT performance\\
|
||||
Michael Brey & \emph{Oracle} & Databases/Mobile Systems & Embedded DB performance\\
|
||||
Meikel Poess & \emph{Oracle} & Databases & Performance measurement \\
|
||||
Raghunath Nambiar & \emph{Cisco} & Databases & Performance measurement \\
|
||||
Reza Taheri & \emph{VMware} & Databases & Performance measurement \\
|
||||
Jens Dittrich & \emph{Saarland University}& Databases/Mobile Systems & Small-data analytics \\
|
||||
Sharad Agarwal & \emph{Microsoft Research}& Mobile Systems/Sensing & Mobile systems performance \\ \hline
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
\caption{Enabled Research}
|
||||
\caption{\textbf{Existing Community Interest in} \PocketData{}}
|
||||
\label{tab:enabled}
|
||||
\end{figure}
|
||||
|
||||
|
|
|
@ -1,21 +1,27 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
|
||||
Our planning process will consist of a development effort and an outreach effort.
|
||||
First and foremost, the centerpiece of our community-building efforts is the \PocketData{} benchmark.
|
||||
In addition to acting as a standard for evaluating research efforts that overcome bottlenecks and limitations of existing technology, the benchmark will serve as a hub for the community to discuss and describe these limitations.
|
||||
First and foremost, the centerpiece of our development-side community-building efforts is the \PocketData{} benchmark.
|
||||
In addition to acting as a standard for evaluating research efforts that overcome bottlenecks and limitations of existing technology, the \textit{modular} benchmark will serve as a hub for the community to discuss and describe these limitations.
|
||||
Under the guidance of the PIs, the graduate student supported by this proposal will be responsible for developing a preliminary prototype benchmark.
|
||||
The first version of this benchmark will stress bottlenecks identified in our preliminary study~\cite{pocketdata} by simulating the behavior of a small number of smartphone apps.
|
||||
Using data and query logs derived from our preliminary study, we hope to have version one of the benchmark ready within 4-6 months.
|
||||
The benchmark will be released and advertised over community mailing lists like DBWorld~\cite{dbworld}.
|
||||
By this point, we expect to have expanded the \PocketData{} community through our outreach efforts.
|
||||
After releasing the benchmark we will hold a 3 month community feedback process, allowing us to release version 2 of the benchmark based on community feedback before the end of the planning period.
|
||||
After releasing the benchmark we will hold a 3 month community feedback process, allowing us to release version 2 of the benchmark based on community feedback before the end of the planning period.
|
||||
Additionally we will pursue feedback from the IoT community to understand how \PocketData{} can be extended to meet the IoT community's needs. We envision that these
|
||||
needs will vary depending on the aspect of IoT a given community is interested in (e.g. language runtime design vs. embedded databases). To avoid creating an infrastructure only suited to the needs
|
||||
of a particular niche, we will solicit feedback from many sources.
|
||||
|
||||
In addition to the PocketData community, we will leverage interest from the Transaction Processing Council (TPC) in developing an embedded database benchmark.
|
||||
The TPC represents one of the most prominant names in database benchmarking, and is responsible for benchmarks like TPC-C~\cite{tpcc}, TPC-H~\cite{tpch}, and TPC-DS~\cite{tpcds} that are canonical tools for evaluating research in databases.
|
||||
After presenting our preliminary work at the TPC's annual symposium colocated with VLDB 2015, \textit{Raghunath Nambiar} (Cisco), \textit{Reza Taheri} (VMWare), and \textit{Meikel Poess} (Oracle) of the TPC expressed interest in helping us to develop \PocketData{} as an eventual TPC benchmark.
|
||||
Although all PIs will be responsible for communicating with the TPC as a joint benchmark is fleshed out, PI Kennedy will act as a lead point of contact.
|
||||
The TPC represents one of the most prominent names in database benchmarking, and is responsible for benchmarks like TPC-C~\cite{tpcc}, TPC-H~\cite{tpch}, and TPC-DS~\cite{tpcds} that are touchstones for evaluating research in databases.
|
||||
After presenting our preliminary work at the TPC's annual symposium colocated with VLDB 2015, \textit{Raghunath Nambiar} (Cisco), \textit{Reza Taheri} (VMWare), and \textit{Meikel Poess} (Oracle) of the TPC expressed interest in helping us to develop \PocketData{} as an eventual TPC benchmark. The PIs hope to also participate in TPC discussions on IoT concerns. The TPC discussions will provide the PIs with both industry and
|
||||
academic perspectives on both embedded databases and IoT. The PIs hope to leverage this information in the design of the proposed \PocketData{} infrastructure.
|
||||
Although all PIs will be involved in communications with the TPC and its members, PI Kennedy will act as a lead point of contact.
|
||||
|
||||
Our outreach efforts will begin with poster sessions, tutorials, demos and/or short papers presented at prominent database conferences. One candidate is ICDE 2017, which takes place early in the planning period. PI Kennedy will coordinate efforts to perform a demonstration at a database conference to incite discussion and interest in \PocketData{} from the database community. PI Ziarek will coordinate efforts for a demonstration or poster presentation initially targeting SPLASH 2016 to reach out to the PL community, and PI Challen will coordinate efforts for a demonstration or poster presentation initially targeting MobiSys 2017 to reach out to the mobile systems community.
|
||||
Towards the end of the first year of the proposal, the PIs will begin to develop a tutorial on embedded databases and/or plan for a \PocketData{} workshop.
|
||||
To continue building our current community and to expand it to include IoT researchers, the PIs expect to travel to top conferences in a variety of fields.
|
||||
Our outreach efforts will begin with poster sessions, tutorials, and demos presented at prominent database conferences. One candidate is ICDE 2017, which takes place early in the planning period. PI Kennedy will coordinate efforts to perform a demonstration at a database conference to incite discussion and interest in \PocketData{} from the database community. PI Ziarek will coordinate efforts for a demonstration or poster presentation initially targeting SPLASH 2016 to reach out to the PL community, and PI Challen will coordinate efforts for a demonstration or poster presentation initially targeting MobiSys 2017 to reach out to the mobile systems community.
|
||||
At these conferences the PIs will network with researchers who work on IoT as well. In addition, there are many new conferences focusing on IoT that are emerging. The PIs expect to attend
|
||||
IoTA, IoTDI, and WF-IoT. Towards the end of the first year of the proposal, the PIs will begin to develop a tutorial on embedded databases and plan for a \PocketData{} workshop.
|
||||
|
||||
The PIs will submit a \textbf{CI-NEW} proposal for \PocketData{} in Fall of 2017, approximately 14 months after the start of the planning proposal.
|
||||
The PIs will submit a \textbf{CI-NEW} proposal for \PocketData{} in Fall of 2017, approximately 15 months after the start of the planning proposal.
|
|
@ -1,10 +1,10 @@
|
|||
% !TEX root = ../fullproposal.tex
|
||||
|
||||
With 2 billion smartphones in the world and more being added every day, mobile platforms together form the most pervasive distributed systems on the planet.
|
||||
People are increasingly relying on smartphones to manage their lives, from contacts and todo lists to their health, their homes, and the contents of their wallets.
|
||||
This proliferation of data-driven smartphone apps is driving a need to create more, better, faster, more user-friendly, and more power-aware techniques for managing their data.
|
||||
With 2 billion smartphones in the world and more being added every day, mobile platforms together form the most pervasive distributed system on the planet.
|
||||
People are increasingly relying on smartphones to manage their lives, from contacts and todo lists, to their health, their homes, and the contents of their wallets.
|
||||
This proliferation of data-driven smartphone apps is causing a need for more, faster, more user-friendly, and more power-aware techniques for managing data on smartphones and embedded devices.
|
||||
|
||||
To meet the challenges of this new frontier in data management, it is critical that we begin understand how smartphone apps store and retrieve structured state and establish standards for evaluating potential advances based on this understanding. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Council for our proposed benchmark, and even now several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer.
|
||||
To meet the challenges of this new frontier in data management, it is critical that we begin to understand how smartphone apps store and retrieve structured state and establish standards for evaluating potential advances based on this understanding. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Council for our proposed benchmark, and even before the planning stage, several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer.
|
||||
|
||||
In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, resulting in between one and two PhD theses. We anticipate that the proposed work may also lead to one or two MS theses, and if funded, plan to apply for an REU grant for this proposal.
|
||||
The resources created by this proposal will also be integrated into courses taught by the PIs; This has already happened: PIs Kennedy and Ziarek co-taught a project-oriented course entitled ``CSE-662: Languages and Runtimes for Big Data.'' The course included material related to \PocketData{} research, and three of the seven groups in the course worked on projects based on \PocketData{} and the Internet of Things.
|
||||
In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, contributing to between one and two PhD theses. We anticipate that the proposed work may also lead to one or two MS theses, and if funded, plan to apply for an REU supplement for this proposal.
|
||||
The resources created by this proposal will also be integrated into courses taught by the PIs, a process that has already started: PIs Kennedy and Ziarek recently co-taught a project-oriented course entitled ``CSE-662: Languages and Runtimes for Big Data.'' The course included material related to \PocketData{} research, and three of the seven groups in the course worked on projects based on \PocketData{} and the Internet of Things.
|
|
@ -0,0 +1,96 @@
|
|||
A. Collaborators for Oliver Kennedy; SUNY Buffalo; PI
|
||||
1. Sumit Agarwal; Unknown
|
||||
2. Yanif Ahmad; Johns Hopkins University
|
||||
3. Jerry Antony Ajay; University at Buffalo
|
||||
4. Daniel Bellinger; Global Foundries
|
||||
5. Geoffrey Challen; University at Buffalo
|
||||
6. Sharath Chandrashekhara; University at Buffalo
|
||||
7. Jan Chomicki; University at Buffalo
|
||||
8. Nick DiRienzo; University at Buffalo
|
||||
9. Ronny Fehling; Oracle
|
||||
10. Dieter Gawlick; Oracle
|
||||
11. Boris Glavic; Illinois Inst. Tech.
|
||||
12. Zhen Hua-Liu; Oracle
|
||||
13. Kyungho Jeon; University at Buffalo
|
||||
14. Steven Y. Ko; University at Buffalo
|
||||
15. Christoph Koch; EPFL
|
||||
16. Steve Lee; Microsoft Corp.
|
||||
17. Charles Loboz; Microsoft Corp.
|
||||
18. Daniel Lupei; EPFL
|
||||
19. Anudipa Maiti; University at Buffalo
|
||||
20. Shikhar Mehra; University at Buffalo
|
||||
21. Niccolò Meneghetti; University at Buffalo
|
||||
22. Arindam Nandi; University at Buffalo
|
||||
23. Anandatirtha Nandugudi; University at Buffalo
|
||||
24. Suman Nath; Microsoft Research
|
||||
25. Milos Nikolic; EPFL
|
||||
26. Andres Nötzli; Stanford
|
||||
27. Amir Shaikhha; EPFL
|
||||
28. Sriram Shantharam; University at Buffalo
|
||||
29. Feng Shen; University at Buffalo
|
||||
30. Jinghao Shi; University at Buffalo
|
||||
31. Slawek Smyl; Microsoft Corp.
|
||||
32. Guru Prasad Srinivasa; University at Buffalo
|
||||
33. Ankur Upadhyay; FactSet
|
||||
34. Ying Yang; University at Buffalo
|
||||
35. Lukasz Ziarek; University at Buffalo
|
||||
B. Collaborators for Lukasz Ziarek; SUNY Buffalo; Co-PI
|
||||
1. Umut Acar; CMU
|
||||
2. Ali-Reza Adl-Tabatabai; Intel
|
||||
3. Ethan Blanton; Fiji Systems Inc.
|
||||
4. Patrick Eugster; Purdue University
|
||||
5. Matthew Fluet; Rochester
|
||||
6. Christoph Hoffman; Purdue University
|
||||
7. Anthony Hosking; Purdue University
|
||||
8. Suresh Jagannathan; Purdue University
|
||||
9. Bharat Jayaraman; University at Buffalo
|
||||
10. Oliver Kennedy; University at Buffalo
|
||||
11. Steve Ko; University at Buffalo
|
||||
12. Sree Harsha Konduri; Amazon
|
||||
13. Amit Kulkarni; University at Buffalo
|
||||
14. Zihuan Li; Purdue University
|
||||
15. Vijay Menon; Google
|
||||
16. Filip Pizlo; Apple Inc.
|
||||
17. Jennifer Sartor; Ghent University
|
||||
18. Tatiana Shpeisman; Intel
|
||||
19. KC Sivaramakrishnan; Purdue University
|
||||
20. Sam Tobin-Hochstadt; Indiana University
|
||||
21. Jan Vitek; Purdue University, Fiji Systems Inc.
|
||||
22. Adam Welc; Oracle
|
||||
23. Yin Yan; University at Buffalo
|
||||
C. Collaborators for Geoffrey Challen; SUNY Buffalo; Co-PI
|
||||
1. Sharad Agarwal; Microsoft Research
|
||||
2. Nilanjan Banerjee; University of Maryland
|
||||
3. Milind Buddhikot; Bell Labs
|
||||
4. Yih-Farn Chen; AT&T Labs Research
|
||||
5. Murat Demirbas; University at Buffalo
|
||||
6. Prabal Dutta; University of Michigan
|
||||
7. Wen Dong; University at Buffalo
|
||||
8. Carla Schlatter Ellis; Duke University
|
||||
9. Shyamnath Gollakota; University of Washington
|
||||
10. Michelle Gong; Google
|
||||
11. Marco Gruteser; Rutgers University
|
||||
12. Mark Hempstead; Drexel University
|
||||
13. Oliver Kennedy; University at Buffalo
|
||||
14. Robin Kravets; University of Illinois, Urbana-Champaign
|
||||
15. Steven Y. Ko; University at Buffalo
|
||||
16. Tevfik Kosar; University at Buffalo
|
||||
17. Dimitrios Koutsonikolas; University at Buffalo
|
||||
18. Branislav Kusy; CSIRO
|
||||
19. Eyal de Lara; University of Toronto
|
||||
20. James Martin; Clemson University
|
||||
21. Tommaso Melodia; Northeastern University
|
||||
22. Emiliano Miluzzo; Apio Systems
|
||||
23. Iqbal Mohomed; IBM Research
|
||||
24. James Pepin; Clemson University
|
||||
25. Matthai Philipose; Microsoft Research
|
||||
26. Sami Rollins; University of San Francisco
|
||||
27. Margo Seltzer; Harvard University
|
||||
28. Ivan Seskar; Rutgers University
|
||||
29. Jacob Sorber; Clemson University
|
||||
30. Aaron Striegel; Notre Dame
|
||||
31. Khai N. Truong; University of Toronto
|
||||
32. Chunming Qiao; University at Buffalo
|
||||
33. Kuangching Wang; Clemson University
|
||||
34. Lin Zhong; Rice University
|
||||
35. Lukasz Ziarek; University at Buffalo
|
Binary file not shown.
|
@ -27,7 +27,7 @@ Computer Science and Engineering (CSE) data storage facilities include vulcan, a
|
|||
|
||||
CSE faculty compute systems include castor, a Sun Blade 1000; citrix[1-3], a load-balanced Citrix farm of Dell PowerEdge 2650 servers; the-who, a Sun Fire V20z desktop virtualization server; benatar, a virtualized general compute server; and the underground cluster, a 4-node compute cluster comprised of Dell 1425s. CSE Faculty also have use of all CSE student systems (below).
|
||||
|
||||
CSE student compute systems include timberlake, a Dell PowerEdge R600 compute server; metallica, a Dell PowerEdge R500 compute server; pollux, a Sun Sparc enterprise T5220 compute server; coldplay, a Sun Fire V20z compute server; fork, a Sun Fire V20z dedicated to the Operating Systems course; nickelback, a Dell PowerEdge 1950 desktop virtualization server; dragonforce, a Dell PowerEdge R720 desktop virtualization server; styx, a Dell PowerEdge R400 desktop virtualization server.
|
||||
CSE student compute systems include a Dell PowerEdge R600 compute server; a Dell PowerEdge R500 compute server; a Sun Sparc enterprise T5220 compute server; a Sun Fire V20z compute server; a Sun Fire V20z dedicated to the Operating Systems course; a Dell PowerEdge 1950 desktop virtualization server; a Dell PowerEdge R720 desktop virtualization server; and a Dell PowerEdge R400 desktop virtualization server.
|
||||
|
||||
CSE research groups occupy 6628 square feet of research lab space ranging from secure, monitored, temperature-controlled data centers to specialized experimental facilities. CSE instructional labs occupy 4096 square feet, each configured to serve the characteristic needs of the courses they host. The Patricia Eberlein is the CSE general student computing lab which occupies 1056 square feet.
|
||||
|
||||
|
@ -53,4 +53,6 @@ The DB/PL lab at the University at Buffalo maintains additional resources specif
|
|||
|
||||
The DB/PL lab at University at Buffalo maintains additional resources for internal use, including multiple x86 workstations, laptops, and low-power development boards (Raspberry Pis and Intel Galileos) for general student and PI use. Server infrastructure for the lab includes an application server supporting a lab project management system, teaching support applications, and trial deployments of lab-developed software, an Oracle database server testbed, a 32-core and a 64-core AMD Opteron and a 12-core Intel Xeon-based testbed server, as well as a 16-node Hadoop cluster shared with 3 other labs. Lab workstations and laptops are configured with OSX or Windows. Servers are configured with Redhat Enterprise Linux.
|
||||
|
||||
PI Challen is collaborating on this project without support for the duration of the planning phase. He will apply his expertise in mobile systems and operating systems, and will assist in advising students working on this project.
|
||||
|
||||
\end{document}
|
Binary file not shown.
|
@ -23,7 +23,7 @@
|
|||
~~\\
|
||||
|
||||
\section*{Senior Personnel}
|
||||
PIs Ziarek is budgeted half a month of summer salary. PIs Kennedy and Challen are each budgeted a quarter-month of summer salary. PI Kennedy will apply his expertise and experience in the areas of databases, incremental computation, web applications, and security. PI Ziarek will apply his expertise and experience in the areas of programming languages, distributed computation, and security. PI Challen will participate without support and will apply his expertise in mobile systems and operating systems. All three PIs will take responsibility for (1) advising and coordinating student-driven efforts as described below, (2) reaching out to their respective research communities to build interest in research on \PocketData{}, (3) organizing a \PocketData{} workshop.
|
||||
PI Ziarek is budgeted half a month of summer salary. PIs Kennedy and Challen are each budgeted a quarter-month of summer salary. PI Kennedy will apply his expertise and experience in the areas of databases, incremental computation, web applications, and security. PI Ziarek will apply his expertise and experience in the areas of programming languages, distributed computation, and security. PI Challen will apply his expertise in mobile systems and operating systems. All three PIs will take responsibility for (1) advising and coordinating student-driven efforts as described below, (2) reaching out to their respective research communities to build interest in research on \PocketData{}, and (3) organizing a \PocketData{} workshop.
|
||||
|
||||
\section*{Other Personnel}
|
||||
Funding is requested for one computer science graduate student assistant for one year. The two-semester and summer salary for the student is \$22,000.
|
||||
|
@ -40,11 +40,11 @@ N/A
|
|||
|
||||
Travel may include trips to NSF meetings, conferences and workshops, and any PI meetings. Major conferences such as SIGMOD, VLDB, POPL, PLDI, and ICDE, typically last 4-5 days, and are located both domestically and internationally. Workshops are often affiliated with major conferences, and attendees frequently attend both. We have budgeted for up to 3 conference visits.
|
||||
|
||||
\noindent \textbf{Domestic Conferences} As an example of a domestic conference, we use SIGMOD 2016 being held in San Fransisco, CA. We anticipate a lodging cost of \$99 per night and a \$59 perdiem. The subtotal for 2 attendees over 5 nights is \$2,310. We expect airfare of \$630 and average conference registration fees of \$600 per person for a total domestic travel cost of \$4000.
|
||||
\noindent \textbf{Domestic Conferences} As an example of a domestic conference, we use SIGMOD 2016 being held in San Francisco, CA. We anticipate a lodging cost of \$99 per night and a \$59 per diem. The subtotal for 4 attendees over 5 nights is \$2,310. We expect airfare of \$630 and average conference registration fees of \$600 per person for a total domestic travel cost of \$8000.
|
||||
|
||||
\noindent \textbf{Other Domestic Travel} We have budgeted an additional \$2000 for travel to NSF PI meetings and for outreach efforts. Outreach efforts include travel support to allow the PIs to visit potential community members, and travel support for community members to visit UB and present on their work.
|
||||
|
||||
\noindent \textbf{Foreign Conferences} As an example of a foreign conference, we use ICDE 2016 being held in Helsinki, Finland. We anticipate a lodging cost of \$200 per person, and a \$260 perdiem. The subtotal for 1 attendee over 5 nights is \$5,200. We expect airfare of \$1000 and average conference registration fees of \$700 per person for a total domestic travel cost of \$4,000.
|
||||
\noindent \textbf{Foreign Conferences} As an example of a foreign conference, we use ICDE 2016 being held in Helsinki, Finland. We anticipate a lodging cost of \$200 per person, and a \$260 per diem. The subtotal for 2 attendees over 5 nights is \$5,200 per person. We expect airfare of \$1000 and average conference registration fees of \$700 per person for a total international travel cost of \$8,000.
|
||||
|
||||
\section*{Other Direct Costs}
|
||||
|
||||
|
@ -55,8 +55,8 @@ The negotiated rate with the Department of Computer Science and Engineering for
|
|||
\$1,533 is requested per year for Materials and Supplies to purchase desktop computers for the graduate research students and faculty working on this project. The computers will be used for code development, experimental evaluation, paper writing and typesetting and other efforts related to this project.
|
||||
|
||||
\subsection*{Other}
|
||||
Tuition is budgeted at the standard University at Buffalo rates for the Graduate Research Assistant at 9 credit hours per GRA per semester.
|
||||
The anticipated out-of-state student tuition is \$18,144 for one student for one year.
|
||||
Tuition is budgeted at the standard University at Buffalo rates for a senior Graduate Research Assistant at 3 credit hours per semester.
|
||||
The anticipated out-of-state student tuition is \$6,048 for one student for one year.
|
||||
|
||||
\subsection*{Indirect Costs}
|
||||
Indirect cost rates are based on the applicable federally negotiated rates published at \url{http://www.research.buffalo.edu/sps/about/rates.cfm}.
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
-- List of Personnel --
|
||||
1. Oliver Kennedy; University at Buffalo, SUNY; PI
|
||||
2. Lukasz Ziarek; University at Buffalo, SUNY; Co-PI
|
||||
3. Geoffrey Challen; University at Buffalo, SUNY; Co-PI
|
|
@ -0,0 +1,15 @@
|
|||
---- Overview ----
|
||||
|
||||
A common requirement of the 4 million apps running on the world's 2 billion smartphones is persisting structured data. Embedded databases such as SQLite are heavily used for this purpose, with a single typical Android smartphone averaging more than two SQLite queries per second. The fundamental challenges experienced by mobile apps using embedded databases - minimizing energy consumption, latency, and disk utilization - are familiar ground for database researchers. However, in spite of active research in the areas of smartphone query processing and embedded databases, the specific tradeoffs introduced by this new domain of pocket-scale data are far less well understood.
|
||||
|
||||
Key challenges in this space include the lack of publicly available data regarding smartphone database usage patterns in the real world, concrete high-level optimization targets, and tools and methodologies for reliably measuring database performance along axes relevant to smartphone apps. We propose infrastructure support and community-building efforts that will both improve existing research on embedded databases, and help to encourage new and innovative research in the area. This infrastructure support will take the form of real-world smartphone usage traces, a benchmarking suite for pocket-scale data, visualization tools, and instrumentation for mobile embedded databases.
|
||||
|
||||
Keywords: databases, smartphones, benchmarking
|
||||
|
||||
---- Intellectual Merit ----
|
||||
|
||||
The proposed infrastructure will be used by researchers from multiple academic and industrial institutions in support of new and existing research. Interest has already been expressed by researchers working on Adaptive Data Systems, Small Data Analytics, Gestural Query Processing, Data-Flow Analysis, Embedded Databases, Database Benchmarking, and others.
|
||||
|
||||
---- Broader Impacts ----
|
||||
|
||||
With 2 billion smartphones in the world, people are increasingly relying on smartphones to manage their lives. The proliferation of data-driven smartphone apps is driving a need to create more, better, faster, more user-friendly, and more power-aware techniques for managing their data. It is critical that we begin to understand how smartphone apps interact with their data. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Council for our proposed benchmark, and even now several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer. In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, resulting in between one and two PhD Theses.
|
Loading…
Reference in New Issue