Updated justification

master
Oliver Kennedy 2016-07-26 07:47:13 +03:00
commit 2ee6edd726
40 changed files with 3004 additions and 193 deletions

6
.gitignore vendored
View File

@ -1,5 +1,7 @@
ABSTRACT
/.DS_Store
/badwords
/biosketches
/shortproposal.pdf
/fullproposal.pdf
/.xxxnote
@ -16,7 +18,9 @@ ABSTRACT
*.fls
*.log
*.synctex.gz
*.fdb_latexmk
.deps
.~lock*
Bios/*.pdf
/fullproposal.fdb_latexmk
/fullproposal-body.pdf
/fullproposal-refs.pdf

View File

@ -1,16 +1,21 @@
TEX_FILES=fullproposal.tex main.bib geoffreychallen.bib $(wildcard sections/*)
TARGET=fullproposal
TEX_FILES=$(TARGET).tex main.bib geoffreychallen.bib $(wildcard sections/*)
# Default goal: build the proposal PDF named by $(TARGET) (plus `todo`), so
# changing TARGET is enough to retarget the whole build.
all: $(TARGET).pdf todo
@if [ `uname` = "Darwin" ] ; then open fullproposal.pdf; fi
@if [ `uname` = "Darwin" ] ; then open $(TARGET).pdf; fi
graphs:
@cd graphs; rake
fullproposal.pdf: $(TEX_FILES)
latexmk -pdf fullproposal.tex
$(TARGET).pdf: $(TEX_FILES)
latexmk -pdf $(TARGET).tex
open: fullproposal.pdf todo
open: $(TARGET).pdf todo
open $<
split: $(TARGET).pdf SplitProposal.workflow
automator -i `pwd`/$(TARGET).pdf SplitProposal.workflow 2>&1 | grep -v "Type1 font data"
clean:
latexmk -CA -bibtex

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleName</key>
<string>SplitProposal</string>
</dict>
</plist>

Binary file not shown.

After

Width:  |  Height:  |  Size: 171 KiB

File diff suppressed because it is too large Load Diff

35
bios/challen_coi.txt Normal file
View File

@ -0,0 +1,35 @@
Sharad Agarwal (Microsoft Research)
Nilanjan Banerjee (University of Maryland)
Milind Buddhikot (Bell Labs)
Yih-Farn Chen (AT\&T Labs Research)
Murat Demirbas (University at Buffalo)
Prabal Dutta (University of Michigan)
Wen Dong (University at Buffalo)
Carla Schlatter Ellis (Duke University)
Shyamnath Gollakota (University of Washington)
Michelle Gong (Google)
Marco Gruteser (Rutgers University)
Mark Hempstead (Drexel University)
Oliver Kennedy (University at Buffalo)
Robin Kravets (University of Illinois, Urbana-Champaign)
Steven Y. Ko (University at Buffalo)
Tevfik Kosar (University at Buffalo)
Dimitrios Koutsonikolas (University at Buffalo)
Branislav Kusy (CSIRO)
Eyal de Lara (University of Toronto)
James Martin (Clemson University)
Tommaso Melodia (Northeastern University)
Emiliano Miluzzo (Apio Systems)
Iqbal Mohomed (IBM Research)
James Pepin (Clemson University)
Matthai Philipose (Microsoft Research)
Sami Rollins (University of San Francisco)
Margo Seltzer (Harvard University)
Ivan Seskar (Rutgers University)
Jacob Sorber (Clemson University)
Aaron Striegel (Notre Dame)
Khai N. Truong (University of Toronto)
Chunming Qiao (University at Buffalo)
Kuangching Wang (Clemson University)
Lin Zhong (Rice University)
Lukasz Ziarek (University at Buffalo)

23
bios/ziarek_coi.txt Normal file
View File

@ -0,0 +1,23 @@
Umut Acar; CMU
Ali-Reza Adl-Tabatabai; Intel
Ethan Blanton; Fiji Systems Inc.
Patrick Eugster; Purdue University
Mathew Fluet; Rochester
Christoph Hoffman; Purdue University
Antony Hosking; Purdue University
Suresh Jagannathan; Purdue University
Bharat Jayaraman; University at Buffalo
Oliver Kennedy; University at Buffalo
Steve Ko; University at Buffalo
Sree Harsha Konduri; Amazon
Amit Kulkarni; University at Buffalo
Zihuan Li; Purdue University
Vijay Menon; Google
Filip Pizlo; Apple Inc.
Jennifer Sartor; Ghent University
Tatiana Shpeisman; Intel
KC Sivaramakrishnan; Purdue University
Sam Tobin-Hochstadt; Indiana University
Jan Vitek; Purdue University, Fiji Systems Inc.
Adam Welc; Oracle
Yin Yan; University at Buffalo

View File

@ -1,9 +0,0 @@
*-separate.pdf
.xxxnote
*.swp
*.aux
*.log
*.out
*.bbl
*.blg
.deps

View File

@ -68,6 +68,8 @@ Lukasz Ziarek (Univ. of Buffalo, Dept. of Comp. Sci. and Eng.)}
\input{sections/5-priorresults}
\pagebreak
\setcounter{page}{1}
{
\bibliographystyle{nsf}
\bibliography{main,geoffreychallen}

BIN
letters/challen-support.pdf Normal file

Binary file not shown.

265
main.bib
View File

@ -1,20 +1,254 @@
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Oliver Kennedy at 2016-01-18 23:35:08 -0500
%% Created for Oliver Kennedy at 2016-01-19 22:40:45 -0500
%% Saved with string encoding Unicode (UTF-8)
@incollection{ashok2015benchmarking,
Author = {Joshi, Ashok and Nambiar, Raghunath and Brey, Michael},
Booktitle = {Big Data Benchmarking},
Date-Added = {2016-01-20 02:41:38 +0000},
Date-Modified = {2016-01-20 02:41:38 +0000},
Doi = {10.1007/978-3-319-20233-4_4},
Editor = {Rabl, Tilmann and Sachs, Kai and Poess, Meikel and Baru, Chaitanya and Jacobson, Hans-Arno},
Isbn = {978-3-319-20232-7},
Language = {English},
Pages = {29-36},
Publisher = {Springer International Publishing},
Series = {Lecture Notes in Computer Science},
Title = {Benchmarking Internet of Things Solutions},
Url = {http://dx.doi.org/10.1007/978-3-319-20233-4_4},
Volume = {8991},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-319-20233-4_4}}
@inproceedings{Pizlo:2010:SFR:1806596.1806615,
Acmid = {1806615},
Address = {New York, NY, USA},
Author = {Pizlo, Filip and Ziarek, Lukasz and Maj, Petr and Hosking, Antony L. and Blanton, Ethan and Vitek, Jan},
Booktitle = {Proceedings of the 2010 ACM SIGPLAN Conference on Programming Language Design and Implementation},
Date-Added = {2016-01-19 22:46:55 +0000},
Date-Modified = {2016-01-19 22:46:55 +0000},
Doi = {10.1145/1806596.1806615},
Isbn = {978-1-4503-0019-3},
Keywords = {fragmentation, mark-region, mark-sweep, real-time, replication-copying},
Location = {Toronto, Ontario, Canada},
Numpages = {14},
Pages = {146--159},
Publisher = {ACM},
Series = {PLDI '10},
Title = {Schism: Fragmentation-tolerant Real-time Garbage Collection},
Url = {http://doi.acm.org/10.1145/1806596.1806615},
Year = {2010},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1806596.1806615},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1806596.1806615}}
@misc{dbworld,
Author = {{ACM SIGMOD}},
Date-Added = {2016-01-19 04:34:31 +0000},
Date-Modified = {2016-01-19 04:35:06 +0000},
Date-Added = {2016-01-19 22:45:54 +0000},
Date-Modified = {2016-01-19 22:45:54 +0000},
Howpublished = {https://research.cs.wisc.edu/dbworld/},
Title = {DBWorld}}
@inproceedings{nomadlog-sigcomm14,
Acmid = {2626333},
Address = {New York, NY, USA},
Author = {Gao, Zhaoyu and Venkataramani, Arun and Kurose, James F. and Heimlicher, Simon},
Booktitle = {Proceedings of the 2014 ACM Conference on SIGCOMM},
Date-Added = {2016-01-19 22:44:45 +0000},
Date-Modified = {2016-01-19 22:44:45 +0000},
Doi = {10.1145/2619239.2626333},
Isbn = {978-1-4503-2836-4},
Keywords = {location-independence, mobility, network architecture},
Location = {Chicago, Illinois, USA},
Numpages = {12},
Pages = {259--270},
Publisher = {ACM},
Series = {SIGCOMM '14},
Title = {Towards a Quantitative Comparison of Location-independent Network Architectures},
Url = {http://doi.acm.org/10.1145/2619239.2626333},
Year = {2014},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2619239.2626333},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2619239.2626333}}
@inproceedings{locking-chi2016,
Author = {Marian Harbach and Alexander De Luca and Serge Egelman},
Booktitle = {Proceedings of the 2016 ACM Conference on Human Factors in Computing Systems (CHI'2016)},
Date-Added = {2016-01-19 22:44:45 +0000},
Date-Modified = {2016-01-19 22:44:45 +0000},
Title = {The Anatomy of Smartphone Unlocking: A Field Study of Android Lock Screens},
Year = {2016}}
@article{Ziarek:2008:FTS:1466762.1466777,
Acmid = {1466777},
Address = {Hingham, MA, USA},
Author = {Ziarek, Lukasz and Weeks, Stephen and Jagannathan, Suresh},
Date-Added = {2016-01-19 22:42:22 +0000},
Date-Modified = {2016-01-19 22:42:22 +0000},
Doi = {10.1007/s10990-008-9035-3},
Issn = {1388-3690},
Issue_Date = {September 2008},
Journal = {Higher Order Symbol. Comput.},
Keywords = {Compilation, Flattening, Optimization, SSA, Tuples, Unboxing},
Month = sep,
Number = {3},
Numpages = {26},
Pages = {333--358},
Publisher = {Kluwer Academic Publishers},
Title = {Flattening Tuples in an SSA Intermediate Representation},
Url = {http://dx.doi.org/10.1007/s10990-008-9035-3},
Volume = {21},
Year = {2008},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10990-008-9035-3}}
@inproceedings{Sivaramakrishnan:2012:ERB:2258996.2259005,
Acmid = {2259005},
Address = {New York, NY, USA},
Author = {Sivaramakrishnan, KC and Ziarek, Lukasz and Jagannathan, Suresh},
Booktitle = {Proceedings of the 2012 International Symposium on Memory Management},
Date-Added = {2016-01-19 22:42:09 +0000},
Date-Modified = {2016-01-19 22:42:09 +0000},
Doi = {10.1145/2258996.2259005},
Isbn = {978-1-4503-1350-6},
Keywords = {barrier elimination, cleanliness, concurrent programming, functional languages, parallel and concurrent collection, private heaps},
Location = {Beijing, China},
Numpages = {12},
Pages = {49--60},
Publisher = {ACM},
Series = {ISMM '12},
Title = {Eliminating Read Barriers Through Procrastination and Cleanliness},
Url = {http://doi.acm.org/10.1145/2258996.2259005},
Year = {2012},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2258996.2259005},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2258996.2259005}}
@inproceedings{Pizlo:2010:HPE:1755913.1755922,
Acmid = {1755922},
Address = {New York, NY, USA},
Author = {Pizlo, Filip and Ziarek, Lukasz and Blanton, Ethan and Maj, Petr and Vitek, Jan},
Booktitle = {Proceedings of the 5th European Conference on Computer Systems},
Date-Added = {2016-01-19 22:42:00 +0000},
Date-Modified = {2016-01-19 22:42:00 +0000},
Doi = {10.1145/1755913.1755922},
Isbn = {978-1-60558-577-2},
Keywords = {java virtual machine, memory management, real-time systems},
Location = {Paris, France},
Numpages = {14},
Pages = {69--82},
Publisher = {ACM},
Series = {EuroSys '10},
Title = {High-level Programming of Embedded Hard Real-time Devices},
Url = {http://doi.acm.org/10.1145/1755913.1755922},
Year = {2010},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1755913.1755922},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1755913.1755922}}
@inproceedings{Yan:2013:RDR:2512989.2512990,
Acmid = {2512990},
Address = {New York, NY, USA},
Author = {Yan, Yin and Konduri, Sree Harsha and Kulkarni, Amit and Anand, Varun and Ko, Steven Y. and Ziarek, Lukasz},
Booktitle = {Proceedings of the 11th International Workshop on Java Technologies for Real-time and Embedded Systems},
Date-Added = {2016-01-19 22:41:50 +0000},
Date-Modified = {2016-01-19 22:41:50 +0000},
Doi = {10.1145/2512989.2512990},
Isbn = {978-1-4503-2166-2},
Location = {Karlsruhe, Germany},
Numpages = {10},
Pages = {98--107},
Publisher = {ACM},
Series = {JTRES '13},
Title = {RTDroid: A Design for Real-time Android},
Url = {http://doi.acm.org/10.1145/2512989.2512990},
Year = {2013},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2512989.2512990},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2512989.2512990}}
@inproceedings{Blanton:2013:NIC:2512989.2512994,
Acmid = {2512994},
Address = {New York, NY, USA},
Author = {Blanton, Ethan and Ziarek, Lukasz},
Booktitle = {Proceedings of the 11th International Workshop on Java Technologies for Real-time and Embedded Systems},
Date-Added = {2016-01-19 22:41:44 +0000},
Date-Modified = {2016-01-19 22:41:44 +0000},
Doi = {10.1145/2512989.2512994},
Isbn = {978-1-4503-2166-2},
Location = {Karlsruhe, Germany},
Numpages = {10},
Pages = {58--67},
Publisher = {ACM},
Series = {JTRES '13},
Title = {Non-blocking Inter-partition Communication with Wait-free Pair Transactions},
Url = {http://doi.acm.org/10.1145/2512989.2512994},
Year = {2013},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2512989.2512994},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2512989.2512994}}
@inproceedings{Ziarek:2011:CAE:1993498.1993572,
Acmid = {1993572},
Address = {New York, NY, USA},
Author = {Ziarek, Lukasz and Sivaramakrishnan, KC and Jagannathan, Suresh},
Booktitle = {Proceedings of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation},
Date-Added = {2016-01-19 22:41:35 +0000},
Date-Modified = {2016-01-19 22:41:35 +0000},
Doi = {10.1145/1993498.1993572},
Isbn = {978-1-4503-0663-8},
Keywords = {asynchrony, composability, concurrent ml, first-class events, message-passing},
Location = {San Jose, California, USA},
Numpages = {12},
Pages = {628--639},
Publisher = {ACM},
Series = {PLDI '11},
Title = {Composable Asynchronous Events},
Url = {http://doi.acm.org/10.1145/1993498.1993572},
Year = {2011},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/1993498.1993572},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1993498.1993572}}
@article{Ziarek:2010:LCC:1852977.1852979,
Acmid = {1852979},
Address = {New York, NY, USA},
Author = {Ziarek, Lukasz and Jagannathan, Suresh},
Date-Added = {2016-01-19 22:41:35 +0000},
Date-Modified = {2016-01-19 22:41:35 +0000},
Doi = {10.1017/S0956796810000067},
Issn = {0956-7968},
Issue_Date = {March 2010},
Journal = {J. Funct. Program.},
Month = mar,
Number = {2},
Numpages = {37},
Pages = {137--173},
Publisher = {Cambridge University Press},
Title = {Lightweight Checkpointing for Concurrent {ML}},
Url = {http://dx.doi.org/10.1017/S0956796810000067},
Volume = {20},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1017/S0956796810000067}}
@article{Yang:2015:LOA:2824032.2824055,
Acmid = {2824055},
Author = {Yang, Ying and Meneghetti, Niccol\`{o} and Fehling, Ronny and Liu, Zhen Hua and Kennedy, Oliver},
Date-Added = {2016-01-19 22:40:55 +0000},
Date-Modified = {2016-01-19 22:40:55 +0000},
Doi = {10.14778/2824032.2824055},
Issn = {2150-8097},
Issue_Date = {August 2015},
Journal = {Proc. VLDB Endow.},
Month = aug,
Number = {12},
Numpages = {12},
Pages = {1578--1589},
Publisher = {VLDB Endowment},
Title = {Lenses: An On-demand Approach to ETL},
Url = {http://dx.doi.org/10.14778/2824032.2824055},
Volume = {8},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.14778/2824032.2824055}}
@misc{ramamurthy2015pocketdata,
Author = {Naveen Kumar Ramamurthy and Sankara Vadivel Dhandapani and Saravanan Adaikkalavan and Sathish Kumar Deivasigamani},
Date-Added = {2016-01-18 20:07:55 +0000},
@ -2738,28 +2972,3 @@
Year = {2012},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/2384716.2384723},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/2384716.2384723}}
@inproceedings{nomadlog-sigcomm14,
author = {Gao, Zhaoyu and Venkataramani, Arun and Kurose, James F. and Heimlicher, Simon},
title = {Towards a Quantitative Comparison of Location-independent Network Architectures},
booktitle = {Proceedings of the 2014 ACM Conference on SIGCOMM},
series = {SIGCOMM '14},
year = {2014},
isbn = {978-1-4503-2836-4},
location = {Chicago, Illinois, USA},
pages = {259--270},
numpages = {12},
url = {http://doi.acm.org/10.1145/2619239.2626333},
doi = {10.1145/2619239.2626333},
acmid = {2626333},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {location-independence, mobility, network architecture},
}
@inproceedings{locking-chi2016,
author = {Marian Harbach and Alexander De Luca and Serge Egelman},
title = {The Anatomy of Smartphone Unlocking: A Field Study of Android Lock
Screens},
booktitle = {Proceedings of the 2016 ACM Conference on Human Factors in
Computing Systems (CHI'2016)},
Year = {2016},
}

View File

@ -1 +0,0 @@
pytex/cls/nsfcnsproposal.cls

285
nsfcnsproposal.cls Normal file
View File

@ -0,0 +1,285 @@
\NeedsTeXFormat{LaTeX2e}
% 22 Oct 2010 : GWA : New proposal class. Originally based on the
% proposalnsf.cls file downloaded from here:
% http://www-pord.ucsd.edu/~sgille/how_to/proposal_prep.html
\RequirePackage{color}
\RequirePackage{calc}
\RequirePackage{mathpazo}
\RequirePackage{ulem}
\ProvidesClass{nsfcnsproposal}[2010/10/22 GWA NSF CNS Proposal Class]
\DeclareOption*{\PassOptionsToClass{\CurrentOption}{memoir}}
\ProcessOptions
\LoadClass[onecolumn,oneside,final]{memoir}
\RequirePackage{colortbl}
\RequirePackage{threeparttable}
% 22 Oct 2010 : GWA : Set up simple chapter headings.
\chapterstyle{article}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\paperheight}{11in}
% \setlength{\paperwidth}{8.5in}
\setstocksize{11in}{8.5in}
\settrimmedsize{11in}{8.5in}{*}
\settrims{0pt}{0pt}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\textwidth}{\paperwidth - 2in}
% \setlength{\textheight}{\paperheight - 2in}
%\settypeblocksize{9in}{6.5in}{*}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\oddsidemargin}{1in}
% \setlength{\evensidemargin}{1in}
\setlrmarginsandblock{1in}{1in}{*}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\topmargin}{1in}
% \setlength{\headheight}{0pt}
% \setlength{\headsep}{0pt}
\setulmarginsandblock{1in}{1in}{*}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\footskip}{36pt}
% \setlength{\headheight}{0pt}
\setheadfoot{0.1pt}{36pt}
% 22 Oct 2010 : GWA : Used to be:
% \setlength{\marginparsep}{0.5cm}
% \setlength{\marginparwidth}{1.5cm}
\setmarginnotes{0.5cm}{1.5cm}{0.1cm}
% 22 Oct 2010 : GWA : memoir command to actually apply the layout.
\checkandfixthelayout
% 22 Oct 2010 : GWA : Proposal page style.
% Citation helpers. The original \cite is saved as \@internalcite; each macro
% below redefines \citeauthoryear to print a subset of its three arguments and
% then hands off to \@internalcite.
% NOTE(review): presumably #1 = full author list, #2 = abbreviated author list,
% #3 = year, as emitted by an author-year .bst -- confirm against the style in use.
\let\@internalcite\cite
\def\fullcite{\def\citeauthoryear##1##2##3{##1, ##3}\@internalcite}
\def\fullciteA{\def\citeauthoryear##1##2##3{##1}\@internalcite}
\def\shortcite{\def\citeauthoryear##1##2##3{##2, ##3}\@internalcite}
\def\shortciteA{\def\citeauthoryear##1##2##3{##2}\@internalcite}
\def\citeyear{\def\citeauthoryear##1##2##3{##3}\@internalcite}
% Widths of the left (title) and right (NSF Proposal / date) minipages used by
% the `summary' chapter style: 0.8 and 0.18 of \textwidth respectively.
\newlength{\lefttitle}
\newlength{\righttitle}
\setlength{\lefttitle}{0.8\textwidth}
\setlength{\righttitle}{0.18\textwidth}
% `proposal' chapter style: the chapter name and number hooks are emptied, so
% only the title is printed -- \Large small caps passed through \MakeUppercase,
% preceded by half a line of space and followed by half a line of space and a
% horizontal rule.
\makechapterstyle{proposal}{%
\renewcommand{\chapterheadstart}{}
\renewcommand{\printchaptername}{}
\renewcommand{\chapternamenum}{}
\renewcommand{\printchapternum}{}
\renewcommand{\afterchapternum}{}
\renewcommand{\printchaptertitle}[1]{%
\vspace{0.5\onelineskip}
\Large\scshape\MakeUppercase{##1}
}
\renewcommand{\afterchaptertitle}{\vspace{0.5\onelineskip} \hrule}
}
% `summary' chapter style: a two-column heading. The chapter title is set in a
% left minipage of width \lefttitle; "NSF Proposal" and \submissiondate are set
% right-aligned in a minipage of width \righttitle. Below them come \theauthors
% and a hard-coded "Type / CISE Core Division / Keywords" line, then a rule.
% NOTE(review): \submissiondate and \theauthors are not defined in this class;
% the document using the `summary' style presumably has to define them -- confirm.
% NOTE(review): the Type/Division/Keywords text is specific to one proposal and
% must be edited here if the class is reused.
\makechapterstyle{summary}{%
\renewcommand{\chapterheadstart}{}
\renewcommand{\printchaptername}{}
\renewcommand{\chapternamenum}{}
\renewcommand{\printchapternum}{}
\renewcommand{\afterchapternum}{}
\renewcommand{\printchaptertitle}[1]{
\noindent\begin{minipage}[t]{\lefttitle}
\vspace{0pt}
\raggedright
\large\scshape{##1}
\vspace*{0.10in}
\end{minipage}
\hfill
\begin{minipage}[t]{\righttitle}
\vspace{0pt}
\raggedleft
{\small \scshape NSF Proposal\\
\submissiondate}
\end{minipage}
\small
\theauthors\\
Type: CI-P; CISE Core Division: IIS; Keywords: databases, smartphones,
benchmarking
}
\renewcommand{\afterchaptertitle}{\vspace{0.5\onelineskip} \hrule \vspace{0.3\onelineskip}}
}
% `letter' chapter style: every heading hook, including the title printer, is
% redefined to produce nothing, so \chapter typesets no visible heading text.
\makechapterstyle{letter}{%
\renewcommand{\chapterheadstart}{}
\renewcommand{\printchaptername}{}
\renewcommand{\chapternamenum}{}
\renewcommand{\printchapternum}{}
\renewcommand{\afterchapternum}{}
\renewcommand{\printchaptertitle}[1]{}
\renewcommand{\afterchaptertitle}{}
}
% Light gray (10% black) named `shadecolor'.
% NOTE(review): presumably consumed by memoir's shaded/framed boxes -- nothing
% in this class references it directly; confirm it is still needed.
\definecolor{shadecolor}{gray}{0.9}
% Section headings: \large bold, ragged right. The argument is the heading text
% that memoir passes to the section head style.
\newcommand{\proposalsec}[1]{%
\large\bfseries\raggedright #1
}
\setsecheadstyle{\proposalsec}
% Print section numbers as plain arabic numerals.
\renewcommand{\thesection}{\arabic{section}}
% Make `proposal' the active chapter style, replacing the `article' style
% selected earlier in this class.
\chapterstyle{proposal}
% 26 Oct 2010 : GWA : Section styles.
\setsecnumformat{\csname the#1\endcsname\space---\space}
\setbeforesecskip{-1.0ex plus -0.5ex minus -0.2ex}
\setaftersecskip{1.0ex plus 0.2ex minus 0.1ex}
% 26 Oct 2010 : GWA : Subsection styles.
\setcounter{secnumdepth}{2}
\setsubsecheadstyle{\bfseries\raggedright}
\setbeforesubsecskip{1ex plus -0.2ex minus -0.2ex}
\setaftersubsecskip{0.3ex plus -0.2ex minus -0.2ex}
\setbeforesubsubsecskip{-1.0ex plus -0.2ex minus -0.2ex}
\setaftersubsubsecskip{-0.3ex plus -0.2ex minus -0.2ex}
% flushenumbf: numbered list with bold arabic labels ("1.", "2.", ...) driven by
% the counter flushenumbfenum. The left margin and label width are zero and the
% first line of each item is indented by 0.5em, so item text runs flush with
% the surrounding paragraph. Numbering restarts each time the environment opens.
\newcounter{flushenumbfenum}
\newenvironment{flushenumbf}{
\begin{list}{\textbf{\arabic{flushenumbfenum}.}}
{\setlength{\leftmargin}{0pt}}%
\setlength{\labelwidth}{0pt}
\setlength{\itemindent}{0.5em}
\setlength{\labelsep}{0.5em}
\usecounter{flushenumbfenum}}
{\end{list}}
% 22 Nov 2010 : GWA : Research questions environment. Produces a running list
% (i.e., counters do not reset) prefaced by "Q1", "Q2", etc.
% The list is set inside a `framed' box with the trivlist separations zeroed
% for the duration of the environment (\savetrivseps / \zerotrivseps /
% \restoretrivseps).
% Why two counters: \usecounter resets researchquestionenum to zero whenever the
% list opens, so the last value is copied into researchquestionenumtmp when the
% environment closes and copied back right after \usecounter on the next use.
\newcounter{researchquestionenum}
\newcounter{researchquestionenumtmp}
\newenvironment{researchquestions}{
\savetrivseps
\zerotrivseps
\vspace{0.5em}
\begin{framed}
\vspace*{-0.5em}
\begin{list}{\textbf{Q\arabic{researchquestionenum}.}}
{\setlength{\leftmargin}{2em}}%
\setlength{\labelwidth}{4em}
\setlength{\itemindent}{0pt}
\setlength{\labelsep}{0.5em}
\setlength{\topsep}{0pt}
\setlength{\partopsep}{0pt}
\setlength{\parskip}{0pt}
\usecounter{researchquestionenum}
\setcounter{researchquestionenum}{\value{researchquestionenumtmp}}}
{\end{list}
\end{framed}
\setcounter{researchquestionenumtmp}{\value{researchquestionenum}}
\restoretrivseps}
% researchtasks: list with bold labels "R1.", "R2.", ... driven by the counter
% researchtaskenum, with a 2em left margin. Unlike researchquestions there is
% no saved counter, so numbering restarts each time the environment opens.
\newcounter{researchtaskenum}
\newenvironment{researchtasks}{
\begin{list}{\textbf{R\arabic{researchtaskenum}.}}
{\setlength{\leftmargin}{2em}}%
\setlength{\labelwidth}{4em}
\setlength{\itemindent}{0pt}
\setlength{\labelsep}{0.5em}
\usecounter{researchtaskenum}}
{\end{list}}
% researchmethods: list with lowercase alphabetic labels "a.", "b.", ... (not
% bold) driven by the counter researchmethodsenum. Same flush layout as
% flushenumbf: zero left margin and label width, 0.5em first-line indent.
% Numbering restarts each time the environment opens.
\newcounter{researchmethodsenum}
\newenvironment{researchmethods}{
\begin{list}{\alph{researchmethodsenum}.}
{\setlength{\leftmargin}{0pt}}%
\setlength{\labelwidth}{0pt}
\setlength{\itemindent}{0.5em}
\setlength{\labelsep}{0.5em}
\usecounter{researchmethodsenum}}
{\end{list}}
% broaderimpacts: framed running list with bold labels "B1.", "B2.", ... that
% continues numbering across separate uses of the environment.
% Why two counters: \usecounter resets broaderimpactenum to zero whenever the
% list opens, so the last value is copied into broaderimpactenumtmp when the
% environment closes and copied back right after \usecounter on the next use.
% Trivlist separations are zeroed inside the environment and restored at the end.
\newcounter{broaderimpactenum}
\newcounter{broaderimpactenumtmp}
\newenvironment{broaderimpacts}{
\savetrivseps
\zerotrivseps
\vspace{0.2em}
\begin{framed}
\vspace*{-0.5em}
\begin{list}{\textbf{B\arabic{broaderimpactenum}.}}
{\setlength{\leftmargin}{2em}}%
\setlength{\labelwidth}{4em}
\setlength{\itemindent}{0pt}
\setlength{\labelsep}{0.5em}
\setlength{\topsep}{0pt}
\setlength{\partopsep}{0pt}
\setlength{\parskip}{0pt}
\usecounter{broaderimpactenum}
\setcounter{broaderimpactenum}{\value{broaderimpactenumtmp}}}
{\end{list}
\end{framed}
\setcounter{broaderimpactenumtmp}{\value{broaderimpactenum}}
\restoretrivseps}
% timeenum: list whose bold labels read "t = 0", "t = 1", ... The counter
% timeenumcounter is set to -1 after \usecounter so that the increment performed
% by the first \item yields 0. Trivlist separations are zeroed inside the
% environment and restored at the end.
\newcounter{timeenumcounter}
\newenvironment{timeenum}{
\savetrivseps
\zerotrivseps
\begin{list}{\textbf{t = \arabic{timeenumcounter}}}
{\setlength{\leftmargin}{3em}}%
\setlength{\labelwidth}{3em}
\setlength{\itemindent}{0pt}
\setlength{\labelsep}{1em}
\usecounter{timeenumcounter}
\setcounter{timeenumcounter}{-1}}
{\end{list}
\restoretrivseps}
% conclusion: sets its body inside a `framed' box, with 0.2em of space above
% the frame and the first 0.5em inside the frame pulled back. Trivlist
% separations are zeroed inside the environment and restored at the end.
\newenvironment{conclusion}{
\savetrivseps
\zerotrivseps
\vspace{0.2em}
\begin{framed}
\vspace*{-0.5em}}
{\end{framed}
\restoretrivseps}
% tightcenter: a `center' environment with the trivlist separations zeroed
% while it is open (and restored afterwards).
\newenvironment{tightcenter}{
\savetrivseps
\zerotrivseps
\begin{center}}
{\end{center}
\restoretrivseps}
% tightcentertitle: like tightcenter, but with an explicit 0.1in of vertical
% space inserted before and after the centered material.
\newenvironment{tightcentertitle}{
\savetrivseps
\zerotrivseps
\vspace*{0.1in}
\begin{center}}
{\end{center}
\vspace*{0.1in}
\restoretrivseps}
\tightlists
\firmlists
\renewcommand{\bibname}{References}
\renewcommand{\thetable}{\arabic{table}}
% indentpar{<length>}: an unlabeled list used to indent a block of text. The
% single argument is the left margin; item indent, label separation, \parskip
% and \parsep are all set to zero. Content must still be introduced with \item,
% as in any `list' environment.
\newenvironment{indentpar}[1]{
\begin{list}{}%
{\setlength{\leftmargin}{#1}}%
\setlength{\itemindent}{0em}
\setlength{\parskip}{0pt}
\setlength{\parsep}{0pt}
\setlength{\labelsep}{0em}}
{\end{list}}
% 06 Dec 2010 : GWA : Figure and captioning commands.
\renewcommand{\thefigure}{\arabic{figure}}
\captiondelim{ --- }
\captionnamefont{\small\bfseries}
\captiontitlefont{\small}
\nonzeroparskip
\setlength{\parindent}{0pt}

View File

@ -1,8 +1,8 @@
% !TEX root = ../fullproposal.tex
In a preliminary study~\cite{pocketdata}, we instrumented Android smartphones being used as the primary device of 11 UB students, faculty and staff for a period of one month.
The SQLite embedded database included as part of the Android platform was modified to log a trace of all queries executed, along with metadata such as the number of rows returned, time taken, and the application process executing the query.
The SQLite embedded database included as part of the Android platform was modified to log a trace of all SQL statements executed, along with metadata such as the number of rows returned, time taken, and the application process that issued the statement.
To protect participant privacy, our instrumentation removed as much personally-identifying information as possible and recorded prepared statement arguments only as hash values.
With participant permission, we have made these traces publicly available.
With participant permission, we have made these traces publicly available~\cite{pocketdata}.
We conducted a preliminary analysis of these traces, the key parts of which we summarize here to provide a sense of the type of information that we will make available to the \PocketData{} community.
We captured approximately 45 million statements executed by SQLite over the 1 month period.
@ -31,7 +31,7 @@ Figure~\ref{fig:coarseSelectComplexity} shows the distribution of \texttt{SELECT
Even at this coarse-grained view of query complexity, the read-only portion of the embedded workload distinguishes itself from existing TPC benchmarks.
Like TPC-C~\cite{tpcc}, the vast majority of the workload involves simple, small requests for data that touch a small number of tables.
29.15 million, or about 87\% of the \texttt{SELECT} queries were simple select-project-join queries. Of those, 28.72 million or about 86\% of all queries were simple single-table scans or look-ups. In these queries, which form the bulk of SQLite's read workload, the query engine exists simply to provide an iterator over the relationally structured data it is being used to store.
Conversely, the workload also has a tail that consists of complex, TPC-H-like~\cite{tpch} queries. Several hundred thousand queries involve at least 2 levels of nesting, and over a hundred thousand queries access 5 or more tables. As an extreme example, our trace includes 10 similar \texttt{SELECT} queries issued by the Google Play Games Service, each of which accesses up to 8 distinct tables to combine developer-provided game state, user preferences, device profile meta-data, and historical game-play results from the user.
Conversely, the workload also has a tail that consists of complex, TPC-H-like~\cite{tpch} queries. Several hundred thousand queries involve at least 2 levels of nesting, and over a hundred thousand queries access 5 or more tables. As an extreme example, our trace includes 10 similar \texttt{SELECT} queries issued by the Google Play Games Service, each of which accesses up to 8 distinct tables to combine and summarize developer-provided game state, user preferences, device profile meta-data, and historical game-play results from the user.
\begin{figure}
\centering
@ -47,10 +47,48 @@ This query would have a join width of 2 (\texttt{R}, \texttt{S}) and 2 conjuncti
% For uniformity, \texttt{NATURAL JOIN} and \texttt{JOIN ON} (\textit{e.g.}, \texttt{SELECT R.A from R JOIN S ON B}) expressions appearing in the \texttt{FROM} clause are rewritten into equivalent expressions in the \texttt{WHERE} clause.
The first column of this table indicates queries to a single relation. Just over 1 million queries were full table scans (0 where clauses), and just under 27 million queries involved only a single conjunctive term. This latter class constitutes the bulk of the simple query workload, at just over 87\% of the simple look-up queries. Single-clause queries appear to be the norm.
\begin{figure*}
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.9\textwidth]{graphs/select_count_cdf_by_app}
\caption{}
\label{fig:selectByApp:all}
\end{subfigure}%
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.9\textwidth]{graphs/select_percent_simple_cdf_by_app}
\caption{}
\label{fig:selectByApp:simple}
\end{subfigure}%
\caption{\textbf{Breakdown of \texttt{SELECT} queries by app. (a) Cumulative distribution of applications by the number of \texttt{SELECT} queries issued (note the logarithmic scale). (b) Cumulative distribution of applications by the percent of the app's \texttt{SELECT} queries that are key value queries (full table scans or exact key look-ups).}}
\label{fig:selectByApp}
\end{figure*}
Over the course of the one-month trace we observed 179 distinct apps, ranging from built-in Android applications such as \textit{Gmail} or \textit{YouTube}, to video players such as \textit{VLC}, to games such as \textit{3 Kingdoms}. Figure~\ref{fig:selectByApp:all} shows the cumulative distribution of apps sorted by the number of queries that the app performs. The results are extremely skewed, with the top 10\% of apps each posing more than 100 thousand queries over the one-month trace. The most query-intensive system service, \textit{Media Storage}, was responsible for 13.57 million queries, or just shy of 40 queries per minute per phone. The most query-intensive user-facing app was \textit{Google+}, which performed 1.94 million queries over the course of the month, or 5 queries per minute.
At the other end of the spectrum, the bottom 10\% of apps posed as few as 30 queries over the entire month.
We noted above that a large proportion of \texttt{SELECT} queries were exact look-ups; Indeed many applications running on the device are using SQLite as a simple key-value store. For 24 apps (13.4\%), we observed \emph{only} key-value queries during the entire, month-long trace.
We noted above that a large proportion of \texttt{SELECT} queries were exact look-ups; indeed, many applications running on the device are using SQLite as a simple key-value store. As seen in Figure~\ref{fig:selectByApp:simple}, for 24 apps (13.4\%), we observed \emph{only} queries that would have been supported by a trivial key-value API for the full span of the month-long trace.
\begin{figure}
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.9\textwidth]{graphs/data_mod_ops_cdf_by_app}
\caption{}
\label{fig:updateByApp:modOps}
\end{subfigure}%
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.9\textwidth]{graphs/read_write_ratio_cdf_by_app}
\caption{}
\label{fig:updateByApp:writeRatio}
\end{subfigure}%
\caption{\textbf{App-level write behavior. (a) Cumulative distribution of applications by number of data manipulation statements performed (note the logarithmic scale). (b) Cumulative distribution of applications by read/write ratio. }}
\label{fig:updateByApp}
\end{figure}
Figure~\ref{fig:updateByApp:modOps} illustrates app-level write workloads, sorting applications by the number of \texttt{INSERT}, \texttt{UPSERT}, \texttt{UPDATE}, and \texttt{DELETE} operations that could be attributed to each. The CDF is almost perfectly exponential, suggesting that the number of write statements performed by any given app follows a long-tailed distribution, a feature to be considered in the design of a pocket data benchmark.
Figure~\ref{fig:updateByApp:writeRatio} breaks apps down by their read/write ratio. Surprisingly, 25 apps (14\% of the apps seen) did not perform a single write over the course of the entire trace. Manual examination of these apps suggested two possible explanations. Several apps have reason to store state that is updated only infrequently. For example, \textit{JuiceSSH} or \textit{Key Chain} appear to use SQLite as a credential store. A second, far more interesting class of apps includes apps like \textit{Google Play Newsstand}, \textit{Eventbrite}, \textit{Wifi Analyzer}, and \textit{TuneIn Radio Pro}, all of which have components that query data stored in the cloud. We suspect that the cloud data is being encapsulated into a pre-constructed SQLite database and being pushed to, or downloaded by the client applications.
This type of behavior might be compared to a bulk ETL process or log shipment in a server-class database workload, except that here, the database has already been constructed. Pre-caching through database encapsulation is a unique feature of embedded databases, and one that is already being used in a substantial number of apps.
\begin{figure*}[t]
\centering
@ -70,14 +108,14 @@ We noted above that a large proportion of \texttt{SELECT} queries were exact loo
\caption{}
\label{fig:app:rowcount}
\end{subfigure}%
\caption{Per-App Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}
\caption{\textbf{Per-App Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}}
\label{fig:app}
\end{figure*}
Figure~\ref{fig:app} shows query interarrival times, runtimes, and returned row
counts for ten of the most active SQLite clients. As seen in
Figure~\ref{fig:app:interarrival}, the 0.01Hz periodicity is not unique to any one
application, further suggesting filesystem locking as a culprit. Two of the most
Figure~\ref{fig:app:interarrival}, a 0.01Hz periodicity in arrival times is common to all
applications, suggesting filesystem locking as a culprit. Two of the most
prolific SQLite clients, \textit{Google Play services} and \textit{Media Storage}
appear to be very bursty: 70\% of all statements for these applications are issued
within 0.1ms of the previous statement. Also interesting is the curve for queries
@ -85,6 +123,7 @@ issued by the \textit{Android System} itself. The interarrival time CDF appears
to be almost precisely logarithmic for rates above 10$\mu$s, but has a notable lack
of interarrival times in the 1ms to 10ms range. This could suggest caching
effects, with the cache expiring after 1ms.
As seen in Figure~\ref{fig:app:runtime}, most apps hold to the average runtime of
100$\mu$s, with several notable exceptions. Over 50\% of the
\textit{Android System}'s statements take on the order of 1ms. Just under 20\% of
@ -99,7 +138,7 @@ the number of rows returned in general varies much more widely. Many of these
apps' user interfaces have both a list and a search view that show multiple records
at a time, suggesting that these views are backed directly by SQLite. Although all
apps have long tails, two apps in particular, \textit{Gmail} and \textit{Google+}, are
notable for regularly issuing queries that return on the order of 100 rows.
notable for regularly issuing queries that return on the order of hundreds of rows.
\begin{figure*}
\centering
@ -111,22 +150,23 @@ notable for regularly issuing queries that return on the order of 100 rows.
\includegraphics[width=\textwidth]{graphs/facebook-minified.pdf}
\caption{}
\end{subfigure}%
\caption{Variations in bursty data access patterns~\cite{ramamurthy2015pocketdata} for WhatsApp (a) and Facebook (b).}
\caption{\textbf{Variations in bursty data access patterns~\cite{ramamurthy2015pocketdata} for WhatsApp (a) and Facebook (b).}}
\label{fig:burstiness}
\end{figure*}
Figure~\ref{fig:burstiness} shows variations in query burstiness across multiple apps and users\footnote{The PIs have already incorporated material from this proposal into their coursework. Figure \ref{fig:burstiness} is from a student report~\cite{ramamurthy2015pocketdata} from UB's CSE-662, jointly instructed by PIs Kennedy and Ziarek.}.
Figure~\ref{fig:burstiness} shows variations in query burstiness across multiple apps and users\footnote{The PIs have already incorporated material from this proposal into their coursework. Figure \ref{fig:burstiness} is from a student report~\cite{ramamurthy2015pocketdata} from UB's CSE-662, jointly instructed by PIs Kennedy and Ziarek. The student group performed an app-centric analysis of the query traces.}.
Two features immediately emerge from this data.
First, \PocketData{} workloads are extremely bursty: the default steady state is completely idle, with infrequent bursts of hundreds of operations per second.
Second, the nature of these bursts varies significantly by the calling app: in this trace, Facebook generates a read-only workload, while WhatsApp produces two bursts, each with a distinct mix of updates, inserts, deletes, and selects.
\medskip
We plan to freely releasing aggregate metrics about database usage patterns in
We will freely release aggregate metrics about database usage patterns in
embedded smartphone databases.
We also plan to make our source traces available under IRB-approved conditions.
We also plan to make our source traces available to researchers with approval
from their institution's IRB.
By doing this, we will enable other researchers to begin exploring the bottlenecks
in and practical limitations of existing embedded databases and abstraction layers
like object-relational mappers developed over them.
in and practical limitations of existing embedded databases, as well as in abstraction layers
like object-relational mappers.
Better understanding the space will help to identify new research challenges, and
help to encourage researchers to join the \PocketData{} community.

View File

@ -3,7 +3,8 @@
We will provide an instrumentation toolkit for the \PocketData{} community. The goal of this toolkit is twofold: (1) Gathering usage traces and metrics from phones deployed in real-world settings, and (2) Reliably measuring system performance on simulated and replayed \PocketData{} workloads.
There are several challenges unique to the \PocketData{} setting that make instrumenting smartphone embedded databases difficult.
The simplest of these is that smartphones rely on specialized operating systems, hardware, and virtualization that can make it difficult to deploy existing measurement tools designed for desktops.
Many of these tools are easily portable, but there are several more subtle and difficult challenges involved in instrumentation.
Many of these tools can be ported and we will endeavor to supplement existing community efforts in doing so.
There are also several more subtle challenges specific to instrumenting \PocketData{}.
A key challenge is the types of bottlenecks that \PocketData{} workloads encounter.
Typical metrics for enterprise benchmarks include throughput at saturation, joules per unit of throughput, and throughput vs latency curves.
@ -18,7 +19,7 @@ For example, when a CPU is spending time idling, reliably attributing CPU cycles
A further concern that makes \PocketData{} instrumentation difficult is that embedded databases are typically stored in self-contained files.
Replicating an embedded database can be as simple as initiating a file transfer over HTTP.
We observed many apps exploiting this feature in practice~\cite{pocketdata}.
Because file transfers bypass the normal embedded database library, fully capturing an app's interactions with an embedded database requires jointly instrumenting other aspects of the OS, including the filesystem and network layers.
File transfers bypass the normal embedded database library, so fully capturing an app's interactions with an embedded database requires jointly instrumenting other aspects of the OS, including the filesystem and network layers.
As part of the proposed work, we will develop an instrumentation toolkit that can be used to reliably track an app's embedded database activities and to reliably measure embedded database performance.
As part of the proposed work, we will develop an instrumentation toolkit that can reliably track an app's embedded database activities and measure all facets of an embedded database's performance.

View File

@ -5,7 +5,7 @@ suite, which will serve three roles for the \PocketData{} community.
First, a benchmark will foster research on embedded databases by
creating a realistic standard for evaluation, allowing for fair comparisons
across competing research efforts.
Second, by providing a precise set of metrics to optimize, a benchmark
Second, by providing a precise set of metrics to optimize for, a benchmark
will serve to guide the research community's efforts towards pertinent
real-world challenges faced by smartphone applications.
@ -16,9 +16,9 @@ effort to track changes in app usage behaviors and bottlenecks.
We will develop a modular benchmark along the lines of
PolePosition~\cite{poleposition}, driven by modules that
capture the semantics and behavior of a class of applications.
Using the metrics data that we gather and release, we will lead a
an effort to continually monitor for changes in app usage patterns,
and how phone users engage with data-driven apps.
Based on the metrics gathering efforts discussed above, we will lead
an effort to continually monitor apps' data usage patterns for changes,
as well as for changes in how phone users engage with data-driven apps.
As new patterns are discovered by the \PocketData{} community,
we will maintain \textit{a repository of modules describing these
behaviors}.
@ -31,8 +31,8 @@ Ideally, we will be able to link individual queries to triggering events (user i
Although we hope to automate this process eventually, our initial approach will be to focus on one app at a time.
This will not only help us to better understand the space, but also to generate realistic datasets by being able to analyze the specific app's schema and updates/inserts.
The Application tier of the full benchmark will consist of a representative cover of the 179 apps that we encountered in our preliminary analysis, as well as apps that we encounter in subsequent data gathering efforts.
The User tier will simulate the complete phone environment; Statistics for single user include of a cluster of app modules, and patterns of charging (when is the phone plugged in?), network access (when is the phone on the internet, and with what quality?), and other behavioral traits that impact app data access patterns.
To simulate users, we will use standard clustering techniques on our trace data to create both canonical user profiles, and to identify natural variation around those profiles.
The User tier will simulate the complete phone environment. Statistics for a single user include a cluster of app modules, patterns of charging behavior (when is the phone plugged in?), network access (when is the phone on the internet, and with what quality?), and other behavioral traits that impact app data access patterns.
To simulate users, we will use standard clustering techniques on our trace data first to create canonical user profiles, and then to identify natural variation around those profiles.
It is reasonable to ask why a specialized \PocketData{} database
@ -45,12 +45,12 @@ and mobile software.
Although AndroBench does include a component for simulating
the filesystem access patterns of SQLite, neither of these
benchmarks explicitly generates the structured data access patterns
necessary to evaluate a data management system.
necessary to evaluate a complete data management system.
Previous research efforts~\cite{jeong2013iostack} have used mobility
traces generated by MobiGen, fed to a virtual machine running
standard apps to generate semi-realistic traces of embedded
common apps such as Facebook to generate semi-realistic traces of embedded
database access patterns.
Although our approach follows a more principled approach based
Although \PocketData{} follows a more principled approach based on
real-world traces, the metrics we release could be used to validate
and standardize data generation tricks of this sort.
@ -68,17 +68,17 @@ The most intensive database user in our preliminary study,
\textit{Google Play services} had 14.8 million statements attributed
to it, just under half of which were writes.
This equates to about one write every 3 seconds, which is substantial
from a power management and latency perspective, but for concurrency.
from a power management and latency perspective, but which is unlikely
to create a concurrency bottleneck.
Second, many OLTP benchmarks focus on comparatively simple
queries.
This is reasonably descriptive of a notable portion of the workload we
observed in our preliminary study:
A notable portion of the workload we observed in our preliminary study can indeed be described as simple:
13\% of the applications we observed had a read workload that
consisted exclusively of key/value queries, and over half of the applications
we observed had a workload that consisted of at least 80\% key/value queries.
However, the remaining queries are not as simple.
The more complex queries we observed in our preliminary study include
However, the trace also exhibited a long tail of extremely complex queries.
A small, but significant number of queries we observed in our preliminary study include
multiple levels of query nesting, wide joins, and extensive use of aggregation.
As such, they more closely resemble analytics (OLAP) workload benchmarks
such as TPC-H~\cite{tpch}, The Star-Schema Benchmark~\cite{ssb}, and
@ -98,11 +98,11 @@ PolePosition simulates the behavior of specific data structure abstractions
that need to be backed by a data management system.
Because data structures are defined using higher-level operational
semantics rather than through a fixed database API, databases are
allowed to specialize for specific access patterns that the database may
allowed to specialize benchmarks to specific access patterns that the database may
be optimized for.
The fundamental goals of PolePosition and the \PocketData{} benchmark
are similar, but \PocketData{} will operate at a higher level of abstraction,
capturing the behavior of entire apps and users engaging with those apps.
capturing the behavior of entire apps, as well as users that engage with those apps.

View File

@ -1,8 +1,10 @@
% !TEX root = ../fullproposal.tex
Even the short, month-long query trace with only 11 users on which our preliminary study was based included over 45 million SQL statements.
As the experiment is scaled up, analyzing these query traces will become increasingly difficult.
Compounding the issue, the comparatively high complexity of many of the queries makes it difficult to flatten the SQL parse trees into a simple relational format for analysis.
Our preliminary analysis required repeated iterations of our feature extraction process: We would define a procedure for extracting interesting features of a SQL statement's parse tree, construct a visualization from the extracted feature, and then identify a new feature of interest.
\begin{itemize}
\item SQL parsing is heavyweight
\item
\end{itemize}
As part of the proposed work, we will release tools for analyzing query logs that streamline this iterative process, by making it easy to define new feature extractors.
As feature extraction is an embarrassingly parallel task, simple optimizations like caching, parallelism, and incremental computation~\cite{kennedy2011dbtoaster} can be used to make these tools extremely efficient\footnote{As a comment on the utility of specialized tools for log analysis, we return to the CSE-662 project involving analyzing \PocketData{} logs. The four students began with a naive analysis tool (written by the students in Java) that took multiple hours to complete one iteration of the analytics cycle. By the end of the course, they had optimized the tool to run in under 10 seconds~\cite{ramamurthy2015pocketdata}.}.
Source code for all visualizations that we release as part of our summary metrics will be released to the public to further encourage community participation in \PocketData{}.

View File

@ -1,6 +1,6 @@
% !TEX root = ../fullproposal.tex
We will build an initial \PocketData{} community and facilitate engagement with the broader CISE community through outreach efforts including attending poster sessions and hosting workshops and tutorials
We will build an initial \PocketData{} community and facilitate engagement with the broader CISE community through outreach efforts including attending poster and demo sessions and hosting workshops and tutorials
co-located with major conferences in databases (VLDB, SIGMOD, ICDE), mobile and real-time systems (MobiSys, OSDI, RTSS, RTAS), and programming languages (POPL, PLDI, OOPSLA).
Poster sessions provide an ideal opportunity to meet researchers in related areas, to advertise the resources we plan to offer, and gather feedback about the needs of potential \PocketData{} community members.

View File

@ -7,7 +7,7 @@
The world's 2~billion smartphones and 4~million apps have become a large part
of most people's computing experiences.
%
A common requirement of apps is persisting structured data, a task frequently
Most apps need to persist structured data, a task frequently
performed using an \textit{embedded database} such as SQLite.
%
These are heavily used, with Android smartphones generating an average of
@ -38,7 +38,7 @@ understood.
%%%%%%%%%%%%%%%%
To date, there have been some initial explorations of small-scale data
To date, there have been some initial explorations of small-, personal-, or pocket-scale data
management, both in academia and by industry:
%
\begin{itemize}
@ -46,9 +46,9 @@ management, both in academia and by industry:
database responsiveness, and database performance on smartphones and tablets.
\item Saarland University's Janiform Document project explores interactive
manuscripts that include embedded, query-able research data and visualizations.
\item Oracle, SAP Labs, Facebook, LMDB, SQLite, and WiredTiger are all actively
engaged in research and development of embedded database software.
\item Atanas Rountev's group at Ohio State is exploring responsiveness issues
\item Oracle, SAP Labs, Facebook, LMDB, SQLite, and MongoDB are all actively
engaged in research on and development of embedded database software.
\item The Presto group at Ohio State is exploring responsiveness issues
in Android caused by data-flow limitations.
\item The DAS Lab at Harvard's work on adaptive data management considers
the challenges of specializing databases for small data.
@ -56,44 +56,46 @@ the challenges of specializing databases for small data.
smartphone apps interact with embedded-databases.
\end{itemize}
%
There is clearly interest in data management challenges that arise at the small- and
pocket-scales.
There is clearly interest in data management challenges that arise in small-scale data management.
%
Unfortunately, unlike the largely homogeneous workloads and platforms that
common to research on classical monolithic enterprise databases, \PocketData{}
is far more diverse.
Unfortunately, unlike the largely homogeneous workloads and platforms that are
standard in research on classical enterprise databases, this new \PocketData{}
setting is far more diverse.
%
Data access patterns vary wildly by user, time of day, mix of installed apps,
Data access patterns are extremely bursty and can vary wildly by user, time of day, mix of installed apps,
network accessibility, and many other factors.
%
Platform properties such as RAM, persistent storage, CPU performance, and network
bandwidth vary wildly, sometimes by multiple orders of magnitude.
bandwidth also exhibit extreme variations across phones, sometimes by multiple orders of magnitude.
%
Resource availability can also vary: some users keep their phones constantly
charged, while others go multiple days without charging.
charged, while others go multiple days without plugging their phones in.
%%%%%%%%%%%%%%%%
The heterogeneity of the \PocketData{} setting, make it challenging for researchers to
The heterogeneity of the \PocketData{} setting makes it challenging for researchers to
understand the tradeoffs and requirements of the setting.
%
This lack of clear high-level goals, in turn, makes it difficult to clearly identify successful
research contributions and creates a daunting environment for new research efforts.
research contributions.
%
Lacking the resources necessary to better understand and adapt to \PocketData{} scale,
Unfortunately, pinning down specific goals first requires a concerted effort to gather (and analyze)
traces of data usage patterns from real-world settings, creating a high barrier to entry for new
researchers.
%
Lacking the resources necessary to better understand and adapt to the \PocketData{} scale,
research efforts in the area are presently limited.
%%%%%%%%%%%%%%%%
\textbf{Target Community}
Research on mobile devices and the internet of things is cross cutting, intersecting communities
Research on mobile devices and the more general space of the internet of things (IoT) is cross-cutting, intersecting communities
that work on data management systems, real-time and embedded devices, programming languages,
and operating and mobile systems. We believe research involving \PocketData{} also lies at
the intersection of these communities. Specialized databases systems for embedded devices is
a growing topic in the database community. As embedded processors become more capable, with
larger amounts of main memory available (e.g. Intel Galileo), there is a growing push from the embedded
and also from the real-time communities to explore larger software capabilities, including database
systems and query processing systems, in embedded deployments. The programming language
the intersection of these communities. Specialized database systems for embedded devices are re-emerging as an interesting topic in the database community. As embedded processors become more capable, with
larger amounts of main memory available (e.g. Intel's Edison platform), there is a growing push from the embedded systems
and the real-time communities to explore larger software capabilities, including database
systems and query processing systems in embedded hardware deployments. The programming language
community is exploring domain specific languages for specialized query processing.
The mobile community is continually exploring how to push the envelope on smartphone based computing,
whether via power aware mechanisms, or through more adaptive systems. Many of these solutions use mobile databases
@ -101,12 +103,12 @@ as their fundamental computation engine (e.g. the `\texttt{maybe}' system develo
consider the performance characteristics of database systems (e.g. power modeling).
This proposal aims to create a community research infrastructure around our \PocketData{} toolchain
to enable a myriad of research activities for above mentioned communities. Additionally,
to enable a myriad of research activities for the above mentioned communities. Additionally,
in this planning grant, we will explore the precise needs of these communities to ensure an
infrastructure that has broad applicability.
We will reach out to researchers in closely related areas including Internet of Things,
Adaptive Data Management, and Sensor Networks, and help them to explore how \PocketData{}
can impact their research.
can help to improve their research.
As part of these outreach efforts, we will provide resources that will simultaneously support
researchers' existing projects, while also helping to enable new projects with a focus on
\PocketData{}.
@ -117,10 +119,20 @@ During this planning grant we will focus our efforts in three key areas:
\begin{enumerate}
\item \textbf{Expansion to IoT}: Our current efforts have focused primarily exploring questions
\item \textbf{Growth of the Mobile Embedded Database community}: We have established an
initial community of interested CISE researchers for \PocketData{} from both
academia and industry. We believe that this community
shows that there is sufficient interest within CISE to pursue our proposed \PocketData{} infrastructure.
However, for long term success we would like to expand this community to ensure that the infrastructure
meets the needs of the broader community and not just a specific research niche.
\item \textbf{Expansion to IoT}: Our preliminary efforts have focused on questions
relating to \PocketData{} in the mobile domain, specifically Android. Although a \PocketData{}
infrastructure based solely in Android is valuable, we believe a more comprehensive infrastructure
must take into account recent developments in IoT.
must take into account recent developments in IoT. There are similarities between how mobile
applications leverage embedded databases and how proposed IoT applications would use embedded
databases, specifically in the areas of personal health care devices that aggregate and summarize a user's personal data and smart city deployments where small devices process data before sending \emph{relevant} data for more centralized big data analytics.
We propose to expand and modify our \PocketData{} infrastructure to meet the needs of the IoT community.
\item \textbf{Workshops and Tutorials}:
To facilitate engagement with the broader CISE community and to develop an initial
@ -129,14 +141,14 @@ major conferences in the database, systems, real-time systems, and programming l
communities.
Our budget includes funding for travel to such conferences to host workshops and
tutorials. This will also enable the PIs to receive valuable feedback on the needs of the community in
structuring the \PocketData{} infrastructure.
designing and building out the \PocketData{} infrastructure.
\end{enumerate}
A successful planning grant will enable us to proceed with the development of a full
\PocketData{} infrastructure.
\PocketData{} infrastructure proposal.
Concretely, the following three resources will be developed as part of the full infrastructure
proposal:
\begin{enumerate}
@ -154,30 +166,55 @@ summarizing those datasets.
\item \textbf{Standards and Benchmarks}:
We will create a toolkit to establish a set of standards for evaluating research efforts on
\PocketData{} for both Android and IoT.
First, significant parts of the Android platform have been locked down for reasons of
security and intellectual property, making properties like process scheduling and power usage
difficult to measure reliably.
The \PocketData{} setting requires unique metrics that can be difficult to reliably measure on the Android platform.
The toolkit will include instrumentation for Android that will make it easier for researchers
to measure the performance of their \PocketData{} tools.
to measure performance through rarely used metrics like availability of idle time, thread scheduling, power consumption, and other measures that can be hard to gather reliably on the Android platform like CPU and memory usage for specific libraries.
Second, to standardize comparisons across different research efforts, the toolkit will
include a benchmark suite.
This benchmark will create clearly defined metrics for evaluating success. Moreover, by
This benchmark will establish clearly defined metrics for evaluating data management solutions. Moreover, by
making it extensible, the benchmark will act as a clearinghouse for app behaviors discovered
in the wild and changing database requirements.
\item \textbf{Visualization}: We will create a data visualization tool and associated queries
to help researchers understand and navigate the data. The raw traces gather are very
larger and the bulk of the data may not be useful for answering a specific question a
given researcher may which to explore. Through visualization, filtering, data navigation,
and specialized queries, we will enable researchers to more quickly and accurately explore
relevant characteristics of \PocketData{}.
to help researchers understand and navigate the data. The raw traces we plan to offer researchers are very
large. Moreover, the rich structure and variability of SQL queries generated by smartphone apps do not admit traditional indexing strategies often used for analytics. By providing database-driven tools that aid in the analysis and visualization of the resulting queries, we will enable researchers to more quickly and accurately explore
relevant characteristics of real-world \PocketData{} workloads.
\end{enumerate}
\textbf{Qualifications of the PIs:} Much like the cross cutting nature of \PocketData{}, the PIs bring
expertise from three of the main communities our proposed \PocketData{} infrastructure would impact.
PI Kennedy works in databases, PI Challen has expertise in mobile systems, and PI Ziarek works at the
intersection of programming languages and real-time embedded systems.
\textbf{Qualifications of the PIs:}
%
The PIs bring cross-cutting expertise from three of the main communities our
proposed \PocketData{} infrastructure would impact.
%
All three have a record of successful
collaboration~\cite{pocketdata,Challen:2015:MWE:2699343.2699361}, and PIs
Kennedy and Ziarek have been working together for the last three and a half
years on adaptive
indexing~\cite{techreport,agarwal2013monadic,kennedy2015just}.
%
PI Kennedy's expertise covers databases, incremental
computation~\cite{Ahmad:2012:DHD:2336664.2336670,kennedy2011dbtoaster,koch2013dbtoaster},
uncertain data
management~\cite{Kennedy:2011:JEO:1989323.1989410,5447879,Yang:2015:LOA:2824032.2824055},
online aggregation~\cite{4812533,Kennedy:2011:FPP:1989323.1989482}, and
compiler
design~\cite{kennedy2011dbtoaster,Ahmad:2012:DHD:2336664.2336670,koch2013dbtoaster}.
%
PI Ziarek's expertise covers programming
languages~\cite{Ziarek:2011:CAE:1993498.1993572,Ziarek:2010:LCC:1852977.1852979},
real-time
systems~\cite{Blanton:2013:NIC:2512989.2512994,Yan:2013:RDR:2512989.2512990},
virtual
machines~\cite{Pizlo:2010:HPE:1755913.1755922,Pizlo:2010:SFR:1806596.1806615},
and compiler
design~\cite{Sivaramakrishnan:2012:ERB:2258996.2259005,Ziarek:2008:FTS:1466762.1466777}.
%
PI Challen's expertise comprises smartphone systems, including
networking~\cite{hotwireless2015-sharing,infocom2016-scans,hotnets2014-pocketsniffer},
architectural~\cite{iiswc2015-agility}, energy
management~\cite{mobicase2015-jouler,hotmobile2015-numerator} and
security~\cite{mobicase2014-pocketmocker} aspects.
\subsection{Datasets}
\input{sections/1-1-metrics.tex}

View File

@ -7,23 +7,23 @@
%\item Existing related resources along with a justification that the proposed research cannot be accomplished with these resources at the institution or elsewhere
%\end{itemize}}
In this section we present a few concrete projects that would benefit from \PocketData{} and then describe how the proposed infrastructure will enable
reach for the PIs and the broader CISE community.
In this section we present a few concrete projects that would benefit from \PocketData{} and describe how the proposed infrastructure will enable
research for the PIs and the broader CISE community.
\subsection{Adaptive Indexes}
\subsection{Adaptive Data Management}
Selecting the correct physical structure for a database under a given workload is an extremely challenging~\cite{Chaudhuri:1997:ECI:645923.673646,Chaudhuri:1998:ALI:276304.276337,Chaudhuri:2007:SDS:1325851.1325856,Agrawal:2000:ASM:645926.671701} part of database management.
The index selection problem becomes even harder when workload characteristics fluctuate rapidly or are not known in advance.
There is currently substantial interest in a breed of self-adapting, adaptive index structures~\cite{idreos2007database,Idreos:2011:MWC:2002938.2002944} that address dynamic index selection by facilitating \textit{incremental, online} changes to the index.
There is currently substantial interest in a breed of self-adjusting, adaptive index structures~\cite{idreos2007database,Idreos:2011:MWC:2002938.2002944} that address dynamic index selection by facilitating \textit{incremental, online} changes to the index.
Examples of adaptive indexes include Cracker Indexes~\cite{Idreos:2012:AIM:2247596.2247667,Idreos:2007:UCD:1247480.1247527,Halim:2012:SDC:2168651.2168652}, Adaptive Merge Trees~\cite{Graefe:2010:SSI:1739041.1739087,Graefe:2012:CCA:2180912.2180918}, SMIX~\cite{Voigt:2013:SSI:2484838.2484862}, H2O~\cite{163421}, and Just-in-Time Data Structures~\cite{kennedy2015just}.
Adaptive indexes automatically optimize their physical representation in response to incoming queries, reusing work used to answer the query to also improve subsequent queries. Given enough time, a stable workload, and queries that touch all data objects, an adaptive index eventually converges to a data representation similar to that of a static index.
\textbf{Infrastructure Justification:} Although there have been several efforts~\cite{Graefe:2010:BAI:1946050.1946063,schuhknecht2013uncracked} to develop benchmarks for adaptive indexes, these benchmarks rely on purely synthetic data and unit-tests rather than real-world scenarios.
This is in part because the typical enterprise workloads that rarely exhibit the type of drastic shifts that adaptive indexes target.
This is in part because typical enterprise workloads rarely exhibit the type of drastic shifts that adaptive indexes target.
As a result, most data management benchmarks evaluate systems under stable, steady-state workloads.
By contrast, \PocketData{} workloads often show extreme variation in both application demands and resource availability.
As a trivial example, an app might demand low-latency, low-power access to data when a user is actively using the phone, while admitting high-latency high-power organizational tasks when the phone is plugged in~\cite{Challen:2015:MWE:2699343.2699361}.
\textbf{Community Interest:} \textit{Stratos Idreos} from the DAS lab at Harvard will use the \PocketData{} metrics and benchmark workloads to evaluate his group's work on adaptive data systems.
\textbf{Community Interest:} \textit{Stratos Idreos}'s DAS lab at Harvard will use the \PocketData{} metrics and benchmark workloads to evaluate their work on adaptive data systems.
\citedquote{Stratos Idreos (Harvard)}{I think work on adaptive data systems could benefit. I assume Pocket Data will capture diverse workloads (from various apps) and so this would be a perfect environment to test adaptive data systems.
I have a new project on easy to design systems out of modules that can be synthesized. The input is workloads. Perhaps PocketData can provide a testing framework for such work for designing data systems for mobile environments.
}
@ -40,7 +40,7 @@ The relatively limited compute and memory resources available on tablets and sma
\textbf{Infrastructure Justification:} Small-data analytics efforts are presently siloed, with most research efforts targeting entire software stacks, from the user interface front-end to the back-end database.
The standard evaluation tools offered by the \PocketData{} benchmar would help to that decouple the research challenges involved in small-data analytics, and allow a broader community of researchers to contribute.
The standard evaluation tools offered by the \PocketData{} benchmark would help to decouple the research challenges involved in small-data analytics and allow a broader community of researchers to contribute.
For example, an embedded database benchmark simulating a visual query interface workload would serve as a standard for evaluating novel algorithms, indexes, and data management tools.
\textbf{Community Interest:}
@ -53,11 +53,11 @@ In this respect, data-driven smartphone apps are similar to data-driven enterpri
However, enterprise software is typically supported by experienced database administrators who can carefully fine-tune the database to efficiently support the application.
This is not the case for smartphone apps, which instead rely on compiler tools and software libraries to efficiently mediate access to persistent data.
Consequently, \PocketData{} offers new research opportunities at the interface between imperative programming languages like C, C\#, or Java, and back-end data management tools.
Forms of inline SQL like LinQ~\cite{box2007linq,Meijer:2006:LRO:1142473.1142552} have existed for nearly a decade, but are not frequently used in the design smartphone apps.
Forms of inline SQL like LinQ~\cite{box2007linq,Meijer:2006:LRO:1142473.1142552} have existed for nearly a decade, but are not frequently used in the development of smartphone apps.
Instead, app developers frequently rely on higher level primitives including object-relational mappers~\cite{Melnik:2007:CMB:1247480.1247532} (ORMs) like Hibernate~\cite{hibernate} to mediate access to the database.
Unfortunately, at present, most ORMs are implemented as libraries and lack the ability to introspect the invoking program.
This creates an impedance mismatch between the available information and SQL's declarative syntax, forcing ORMs to misuse SQL, or to rely on optional hints provided by the app developer to provide efficient data access.
In our preliminary exploration~\cite{pocketdata}, we found significant anti-patterns emerging in data access patterns.
In our preliminary exploration~\cite{pocketdata}, we found significant anti-patterns emerging in queries to SQLite.
Examples include the use of expensive \texttt{UPSERT} operations when \texttt{UPDATE}s would be sufficient, the use of multiple \texttt{SELECT} queries to dereference foreign-keys instead of using an outer-join query, and the use of separate read-then-write queries rather than in-place updates.
Several research efforts, including StatusQuo~\cite{StatusQuo}, Sloth~\cite{Cheung:2014:SLV:2588555.2593672}, and Truffle/Graal~\cite{wimmer2012truffle} have addressed similar problems in enterprise data-driven applications and could find new challenges in the \PocketData{} space.
Other research efforts explore data-flow in smartphones for performance optimization~\cite{yang-phd15,yang-icse15,rountev-cgo14} and correctness~\cite{yan-cgo14}, and would benefit from more detailed tools for introspection and measurement.
@ -65,34 +65,35 @@ Other research efforts explore data-flow in smartphones for performance optimiza
\textbf{Infrastructure Justification:} Research on data-driven app development requires a detailed understanding of application requirements, and programming language research needs real-world workloads to demonstrate its viability.
The metrics that we propose to gather and the benchmark suite we propose to develop are critical for driving research in this space.
\textbf{Community Interest:} \textit{Nasko Rountev} of Ohio State will use \PocketData{} as part of the Presto group's work on data-flow analysis to debug of GUI responsiveness issues and as part of his LeakDroid project.
\textbf{Community Interest:} \textit{Nasko Rountev} of Ohio State will use \PocketData{} as part of his work on data-flow analysis to debug GUI responsiveness issues and as part of his LeakDroid project.
\subsection{Database-App Coupling}
Smartphone apps are integrated with the data management tools they use to a far greater degree than enterprise applications.
Embedded databases are libraries that operate within the app's memory space, and not external tools.
Apps generate virtually all queries procedurally, making it possible to specify their data management requirements extremely precisely at compile time.
Moreover, access to data often occurs through higher-level primitives that are supported by their own library wrappers.
Moreover, access to data often occurs through higher-level primitives like ORMs.
In short, although embedded databases are in principle capable of emulating stand-alone database engines, in practice they are used more as toolkits of data management building blocks.
The tight coupling between app and database promises to offer numerous avenues for workload-driven database optimization.
A leader in this area is BerkeleyDB.
Although BerkeleyDB does provide a SQL emulation front-end, its core functionality is to provide simple database building blocks like primary and secondary indexing, foreign-key consistency primitives, and transactional access to data.
Similar efforts are taking place across multiple industrial research labs and startup companies, as numerous organizations have begun to invest into embedded databases, including MongoDB's WiredTiger~\cite{shakuntalagupta2015practical}, SAP's SqlAnywhere~\cite{4401024}, and Facebook's RocksDB, as well as open-source efforts including the H2 Database~\cite{mueller2006h2} and SQLite~\cite{sqlite}.
Similar efforts are taking place across multiple industrial research labs and startup companies, as numerous organizations have begun to invest in embedded databases. Corporate investment in embedded databases includes MongoDB's WiredTiger~\cite{shakuntalagupta2015practical}, SAP's SqlAnywhere~\cite{4401024}, and Facebook's RocksDB, as well as open-source efforts including the H2 Database~\cite{mueller2006h2} and SQLite~\cite{sqlite}.
The tight coupling between database and the invoking application also admits possibilities for more aggressive database specialization.
Database compilers like DBToaster~\cite{kennedy2011dbtoaster,koch2013dbtoaster,Ahmad:2012:DHD:2336664.2336670}, HyPer/LLVM~\cite{Neumann:2011:ECE:2002938.2002940}, and Legorithmics~\cite{Klonatos:2013:ASO:2463676.2465334,Klonatos:2014:BEQ:2732951.2732959} use aggressive compilation to create a database uniquely specialized for a specific application's query and update workload.
Database compilers like DBToaster~\cite{kennedy2011dbtoaster,koch2013dbtoaster,Ahmad:2012:DHD:2336664.2336670}, HyPer/LLVM~\cite{Neumann:2011:ECE:2002938.2002940}, and Legorithmics~\cite{Klonatos:2013:ASO:2463676.2465334,Klonatos:2014:BEQ:2732951.2732959} aggressively compile and optimize database engines that are uniquely specialized for a specific application's query and update workload, as well as its underlying hardware.
As already noted above, many of these statistics are available at compile time, making the \PocketData{} setting an ideal candidate for deploying these applications.
\textbf{Infrastructure Justification:} Realistic evaluation of embedded databases and database compilers requires realistic workloads. Moreover, smartphones are one of the most prolific examples of embedded databases deployed in the wild. Given the variation in smartphone apps' data management requirements, even limited data releases by a single app developer will not be representative. The metrics we will gather and the benchmark we are proposing will be key to helping researchers evaluate new embedded database tools.
\textbf{Community Interest:} \textit{Michael Brey} of Oracle is interested in participating in the \PocketData{} community to advance research on embedded databases.
\citedquote{Michael Brey (Oracle's BerkeleyDB Team)}{Within Oracle, we are always looking at how the industry both consumer and enterprise is using data in mobile applications. Things like db size, access patterns, single/multi user (multiple apps accessing same db), speed of access required, record size/structure etc. are all important to understand. We are also very interested in the movement of data from the device to some backend repository.}
\textbf{Community Interest:} \textit{Christoph Koch}'s DATA lab at EPFL is interested in using the \PocketData{} benchmark to evaluate their work on database compilers. \textit{Ashok Joshi} and \textit{Michael Brey} of Oracle are interested in participating in the \PocketData{} community to advance research on embedded databases.
\citedquote{Ashok Joshi (Senior Director at Oracle)}{I got some feedback from one of my colleagues on this topic. Yes, the real-world traces of embedded data usage would be useful; so would the benchmarking toolkit.}
\citedquote{Michael Brey (Oracle)}{Within Oracle, we are always looking at how the industry both consumer and enterprise is using data in mobile applications. Things like db size, access patterns, single/multi user (multiple apps accessing same db), speed of access required, record size/structure etc. are all important to understand. We are also very interested in the movement of data from the device to some backend repository.}
%Additionally, PI Kennedy will make use of the same resources in his efforts on incremental computation.
\subsection{Enabled Research For the PIs}
The PIs have a joint research project aimed at exposing \emph{uncertainty} in mobile computing~\cite{Challen:2015:MWE:2699343.2699361}. The project focuses on exposing new language primitives to the programmer to specify multiple implementation for
a given functionality allowing the system to pick which implementation to use at runtime. This allows the system to specialize software to a given hardware platform and more importantly to a given set of external
considerations (e.g. network connectivity, available sensors, etc.). Our proposed infrastructure will enable us to study two key aspects of uncertainty: (1) almost all mobile applications store user data and configuration parameters in
mobile databases, access to this data can have a profound impact on the behavior of an application, \PocketData{} will allow us to more readily study this aspect of mobile uncertainty; (2) the infrastructure powering our
The PIs have a joint research project aimed at exposing \emph{uncertainty} in mobile computing~\cite{Challen:2015:MWE:2699343.2699361}. The project focuses on exposing new language primitives to the programmer to specify multiple implementations of
system functionality allowing the system to pick which implementation to use at runtime. This allows the system to specialize software to a given hardware platform and more importantly to a given set of external
considerations (e.g. network connectivity, available sensors, etc.). Our proposed infrastructure will enable us to study two key aspects of uncertainty: (1) Almost all mobile applications store user data and configuration parameters in
mobile databases, and access to this data can have a profound impact on the behavior of an application. \PocketData{} will allow us to more readily study this aspect of mobile uncertainty; (2) The infrastructure powering our
runtime system for exposing uncertainty is built around a mobile database that stores possible choices the software system can make. \PocketData{} will allow us to optimize this database to reduce choice latency.
PIs Kennedy and Ziarek have a joint research project, Just-in-Time Data Structures (JITDs), focusing on adaptive indexing~\cite{kennedy2015just}.
@ -101,11 +102,7 @@ The level of variation in load and resource availability that occurs in \PocketD
As noted above, our proposed infrastructure will provide us with a benchmark workload that will help us to evaluate adaptive indexes under real-world conditions, rather than through purely synthetic workloads.
PI Kennedy is part of a collaborative research project with \textit{Shambhu Upadhyaya} (UB), \textit{Varun Chandola} (UB), \textit{Hung Ngo} (UB), and \textit{Long Nguyen} (UMich) that explores techniques for identifying insider attacks on databases (NSF-CNS-1409551).
Although the threat of insider attacks on mobile devices is minimal, the specific methodology behind the work involves summarizing query logs by clustering queries into groups of queries with similar ``intent.''
Although the threat of insider attacks on mobile devices may be minimal, the specific methodology behind the work involves summarizing query logs by creating clusters of queries with similar ``intent.''
The approach is showing promise for summarizing query logs from a corporate (banking) setting.
Having query logs from other settings like \PocketData{} would show that the approach can be generalized and may have applications beyond Insider Threat detection (for example to the design of index selection tools).
Having query logs from other settings like \PocketData{} would show that the approach can be generalized to domains other than Insider Threat detection (for example to the design of index selection tools).
If successful, these efforts could also contribute back to the \PocketData{} project, as a tool for quickly summarizing and clustering query logs would help to build out the visualization and benchmark design components of the proposed infrastructure.
\subsection{Enabled Research for the Broader Community}

View File

@ -1,22 +1,24 @@
% !TEX root = ../fullproposal.tex
The PIs have already reached out to the database and mobile systems communities for feedback on the current infrastructure, providing the PIs with an initial community and a preliminary source of
feedback on design, APIs, and features (details presented in Section~\ref{sec:research}).
feedback on design, APIs, and features. A detailed description can be found above, in Section~\ref{sec:research}. In summary, there is interest from researchers working on embedded databases, small-scale data management, personal sensing, query interfaces, and several closely related areas.
The \PocketData{} benchmark will serve as a focal point for the community's involvement by providing the community with a way to explore, discuss, and disseminate new data management use cases, and by offering a standard way to evaluate systems on those use cases.
Preliminary work on characterizing differences between \PocketData{} and traditional benchmarking infrastructures
was presented at the TPCTC symposium, which has allowed the PIs to solicit industrial feedback. The PIs are currently in first stage discussions with researchers from: VMware, Cisco, Google, Samsung, and Oracle.
was presented at the TPC-TC symposium, which has allowed the PIs to solicit industrial feedback. The PIs are currently in first stage discussions with researchers from VMware, Cisco, Google, Samsung, and Oracle regarding TPC involvement in the \PocketData{} benchmark.
From this starting point the PIs will also broaden their target communities to include researchers from programming languages as well as real-time and embedded systems.
The PIs believe that an expansions to expand the pervue of \PocketData{} to also include IoT, will broaden the utility of the proposed infrastructure.
Designing and optimizing specialized databases systems (typically stream databases) for IoT that are able to execute on embedded devices are
growing topics in the database community.
The PIs believe that expanding the purview of \PocketData{} to also include IoT will broaden the utility of the proposed infrastructure.
IoT has recently renewed interest in database systems that are specialized for IoT (stream databases, in-network query processors) and/or are capable of running on embedded devices (\textit{e.g.}, TinyDB~\cite{madden2005tinydb}).
As embedded processors become more capable, with
larger amounts of main memory available (e.g. Intel Galileo), there is a growing push from the embedded
and also from the real-time communities to explore including database
systems and query processing systems in small scale embedded systems. The PIs believe that emerging research in smart cities and personalized
larger amounts of main memory available (e.g. Intel's Edison platform), there is a growing push from the embedded
and also from the real-time communities to explore including databases and query processing in small scale embedded systems. The PIs believe that emerging research in smart cities and personalized
medical devices that aggregate and process biometric data would benefit from \PocketData{}.
Domains specific languages (DSLs) are becoming more pervasive as solutions proposed by the programming language community as mechanisms
to both easy programmer effort for specialized systems, but to also greatly improve performance in time, space, and even energy consumption.
The PIs believe that \PocketData{} will be of interested to programming language researchers who work on DSLs for IoT solutions.
Domain specific languages (DSLs) are becoming more pervasive as mechanisms
to both amplify programmer effort for specialized systems and to greatly improve performance in time, space, and even energy consumption.
The PIs believe that \PocketData{} will be of interest to both database and programming language researchers in the IoT space.
\citedquote{Ashok Joshi (Senior Director; Oracle NoSQL Database, Berkeley DB, Database Mobile Server)}{I think synchronizing device data with server data is a very common occurrence in this space. As a simple example, you should be able to synchronize your `contacts' database on your cell phone with a server repository. Recently, Mike Brey, Raghu Nambiar and I proposed a ``strawman'' IoT benchmark~\cite{ashok2015benchmarking} --- I think extending your work to include large-scale data synchronization would be worth considering.}
\textbf{Evidence of Support}
@ -25,21 +27,23 @@ our current efforts toward building a community and the community's support for
\begin{figure}[th]
\begin{center}
\begin{tabular}{rl||c|l}
\begin{tabular}{rl||c|c}
\hline
researcher & affiliation & research area & enabled research \\ \hline
Stratos Idreos & \emph{Harvard} & Databases & Adaptive Indexes\\
Arnab Nandi & \emph{Ohio State} & Databases/HCI & Data Analytics for IoT\\
\textbf{researcher} & \textbf{affiliation} & \textbf{research area} & \textbf{enabled research} \\ \hline
Stratos Idreos & \emph{Harvard} & Databases & Adaptive indexes\\
Arnab Nandi & \emph{Ohio State} & Databases/HCI & Interactive analytics\\
Nasko Rountev & \emph{Ohio State}& Programming Languages & Mobile data flow analysis\\
Michael Brey & \emph{Oracle } & Databases/Mobile Systems & Embedded DB performance\\
Meikel Poess & \emph{Oracle} & Databases & Performance analytics \\
Raghunath Nambiar & \emph{Cisco} & Databases & Performance analytics \\
Reza Taheri & \emph{VMWare} & Databases & Performance analytics \\
Jens Dittrich & \emph{Saarland University}& Databases/Mobile Systems & Small data analytics \\
Sharad Agarwal & \emph{Microsoft}& Mobile Systems/Sensing & Mobile systems privacy \\ \hline
Christoph Koch & \emph{EPFL} & Databases/Theory & Database compilers\\
Ashok Joshi & \emph{Oracle} & Databases/IoT & IoT performance\\
Michael Brey & \emph{Oracle} & Databases/Mobile Systems & Embedded DB performance\\
Meikel Poess & \emph{Oracle} & Databases & Performance measurement \\
Raghunath Nambiar & \emph{Cisco} & Databases & Performance measurement \\
Reza Taheri & \emph{VMware} & Databases & Performance measurement \\
Jens Dittrich & \emph{Saarland University}& Databases/Mobile Systems & Small-data analytics \\
Sharad Agarwal & \emph{Microsoft Research}& Mobile Systems/Sensing & Mobile systems performance \\ \hline
\end{tabular}
\end{center}
\caption{Enabled Research}
\caption{\textbf{Existing Community Interest in} \PocketData{}}
\label{tab:enabled}
\end{figure}

View File

@ -1,21 +1,27 @@
% !TEX root = ../fullproposal.tex
Our planning process will consist of a development effort and an outreach effort.
First and foremost, the centerpiece of our community-building efforts is the \PocketData{} benchmark.
In addition to acting as a standard for evaluating research efforts that overcome bottlenecks and limitations of existing technology, the benchmark will serve as a hub for the community to discuss and describe these limitations.
First and foremost, the centerpiece of our development-side community-building efforts is the \PocketData{} benchmark.
In addition to acting as a standard for evaluating research efforts that overcome bottlenecks and limitations of existing technology, the \textit{modular} benchmark will serve as a hub for the community to discuss and describe these limitations.
Under the guidance of the PIs, the graduate student supported by this proposal will be responsible for developing a preliminary prototype benchmark.
The first version of this benchmark will stress bottlenecks identified in our preliminary study~\cite{pocketdata} by simulating the behavior of a small number of smartphone apps.
Using data and query logs derived from our preliminary study, we hope to have version one of the benchmark ready within 4-6 months.
The benchmark will be released and advertised over community mailing lists like DBWorld~\cite{dbworld}.
By this point, we expect to have expanded the \PocketData{} community through our outreach efforts.
After releasing the benchmark we will hold a 3 month community feedback process, allowing us to release version 2 of the benchmark based on community feedback before the end of the planning period.
After releasing the benchmark we will hold a 3 month community feedback process, allowing us to release version 2 of the benchmark based on community feedback before the end of the planning period.
Additionally we will pursue feedback from the IoT community to understand how \PocketData{} can be extended to meet the IoT community's needs. We envision that these
needs will vary depending on the aspect of IoT a given community is interested in (e.g. language runtime design vs. embedded databases). To avoid creating an infrastructure only suited to the needs
of a particular niche, we will solicit feedback from many sources.
In addition to the \PocketData{} community, we will leverage interest from the Transaction Processing Performance Council (TPC) in developing an embedded database benchmark.
The TPC represents one of the most prominant names in database benchmarking, and is responsible for benchmarks like TPC-C~\cite{tpcc}, TPC-H~\cite{tpch}, and TPC-DS~\cite{tpcds} that are canonical tools for evaluating research in databases.
After presenting our preliminary work at the TPC's annual symposium colocated with VLDB 2015, \textit{Raghunath Nambiar} (Cisco), \textit{Reza Taheri} (VMWare), and \textit{Meikel Poess} (Oracle) of the TPC expressed interest in helping us to develop \PocketData{} as an eventual TPC benchmark.
Although all PIs will be responsible for communicating with the TPC as a joint benchmark is fleshed out, PI Kennedy will act as a lead point of contact.
The TPC represents one of the most prominent names in database benchmarking, and is responsible for benchmarks like TPC-C~\cite{tpcc}, TPC-H~\cite{tpch}, and TPC-DS~\cite{tpcds} that are touchstones for evaluating research in databases.
After presenting our preliminary work at the TPC's annual symposium colocated with VLDB 2015, \textit{Raghunath Nambiar} (Cisco), \textit{Reza Taheri} (VMware), and \textit{Meikel Poess} (Oracle) of the TPC expressed interest in helping us to develop \PocketData{} as an eventual TPC benchmark. The PIs hope to also participate in TPC discussions on IoT concerns. The TPC discussions will provide the PIs with both industry and
academic perspectives on both embedded databases and IoT. The PIs hope to leverage this information in the design of the proposed \PocketData{} infrastructure.
Although all PIs will be involved in communications with the TPC and its members, PI Kennedy will act as a lead point of contact.
Our outreach efforts will begin with poster sessions, tutorials, demos and/or short papers presented at prominent database conferences. One candidate is ICDE 2017, which takes place early in the planning period. PI Kennedy will coordinate efforts to perform a demonstration at a database conference to incite discussion and interest in \PocketData{} from the database community. PI Ziarek will coordinate efforts for a demonstration or poster presentation initially targeting SPLASH 2016 to reach out to the PL community, and PI Challen will coordinate efforts for a demonstration or poster presentation initially targeting MobiSys 2017 to reach out to the mobile systems community.
Towards the end of the first year of the proposal, the PIs will begin to develop a tutorial on embedded databases and/or plan for a \PocketData{} workshop.
To continue building our current community and to expand it to include IoT researchers, the PIs expect to travel to top conferences in a variety of fields.
Our outreach efforts will begin with poster sessions, tutorials, and demos presented at prominent database conferences. One candidate is ICDE 2017, which takes place early in the planning period. PI Kennedy will coordinate efforts to perform a demonstration at a database conference to incite discussion and interest in \PocketData{} from the database community. PI Ziarek will coordinate efforts for a demonstration or poster presentation initially targeting SPLASH 2016 to reach out to the PL community, and PI Challen will coordinate efforts for a demonstration or poster presentation initially targeting MobiSys 2017 to reach out to the mobile systems community.
At these conferences the PIs will network with researchers who work on IoT as well. In addition, there are many new conferences focusing on IoT that are emerging. The PIs expect to attend
IoTA, IoTDI, and WF-IoT. Towards the end of the first year of the proposal, the PIs will begin to develop a tutorial on embedded databases and plan for a \PocketData{} workshop.
The PIs will submit a \textbf{CI-NEW} proposal for \PocketData{} in Fall of 2017, approximately 14 months after the start of the planning proposal.
The PIs will submit a \textbf{CI-NEW} proposal for \PocketData{} in Fall of 2017, approximately 15 months after the start of the planning proposal.

View File

@ -1,10 +1,10 @@
% !TEX root = ../fullproposal.tex
With 2 billion smartphones in the world and more being added every day, mobile platforms together form the most pervasive distributed systems on the planet.
People are increasingly relying on smartphones to manage their lives, from contacts and todo lists to their health, their homes, and the contents of their wallets.
This proliferation of data-driven smartphone apps is driving a need to create more, better, faster, more user-friendly, and more power-aware techniques for managing their data.
With 2 billion smartphones in the world and more being added every day, mobile platforms together form the most pervasive distributed system on the planet.
People are increasingly relying on smartphones to manage their lives, from contacts and todo lists, to their health, their homes, and the contents of their wallets.
This proliferation of data-driven smartphone apps is causing a need for more, faster, more user-friendly, and more power-aware techniques for managing data on smartphones and embedded devices.
To meet the challenges of this new frontier in data management, it is critical that we begin understand how smartphone apps store and retrieve structured state and establish standards for evaluating potential advances based on this understanding. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Council for our proposed benchmark, and even now several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer.
To meet the challenges of this new frontier in data management, it is critical that we begin to understand how smartphone apps store and retrieve structured state and establish standards for evaluating potential advances based on this understanding. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Performance Council for our proposed benchmark, and even before the planning stage, several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer.
In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, resulting in between one and two PhD theses. We anticipate that the proposed work may also lead to one or two MS theses, and if funded, plan to apply for an REU grant for this proposal.
The resources created by this proposal will also be integrated into courses taught by the PIs; This has already happened: PIs Kennedy and Ziarek co-taught a project-oriented course entitled ``CSE-662: Languages and Runtimes for Big Data.'' The course included material related to \PocketData{} research, and three of the seven groups in the course worked on projects based on \PocketData{} and the Internet of Things.
In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, contributing to between one and two PhD theses. We anticipate that the proposed work may also lead to one or two MS theses, and if funded, plan to apply for an REU supplement for this proposal.
The resources created by this proposal will also be integrated into courses taught by the PIs, a process that has already started: PIs Kennedy and Ziarek recently co-taught a project-oriented course entitled ``CSE-662: Languages and Runtimes for Big Data.'' The course included material related to \PocketData{} research, and three of the seven groups in the course worked on projects based on \PocketData{} and the Internet of Things.

View File

@ -0,0 +1,96 @@
A. Collaborators for Oliver Kennedy; SUNY Buffalo; PI
1. Sumit Agarwal; Unknown
2. Yanif Ahmad; Johns Hopkins University
3. Jerry Antony Ajay; University at Buffalo
4. Daniel Bellinger; Global Foundries
5. Geoffrey Challen; University at Buffalo
6. Sharath Chandrashekhara; University at Buffalo
7. Jan Chomicki; University at Buffalo
8. Nick DiRienzo; University at Buffalo
9. Ronny Fehling; Oracle
10. Dieter Gawlick; Oracle
11. Boris Glavic; Illinois Inst. Tech.
12. Zhen Hua-Liu; Oracle
13. Kyungho Jeon; University at Buffalo
14. Steven Y. Ko; University at Buffalo
15. Christoph Koch; EPFL
16. Steve Lee; Microsoft Corp.
17. Charles Loboz; Microsoft Corp.
18. Daniel Lupei; EPFL
19. Anudipa Maiti; University at Buffalo
20. Shikhar Mehra; University at Buffalo
21. Niccolò Meneghetti; University at Buffalo
22. Arindam Nandi; University at Buffalo
23. Anandatirtha Nandugudi; University at Buffalo
24. Suman Nath; Microsoft Research
25. Milos Nikolic; EPFL
26. Andres Nötzli; Stanford
27. Amir Shaikhha; EPFL
28. Sriram Shantharam; University at Buffalo
29. Feng Shen; University at Buffalo
30. Jinghao Shi; University at Buffalo
31. Slawek Smyl; Microsoft Corp.
32. Guru Prasad Srinivasa; University at Buffalo
33. Ankur Upadhyay; FactSet
34. Ying Yang; University at Buffalo
35. Lukasz Ziarek; University at Buffalo
B. Collaborators for Lukasz Ziarek; SUNY Buffalo; Co-PI
1. Umut Acar; CMU
2. Ali-Reza Adl-Tabatabai; Intel
3. Ethan Blanton; Fiji Systems Inc.
4. Patrick Eugster; Purdue University
5. Mathew Fluet; Rochester
6. Christoph Hoffman; Purdue University
7. Anthony Hosking; Purdue University
8. Suresh Jagannathan; Purdue University
9. Bharat Jayaraman; University at Buffalo
10. Oliver Kennedy; University at Buffalo
11. Steve Ko; University at Buffalo
12. Sree Harsha Konduri; Amazon
13. Amit Kulkarni; University at Buffalo
14. Zihuan Li; Purdue University
15. Vijay Menon; Google
16. Filip Pizlo; Apple Inc.
17. Jennifer Sartor; Ghent University
18. Tatiana Shpeisman; Intel
19. KC Sivaramakrishnan; Purdue University
20. Sam Tobin-Hochstadt; Indiana University
21. Jan Vitek; Purdue University, Fiji Systems Inc.
22. Adam Welc; Oracle
23. Yin Yan; University at Buffalo
C. Collaborators for Geoffrey Challen; SUNY Buffalo; Co-PI
1. Sharad Agarwal; Microsoft Research
2. Nilanjan Banerjee; University of Maryland
3. Milind Buddhikot; Bell Labs
4. Yih-Farn Chen; AT&T Labs Research
5. Murat Demirbas; University at Buffalo
6. Prabal Dutta; University of Michigan
7. Wen Dong; University at Buffalo
8. Carla Schlatter Ellis; Duke University
9. Shyamnath Gollakota; University of Washington
10. Michelle Gong; Google
11. Marco Gruteser; Rutgers University
12. Mark Hempstead; Drexel University
13. Oliver Kennedy; University at Buffalo
14. Robin Kravets; University of Illinois, Urbana-Champaign
15. Steven Y. Ko; University at Buffalo
16. Tevfik Kosar; University at Buffalo
17. Dimitrios Koutsonikolas; University at Buffalo
18. Branislav Kusy; CSIRO
19. Eyal de Lara; University of Toronto
20. James Martin; Clemson University
21. Tommaso Melodia; Northeastern University
22. Emiliano Miluzzo; Apio Systems
23. Iqbal Mohomed; IBM Research
24. James Pepin; Clemson University
25. Matthai Philipose; Microsoft Research
26. Sami Rollins; University of San Francisco
27. Margo Seltzer; Harvard University
28. Ivan Seskar; Rutgers University
29. Jacob Sorber; Clemson University
30. Aaron Striegel; Notre Dame
31. Khai N. Truong; University of Toronto
32. Chunming Qiao; University at Buffalo
33. Kuangching Wang; Clemson University
34. Lin Zhong; Rice University
35. Lukasz Ziarek; University at Buffalo

Binary file not shown.

View File

@ -27,7 +27,7 @@ Computer Science and Engineering (CSE) data storage facilities include vulcan, a
CSE faculty compute systems include castor, a Sun Blade 1000; citrix[1-3], a load-balanced Citrix farm of Dell PowerEdge 2650 servers; the-who, a Sun Fire V20z desktop virtualization server; benatar, a virtualized general compute server; and the underground cluster, a 4-node compute cluster comprised of Dell 1425s. CSE Faculty also have use of all CSE student systems (below).
CSE student compute systems include timberlake, a Dell PowerEdge R600 compute server; metallica, a Dell PowerEdge R500 compute server; pollux, a Sun Sparc enterprise T5220 compute server; coldplay, a Sun Fire V20z compute server; fork, a Sun Fire V20z dedicated to the Operating Systems course; nickelback, a Dell PowerEdge 1950 desktop virtualization server; dragonforce, a Dell PowerEdge R720 desktop virtualization server; styx, a Dell PowerEdge R400 desktop virtualization server.
CSE student compute systems include a Dell PowerEdge R600 compute server; a Dell PowerEdge R500 compute server; a Sun Sparc enterprise T5220 compute server; a Sun Fire V20z compute server; a Sun Fire V20z dedicated to the Operating Systems course; a Dell PowerEdge 1950 desktop virtualization server; a Dell PowerEdge R720 desktop virtualization server; and a Dell PowerEdge R400 desktop virtualization server.
CSE research groups occupy 6628 square feet of research lab space ranging from secure, monitored, temperature-controlled data centers to specialized experimental facilities. CSE instructional labs occupy 4096 square feet, each configured to serve the characteristic needs of the courses they host. The Patricia Eberlein Lab is the CSE general student computing lab, which occupies 1056 square feet.
@ -53,4 +53,6 @@ The DB/PL lab at the University at Buffalo maintains additional resources specif
The DB/PL lab at University at Buffalo maintains additional resources for internal use, including multiple x86 workstations, laptops, and low-power development boards (Raspberry Pis and Intel Galileos) for general student and PI use. Server infrastructure for the lab includes an application server supporting a lab project management system, teaching support applications, and trial deployments of lab-developed software, an Oracle database server testbed, a 32-core and a 64-core AMD Opteron and a 12-core Intel Xeon-based testbed server, as well as a 16-node Hadoop cluster shared with 3 other labs. Lab workstations and laptops are configured with OSX or Windows. Servers are configured with Redhat Enterprise Linux.
PI Challen is collaborating on this project without support for the duration of the planning phase. He will apply his expertise in mobile systems and operating systems, and will assist in advising students working on this project.
\end{document}

Binary file not shown.

View File

@ -23,7 +23,7 @@
~~\\
\section*{Senior Personnel}
PIs Ziarek is budgeted half a month of summer salary. PIs Kennedy and Challen are each budgeted a quarter-month of summer salary. PI Kennedy will apply his expertise and experience in the areas of databases, incremental computation, web applications, and security. PI Ziarek will apply his expertise and experience in the areas of programming languages, distributed computation, and security. PI Challen will participate without support and will apply his expertise in mobile systems and operating systems. All three PIs will take responsibility for (1) advising and coordinating student-driven efforts as described below, (2) reaching out to their respective research communities to build interest in research on \PocketData{}, (3) organizing a \PocketData{} workshop.
PI Ziarek is budgeted half a month of summer salary. PIs Kennedy and Challen are each budgeted a quarter-month of summer salary. PI Kennedy will apply his expertise and experience in the areas of databases, incremental computation, web applications, and security. PI Ziarek will apply his expertise and experience in the areas of programming languages, distributed computation, and security. PI Challen will apply his expertise in mobile systems and operating systems. All three PIs will take responsibility for (1) advising and coordinating student-driven efforts as described below, (2) reaching out to their respective research communities to build interest in research on \PocketData{}, and (3) organizing a \PocketData{} workshop.
\section*{Other Personnel}
Funding is requested for one computer science graduate student assistant for one year. The two-semester and summer salary for the student is \$22,000.
@ -40,11 +40,11 @@ N/A
Travel may include trips to NSF meetings, conferences and workshops, and any PI meetings. Major conferences such as SIGMOD, VLDB, POPL, PLDI, and ICDE, typically last 4-5 days, and are located both domestically and internationally. Workshops are often affiliated with major conferences, and attendees frequently attend both. We have budgeted for up to 3 conference visits.
\noindent \textbf{Domestic Conferences} As an example of a domestic conference, we use SIGMOD 2016 being held in San Fransisco, CA. We anticipate a lodging cost of \$99 per night and a \$59 perdiem. The subtotal for 2 attendees over 5 nights is \$2,310. We expect airfare of \$630 and average conference registration fees of \$600 per person for a total domestic travel cost of \$4000.
\noindent \textbf{Domestic Conferences} As an example of a domestic conference, we use SIGMOD 2016 being held in San Francisco, CA. We anticipate a lodging cost of \$99 per night and a \$59 per diem. The subtotal for 4 attendees over 5 nights is \$2,310. We expect airfare of \$630 and average conference registration fees of \$600 per person for a total domestic travel cost of \$8000.
\noindent \textbf{Other Domestic Travel} We have budgeted an additional \$2000 for travel to NSF PI meetings and for outreach efforts. Outreach efforts include travel support to allow the PIs to visit potential community members, and travel support for community members to visit UB and present on their work.
\noindent \textbf{Foreign Conferences} As an example of a foreign conference, we use ICDE 2016 being held in Helsinki, Finland. We anticipate a lodging cost of \$200 per person, and a \$260 perdiem. The subtotal for 1 attendee over 5 nights is \$5,200. We expect airfare of \$1000 and average conference registration fees of \$700 per person for a total domestic travel cost of \$4,000.
\noindent \textbf{Foreign Conferences} As an example of a foreign conference, we use ICDE 2016 being held in Helsinki, Finland. We anticipate a lodging cost of \$200 per person, and a \$260 per diem. The subtotal for 2 attendees over 5 nights is \$5,200 per person. We expect airfare of \$1000 and average conference registration fees of \$700 per person for a total international travel cost of \$8,000.
\section*{Other Direct Costs}
@ -55,8 +55,8 @@ The negotiated rate with the Department of Computer Science and Engineering for
\$1,533 is requested per year for Materials and Supplies to purchase desktop computers for the graduate research students and faculty working on this project. The computers will be used for code development, experimental evaluation, paper writing and typesetting and other efforts related to this project.
\subsection*{Other}
Tuition is budgeted at the standard University at Buffalo rates for the Graduate Research Assistant at 9 credit hours per GRA per semester.
The anticipated out-of-state student tuition is \$18,144 for one student for one year.
Tuition is budgeted at the standard University at Buffalo rates for a senior Graduate Research Assistant at 3 credit hours per semester.
The anticipated out-of-state student tuition is \$6,048 for one student for one year.
\subsection*{Indirect Costs}
Indirect cost rates are based on the applicable federally negotiated rates published at \url{http://www.research.buffalo.edu/sps/about/rates.cfm}.

View File

@ -0,0 +1,4 @@
-- List of Personnel --
1. Oliver Kennedy; University at Buffalo, SUNY; PI
2. Lukasz Ziarek; University at Buffalo, SUNY; Co-PI
3. Geoffrey Challen; University at Buffalo, SUNY; Co-PI

15
supplements/summary.txt Normal file
View File

@ -0,0 +1,15 @@
---- Overview ----
A common requirement of the 4 million apps running on the world's 2 billion smartphones is persisting structured data. Embedded databases such as SQLite are heavily used for this purpose, with a single typical Android smartphone averaging more than two SQLite queries per second. The fundamental challenges experienced by mobile apps using embedded databases - minimizing energy consumption, latency, and disk utilization - are familiar ground for database researchers. However, in spite of active research in the areas of smartphone query processing and embedded databases, the specific tradeoffs introduced by this new domain of pocket-scale data are far less well understood.
Key challenges in this space include the lack of publicly available data regarding smartphone database usage patterns in the real world, concrete high-level optimization targets, and tools and methodologies for reliably measuring database performance along axes relevant to smartphone apps. We propose infrastructure support and community-building efforts that will both improve existing research on embedded databases, and help to encourage new and innovative research in the area. This infrastructure support will take the form of real-world smartphone usage traces, a benchmarking suite for pocket-scale data, visualization tools, and instrumentation for mobile embedded databases.
Keywords: databases, smartphones, benchmarking
---- Intellectual Merit ----
The proposed infrastructure will be used by researchers from multiple academic and industrial institutions in support of new and existing research. Interest has already been expressed by researchers working on Adaptive Data Systems, Small Data Analytics, Gestural Query Processing, Data-Flow Analysis, Embedded Databases, Database Benchmarking, and others.
---- Broader Impacts ----
With 2 billion smartphones in the world, people are increasingly relying on smartphones to manage their lives. The proliferation of data-driven smartphone apps is driving a need to create more, better, faster, more user-friendly, and more power-aware techniques for managing their data. It is critical that we begin to understand how smartphone apps interact with their data. Our proposal lays the groundwork for research on pocket-scale data management. We have interest from the Transaction Processing Council for our proposed benchmark, and even now several members of the database, systems, and programming language communities have expressed interest in the resources we propose to offer. In addition to supporting research in a critical area, this proposal will support one graduate student during the planning phase and up to two graduate students in later phases, resulting in between one and two PhD theses.