Adding text for fig 13,14

Tweaks to trim us back down to 16 pages.
2015-06-20 18:09:27 -04:00 · 2015-06-20 18:09:27 -04:00 · 2c5f2bde90
parent 7edfd2fdd6
commit 2c5f2bde90
8 changed files with 139 additions and 80 deletions
--- a/main.bib
+++ b/main.bib
@ -2,7 +2,7 @@
 %% http://bibdesk.sourceforge.net/


-%% Created for Oliver Kennedy at 2015-06-20 15:03:07 -0400 
+%% Created for Oliver Kennedy at 2015-06-20 18:08:34 -0400 


 %% Saved with string encoding Unicode (UTF-8) 
@ -12,9 +12,9 @@
@article{box2007linq,
 	Author = {Box, Don and Hejlsberg, Anders},
 	Date-Added = {2015-06-20 19:01:17 +0000},
-	Date-Modified = {2015-06-20 19:01:17 +0000},
+	Date-Modified = {2015-06-20 22:04:10 +0000},
 	Journal = {MSDN Developer Centre},
-	Title = {LinQ: .NET language-integrated query},
+	Title = {{LinQ}: {.NET} language-integrated query},
 	Volume = {89},
 	Year = {2007}}

@ -36,19 +36,17 @@

@inproceedings{phonelab,
 	Acmid = {2536718},
-	Address = {New York, NY, USA},
 	Articleno = {4},
 	Author = {Nandugudi, Anandatirtha and Maiti, Anudipa and Ki, Taeyeon and Bulut, Fatih and Demirbas, Murat and Kosar, Tevfik and Qiao, Chunming and Ko, Steven Y. and Challen, Geoffrey},
-	Booktitle = {Proceedings of First International Workshop on Sensing and Big Data Mining},
+	Booktitle = {SenseMine},
+	Date-Modified = {2015-06-20 22:08:33 +0000},
 	Doi = {10.1145/2536714.2536718},
 	Isbn = {978-1-4503-2430-4},
 	Keywords = {Smartphones, mobile devices, testbed},
 	Location = {Roma, Italy},
 	Numpages = {6},
 	Pages = {4:1--4:6},
-	Publisher = {ACM},
-	Series = {SENSEMINE'13},
-	Title = {PhoneLab: A Large Programmable Smartphone Testbed},
+	Title = {{PhoneLab}: A Large Programmable Smartphone Testbed},
 	Url = {http://doi.acm.org/10.1145/2536714.2536718},
 	Year = {2013},
 	Bdsk-Url-1 = {http://doi.acm.org/10.1145/2536714.2536718},
@ -99,18 +97,15 @@

@inproceedings{kang2013xftl,
 	Acmid = {2465326},
-	Address = {New York, NY, USA},
 	Author = {Kang, Woon-Hak and Lee, Sang-Won and Moon, Bongki and Oh, Gi-Hwan and Min, Changwoo},
-	Booktitle = {Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data},
+	Booktitle = {SIGMOD},
+	Date-Modified = {2015-06-20 22:06:12 +0000},
 	Doi = {10.1145/2463676.2465326},
 	Isbn = {978-1-4503-2037-5},
 	Keywords = {copy-on-write, flash storage devices, flash translation layer, sqlite, transactional atomicity},
 	Location = {New York, New York, USA},
 	Numpages = {12},
-	Pages = {97--108},
-	Publisher = {ACM},
-	Series = {SIGMOD '13},
-	Title = {X-FTL: Transactional FTL for SQLite Databases},
+	Title = {{X-FTL}: {Transactional} {FTL} for {SQLite} Databases},
 	Url = {http://doi.acm.org/10.1145/2463676.2465326},
 	Year = {2013},
 	Bdsk-Url-1 = {http://doi.acm.org/10.1145/2463676.2465326},
@ -120,13 +115,13 @@
 	Acmid = {2535499},
 	Address = {Berkeley, CA, USA},
 	Author = {Jeong, Sooman and Lee, Kisung and Lee, Seongjin and Son, Seoungbum and Won, Youjip},
-	Booktitle = {Proceedings of the 2013 USENIX Conference on Annual Technical Conference},
+	Booktitle = {USENIX ATC},
+	Date-Modified = {2015-06-20 22:03:04 +0000},
 	Location = {San Jose, CA},
 	Numpages = {12},
 	Pages = {309--320},
 	Publisher = {USENIX Association},
-	Series = {USENIX ATC'13},
-	Title = {I/O Stack Optimization for Smartphones},
+	Title = {{I/O} Stack Optimization for Smartphones},
 	Url = {http://dl.acm.org/citation.cfm?id=2535461.2535499},
 	Year = {2013},
 	Bdsk-Url-1 = {http://dl.acm.org/citation.cfm?id=2535461.2535499}}
@ -134,6 +129,7 @@
@incollection{kim2012androbench,
 	Author = {Kim, Je-Min and Kim, Jin-Soo},
 	Booktitle = {Frontiers in Computer Education},
+	Date-Modified = {2015-06-20 22:06:31 +0000},
 	Doi = {10.1007/978-3-642-27552-4_89},
 	Editor = {Sambath, Sabo and Zhu, Egui},
 	Isbn = {978-3-642-27551-7},
@ -142,7 +138,7 @@
 	Pages = {667--674},
 	Publisher = {Springer Berlin Heidelberg},
 	Series = {Advances in Intelligent and Soft Computing},
-	Title = {AndroBench: Benchmarking the Storage Performance of Android-Based Mobile Devices},
+	Title = {{AndroBench}: Benchmarking the Storage Performance of {Android}-Based Mobile Devices},
 	Url = {http://dx.doi.org/10.1007/978-3-642-27552-4_89},
 	Volume = {133},
 	Year = {2012},
@ -150,25 +146,27 @@

@misc{ahmed2009mobigen,
 	Author = {Ahmed, Sabbir},
+	Date-Modified = {2015-06-20 22:04:00 +0000},
 	Howpublished = {http://arrow.monash.edu.au/hdl/1959.1/109933},
-	Title = {MobiGen: a mobility generator for environment aware mobility model},
+	Title = {{MobiGen}: a mobility generator for environment aware mobility model},
 	Year = {2009}}

@article{madden2005tinydb,
 	Acmid = {1061322},
 	Address = {New York, NY, USA},
 	Author = {Madden, Samuel R. and Franklin, Michael J. and Hellerstein, Joseph M. and Hong, Wei},
+	Date-Modified = {2015-06-20 22:03:22 +0000},
 	Doi = {10.1145/1061318.1061322},
 	Issn = {0362-5915},
 	Issue_Date = {March 2005},
-	Journal = {ACM Trans. Database Syst.},
+	Journal = {ACM TODS},
 	Keywords = {Query processing, data acquisition, sensor networks},
 	Month = mar,
 	Number = {1},
 	Numpages = {52},
 	Pages = {122--173},
 	Publisher = {ACM},
-	Title = {TinyDB: An Acquisitional Query Processing System for Sensor Networks},
+	Title = {{TinyDB}: An Acquisitional Query Processing System for Sensor Networks},
 	Url = {http://doi.acm.org/10.1145/1061318.1061322},
 	Volume = {30},
 	Year = {2005},
@ -179,16 +177,15 @@
 	Acmid = {1807152},
 	Address = {New York, NY, USA},
 	Author = {Cooper, Brian F. and Silberstein, Adam and Tam, Erwin and Ramakrishnan, Raghu and Sears, Russell},
-	Booktitle = {Proceedings of the 1st ACM Symposium on Cloud Computing},
+	Booktitle = {SOCC},
+	Date-Modified = {2015-06-20 22:05:09 +0000},
 	Doi = {10.1145/1807128.1807152},
 	Isbn = {978-1-4503-0036-0},
 	Keywords = {benchmarking, cloud serving database},
 	Location = {Indianapolis, Indiana, USA},
 	Numpages = {12},
-	Pages = {143--154},
 	Publisher = {ACM},
-	Series = {SoCC '10},
-	Title = {Benchmarking Cloud Serving Systems with YCSB},
+	Title = {Benchmarking Cloud Serving Systems with {YCSB}},
 	Url = {http://doi.acm.org/10.1145/1807128.1807152},
 	Year = {2010},
 	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1807128.1807152},
@ -196,22 +193,20 @@

@inproceedings{lam2009healthmonitoring,
 	Author = {Lam, S.C.K. and Kai Lap Wong and Kwok On Wong and Wenxiu Wong and Wai Ho Mow},
-	Booktitle = {Information, Communications and Signal Processing, 2009. ICICS 2009. 7th International Conference on},
+	Booktitle = {ICICS},
+	Date-Modified = {2015-06-20 22:07:40 +0000},
 	Doi = {10.1109/ICICS.2009.5397628},
 	Keywords = {biosensors;body area networks;health care;patient monitoring;personal area networks;plethysmography;wireless sensor networks;battery life;biosignal processing;body area sensor network;closed loop control capability;healthcare;personal health monitoring;photoplethysmographic biosensors;portability;smartphone centric platform;upgradability;wireless wearable biosensors;Aging;Application software;Biomedical monitoring;Biosensors;Costs;Medical services;Operating systems;Smart phones;Wearable sensors;Wireless sensor networks;COTS wearable biosensors;Health monitoring;body area sensor network;pervasive computing},
 	Month = {Dec},
-	Pages = {1-7},
 	Title = {A smartphone-centric platform for personal health monitoring using wireless wearable biosensors},
 	Year = {2009},
 	Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICICS.2009.5397628}}

@inproceedings{klasnja2009using,
 	Author = {Klasnja, Predrag and Consolvo, Sunny and McDonald, David W and Landay, James A and Pratt, Wanda},
-	Booktitle = {AMIA Annual Symposium Proceedings},
-	Organization = {American Medical Informatics Association},
-	Pages = {338},
+	Booktitle = {AMIA},
+	Date-Modified = {2015-06-20 22:07:22 +0000},
 	Title = {Using mobile \& personal sensing technologies to support health behavior change in everyday life: lessons learned},
-	Volume = {2009},
 	Year = {2009}}

@article{campbell2008peoplesensing,
--- a/paper.tex
+++ b/paper.tex
@ -60,9 +60,9 @@ Website: \texttt{http://odin.cse.buffalo.edu/research/}
 \label{sec:pocketdata}
 \input{sections/6-pocketdata}

-\section{Conclusions and Future Work}
-\label{sec:conc}
-\input{sections/7-conclusions}
+% \section{Conclusions and Future Work}
+% \label{sec:conc}
+% \input{sections/7-conclusions}


 \bibliographystyle{plain}
--- a/sections/1-introduction.tex
+++ b/sections/1-introduction.tex
@ -2,7 +2,7 @@
 Since the introduction of the smartphone, mobile computing has become pervasive in our society, with one in every five people in the world owning a smartphone~\cite{phones}. Mobile devices, however,
 have evolved far beyond the stereotypical smartphone and tablet, and are now employed in a wide variety of domains. Of the currently available mobile systems, Android has seen the most widespread deployment outside of the consumer electronics market. Android's open source nature has prompted its ubiquitous adoption in sensing, medical, command and control, robotics, and automotive applications.

-For each of these domains, Android supports specific client-side applications that often utilize application-resident, or embedded databases such as SQLite~\cite{sqlite} to provide necessary functionality. Even Android
+For each of these domains, Android supports specific client-side applications that often utilize an application-resident, or \textit{embedded}, database called SQLite~\cite{sqlite} for persistent state and data analysis. Even Android
 itself, specifically the framework layer, makes use of embedded databases.
 The way in which mobile applications use databases is, however, rather different from traditional server farms and desktop applications that focus on ``big data.''
 Based on the experiments we present in this paper, SQLite on \textit{one} average Android smart-phone satisfies over 178 thousand database requests \textit{per day}, or about 2 requests every second. 
@ -19,9 +19,9 @@ workloads.
 \item An outline of workload characteristics and other desiderata for a proposed TPC-MOBILE benchmark.
 \end{itemize}

-The remainder of the paper is organized as follows.
-We present our motivation observations in Section~\ref{sec:overview}. We outline our experimental setup and the process of obtaining data from 11 primary-use smart phones in
-Section~\ref{sec:experimental}. We discuss relevant
-workloads based on query complexity, and database activity in Section~\ref{sec:queryc} and Section~\ref{sec:dba} respectively. Based on this
-detailed analysis and a review of related work, we sketch the requirements for a TPC-MOBILE benchmark in Section~\ref{sec:pocketdata}.  We wrap up with conclusions in Section~\ref{sec:conc}.
+% The remainder of the paper is organized as follows.
+% We present our motivation observations in Section~\ref{sec:overview}. We outline our experimental setup and the process of obtaining data from 11 primary-use smart phones in
+% Section~\ref{sec:experimental}. We discuss relevant
+% workloads based on query complexity, and database activity in Section~\ref{sec:queryc} and Section~\ref{sec:dba} respectively. Based on this
+% detailed analysis and a review of related work, we sketch the requirements for a TPC-MOBILE benchmark in Section~\ref{sec:pocketdata}.  We wrap up with conclusions in Section~\ref{sec:conc}.

--- a/sections/3-experimental.tex
+++ b/sections/3-experimental.tex
@ -2,7 +2,7 @@

 \PhoneLab{}~\cite{phonelab} is a public smartphone platform testbed operated
 at the University at Buffalo (UB). Approximately 200~UB students, faculty,
-and staff use instrumented LG Nexus~5 smartphones~\cite{nexus5} as their
+and staff use instrumented LG Nexus~5 smartphones as their
 primary device and receive discounted service in return for providing data to
 smartphone experiments. \PhoneLab{} participants are balanced between genders
 and distributed across ages, and thus representative of the broader
--- a/sections/4-queryc.tex
+++ b/sections/4-queryc.tex
@ -91,18 +91,17 @@ Figure~\ref{fig:spjsByWidthAndWhere} shows queries of this class, broken down by
 \texttt{SELECT R.A FROM R, S WHERE R.B = S.B AND S.C = 10}
 This query would have a join width of 2 (\texttt{R}, \texttt{S}) and 2 conjunctive terms (\texttt{R.B = S.B} and \texttt{S.C = 10}).  For uniformity, \texttt{NATURAL JOIN} and \texttt{JOIN ON} (\textit{e.g.}, \texttt{SELECT R.A from R JOIN S ON B}) expressions appearing in the \texttt{FROM} clause are rewritten into equivalent expressions in the \texttt{WHERE} clause.

+The first column of this table indicates queries to a single relation.  Just over 1 million queries were full table scans (0 where clauses), and just under 27 million queries involved only a single conjunctive term.  This latter class constitutes the bulk of the simple query workload, at just over 87 percent of the simple look-up queries.  Single-clause queries appear to be the norm.  Recall that an N-way equi-join requires N-1 conjunctive terms; spikes occur in the number of queries with one more term than strictly required to perform a join, suggesting a constraint on at least one relation.
+
 \begin{figure}
 \centering
-{\small
+{\footnotesize
 \input{tables/sp_trivial_condition_breakdown}
 }
 \caption{\textbf{The \texttt{WHERE} clause structure for single-tabled simple lookup queries with a single conjunctive term in the \texttt{WHERE} clause.}}
 \label{fig:singleClauseExpressions}
 \end{figure} 

-The first column of this table indicates queries to a single relation.  Just over 1 million queries were full table scans (0 where clauses), and just under 27 million queries involved only a single conjunctive term.  This latter class constitutes the bulk of the simple query workload, at just over 87 percent of the simple look-up queries.  Single-clause queries appear to be the norm.  Recall that an N-way equi-join requires N-1 conjunctive terms; Spikes occur in the number of queries with one more term than strictly required to perform a join, suggesting a constraint on at least one relation.
-
-
 Narrowing further, we examine simple look-up queries referencing only a single source table and a single conjunctive term in the WHERE clause.  Figure~\ref{fig:singleClauseExpressions} summarizes the structure of the predicate that appears in each of these queries.  In this figure, constant terms (Const) are any primitive value term (\textit{e.g.}, a quoted string, an integer, or a float), or any JDBC-style parameter ($?$).  For simple relational comparators, we group together \textit{in}equalities (\textit{i.e.}, $<$, $\leq$, $>$, $\geq$ and $\neq$) under the symbol $\theta$, and explicitly list equalities.  Other relational operators such as \texttt{LIKE}, \texttt{BETWEEN}, and \texttt{IN} are also seen with some frequency.  However, the majority of look-ups (85\% of all simple look-ups) are exact match look-ups.  
 Not surprisingly, this suggests that the most common use-case for SQLite is as a relational key-value store.  As we show shortly through a per-app analysis of the data (Section~\ref{sec:select:perapp}), 24 out of the 179 apps that we encountered posed no queries other than exact look-ups and full table scans.  

@ -114,7 +113,7 @@ Figure~\ref{fig:allSelectConditionBreakdown} shows a similar breakdown for all 3

 \begin{figure}
 \centering
-{\small
+{\footnotesize
 \input{tables/select_condition_breakdown}
 }
 \caption{\textbf{WHERE clause expression structures, and the number of SELECT queries in which the structure appears as a conjunctive clause.}}
@ -127,7 +126,7 @@ App developers make frequent use of SQLite's dynamic typing: Where clauses inclu

 \begin{figure}
 \centering
-{\small
+{\footnotesize
 \input{tables/select_functions}
 }
 \caption{\textbf{Functions appearing in SELECT queries by number of times the function is used.}}
@ -189,7 +188,7 @@ Write statements, \texttt{INSERT}, \texttt{INSERT OR REPLACE} (here abbreviated

 \begin{figure}
 \centering
-{\small
+{\footnotesize
 \input{tables/delete_condition_breakdown}
 }
 \caption{\textbf{\texttt{WHERE} clause expression structures, and the number of \texttt{DELETE} statements in which the structure appears.}}
@ -204,24 +203,27 @@ This suggests extensive use of \texttt{DELETE} as a form of garbage-collection o

 \subsubsection{\texttt{UPDATE} Statements}

-\begin{figure}
-\centering
-{\small
-\input{tables/update_condition_breakdown}
-}
-\caption{\textbf{\texttt{WHERE} clause expression structures, and the number of \texttt{UPDATE} statements in which the structure appears.}}
-\label{fig:allUpdateConditionBreakdown}
-\end{figure}

 Slightly over 1 million statements executed by SQLite over the course of the month were \texttt{UPDATE} statements.  Figure~\ref{fig:allUpdateConditionBreakdown} breaks down the predicates used to select rows to be updated.  Virtually all \texttt{UPDATE} statements involved an exact look-up.  Of the million updates, 28 thousand did not include an exact look-up.  

 193 of the \texttt{UPDATE} statements relied on a nested \texttt{SELECT} statement as part of their \texttt{WHERE} clause, including 56 that involved 2 levels of nesting.  Of the 193 \texttt{UPDATE}s with nested subqueries, 25 also involved aggregation.  

-Although the \texttt{WHERE} clause of the updates included a variety of expressions, \textit{every single setter} in every \texttt{UPDATE} statement in the trace assigned a constant value, as in the following statement:
-\begin{verbatim}
-UPDATE ScheduledTaskProto SET value=?,key=?,sortingValue=? WHERE key = ?;
-\end{verbatim}
-Not a single \texttt{UPDATE} expression attempted to compute new values in the SQL space, suggesting a strong preference for doing so in the application itself.  This is not entirely unexpected, as the database lives in the address space of the application.  Consequently, it is feasible to first perform a \texttt{SELECT} to read values out of the database and then perform an \texttt{UPDATE} to write out the changes, a tactic used by many ORMs.  An unfortunate consequence of this tactic is that ORMs cache database objects at the application layer unnecessarily, suggesting that a stronger coupling between SQL and Java (e.g. language primitives like LINQ~\cite{box2007linq}, StatusQuo~\cite{cheung2013statusquo} or Truffle~\cite{wimmer2012truffle}) could be of significant benefit for Android developers.
+Although the \texttt{WHERE} clause of the updates included a variety of expressions, \textit{every single setter} in every \texttt{UPDATE} statement in the trace assigned a constant value.
+% , as in the following statement:
+% \begin{verbatim}
+% UPDATE ScheduledTaskProto SET value=?,key=?,sortingValue=? WHERE key = ?;
+% \end{verbatim}
+Not a single \texttt{UPDATE} expression attempted to compute new values using SQL, suggesting a strong preference for computing updated values in the application itself.  This is not entirely unexpected, as the database lives in the address space of the application.  Consequently, it is feasible to first perform a \texttt{SELECT} to read values out of the database and then perform an \texttt{UPDATE} to write out the changes, a tactic used by many ORMs.  An unfortunate consequence of this tactic is that ORMs cache database objects at the application layer unnecessarily, suggesting that a stronger coupling between SQL and Java (\textit{e.g.}, through language primitives like LINQ~\cite{box2007linq} or StatusQuo~\cite{cheung2013statusquo}) % or Truffle~\cite{wimmer2012truffle}
+could be of significant benefit for Android developers.
+
+\begin{figure}
+\centering
+{\footnotesize
+\input{tables/update_condition_breakdown}
+}
+\caption{\textbf{\texttt{WHERE} clause expression structures, and the number of \texttt{UPDATE} statements in which the structure appears.}}
+\label{fig:allUpdateConditionBreakdown}
+\end{figure}


 % \begin{figure}
--- a/sections/5-dba.tex
+++ b/sections/5-dba.tex
@ -27,6 +27,8 @@ during our study. We examine how often queries arrive, how long they run, and
 how many rows they return---all important inputs into designing the TPC-Mobile
 embedded database benchmark.

+\subsubsection{General Characteristics}
+
 Figure~\ref{fig-overview} shows query interarrival times, runtimes, and
 returned row counts (for \texttt{SELECT} statements) for all users,
 applications, and non-informational query types (\texttt{SELECT},
@ -62,45 +64,100 @@ applications seem to be using the SQLite database almost as a key-value store.
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__All_Apps__All_Queries__ByTypePreviousQueryCDFGraph.pdf}
    \caption{}
-    \label{fig-app-interarrival}
+    \label{fig-type-interarrival-prev}
  \end{subfigure}%
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__All_Apps__All_Queries__ByTypeNextQueryCDFGraph.pdf}
    \caption{}
-    \label{fig-app-runtime}
+    \label{fig-type-interarrival-next}
  \end{subfigure}%
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__All_Apps__All_Queries__ByTypeRuntimeCDFGraph.pdf}
    \caption{}
-    \label{fig-app-rowcount}
+    \label{fig-type-runtime}
  \end{subfigure}%

   \caption{\textbf{By-Query-Type Statistics for Android SQLite Queries. Distribution of times since the query (a) immediately preceding, and (b) immediately following the query in question.  (c) Distribution of runtimes for each query.}}
-  \label{fig-app}
+  \label{fig-type}

 \end{figure*}

+\subsubsection{Runtime Characteristics by Query Type}
+
+Figure~\ref{fig-type} shows runtime characteristics for each of 
+the four types of SQL statement.  Figures~\ref{fig-type-interarrival-prev} and 
+\ref{fig-type-interarrival-next} in particular show the time since the last
+query to be issued and the time until the next query is issued (respectively), 
+while Figure~\ref{fig-type-runtime} shows the distribution of runtimes for each 
+type of query.  
+Examining the differences between Figures~\ref{fig-type-interarrival-prev} and 
+\ref{fig-type-interarrival-next}, we observe that \texttt{INSERT} queries are far
+more likely to arrive shortly before another query than shortly after.  Almost 80\% of 
+\texttt{INSERT}s are followed by another query within 100$\mu$s.  A similar, but far
+more subdued pattern can be seen for \texttt{UPDATE} statements.  Conversely, both 
+\texttt{SELECT} and \texttt{DELETE} statements are slightly more likely to arrive
+shortly after, rather than shortly before another query.  
+Figure~\ref{fig-type-runtime} shows significant deviations from the global average 
+runtime for \texttt{DELETE} and \texttt{UPDATE} statements.  \texttt{UPDATE} 
+statements in particular have a bimodal distribution of runtimes, spiking at 100$\mu$s
+and 10ms.  We suspect that this performance distribution is related to SQLite's use
+of filesystem primitives for locking and write-ahead
+logging~\cite{jeong2013iostack,kang2013xftl}.  This could also help to explain the 0.01Hz
+query periodicity we observed above.
+
 \begin{figure*}[t]
  \centering
  \includegraphics[width=0.6\textwidth]{./graphs/activity/All_Devices__Top_10__Key}
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__Top_10__All_Queries__ByAppPreviousQueryCDFGraph_noKey.pdf}
    \caption{}
-    \label{fig-type-interarrival}
+    \label{fig-app-interarrival}
  \end{subfigure}%
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__Top_10__All_Queries__ByAppRuntimeCDFGraph_noKey.pdf}
    \caption{}
-    \label{fig-type-runtime}
+    \label{fig-app-runtime}
  \end{subfigure}%
  \begin{subfigure}[t]{0.33\textwidth}
    \includegraphics[width=\textwidth]{./graphs/activity/All_Devices__Top_10__All_Queries__ByAppRowcountCDFGraph_noKey.pdf}
    \caption{}
-    \label{fig-type-rowcount}
+    \label{fig-app-rowcount}
  \end{subfigure}%

-  \caption{\textbf{Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}}
+  \caption{\textbf{Per-App Summary Statistics for Android SQLite Queries. Distributions of (a) inter-query arrival times, (b) query runtimes, and (c) rows returned per query.}}

-  \label{fig-type}
+  \label{fig-app}

 \end{figure*}
+
+\subsubsection{Runtime Characteristics by Application}
+
+Figure~\ref{fig-app} shows query interarrival times, runtimes, and returned row 
+counts for ten of the most active SQLite clients.  As seen in 
+Figure~\ref{fig-app-interarrival}, the 0.01Hz periodicity is not unique to any one
+application, further suggesting filesystem locking as a culprit.  Two of the most
+prolific SQLite clients, \textit{Google Play services} and \textit{Media Storage} 
+appear to be very bursty: 70\% of all statements for these applications are issued 
+within 0.1ms of the previous statement.  Also interesting is the curve for queries 
+issued by the \textit{Android System} itself.  The interarrival time CDF appears 
+to be almost precisely logarithmic for rates above 10$\mu$s, but has a notable lack 
+of interarrival times in the 1ms to 10ms range.  This could suggest caching 
+effects, with the cache expiring after 1ms.
+As seen in Figure~\ref{fig-app-runtime}, most apps hold to the average runtime of 
+100$\mu$s, with several notable exceptions.   Over 50\% of the 
+\textit{Android System}'s statements take on the order of 1ms.  Just under 20\% of 
+\textit{Hangouts} statements take 10ms, suggesting an update-heavy workload.  Also, 
+\textit{Contacts Storage} has a heavier-duty workload, with 30\% of statements taking
+between 100$\mu$s and 1ms.  
+Figure~\ref{fig-app-rowcount} shows that the \textit{Android System} and 
+\textit{Media Storage} issue almost exclusively single-row lookup queries.  
+Although the remaining apps issue a large number of single-row queries --- even 
+\textit{Contacts Storage} has a workload consisting of 45\% single-row reads --- 
+the number of rows returned in general varies much more widely.  Many of these 
+apps' user interfaces have both list and search views that show multiple records
+at a time, suggesting that these views are backed directly by SQLite.  Although all
+apps have long tails, two apps in particular, \textit{Gmail} and \textit{Google+}, are
+notable for regularly issuing queries that return on the order of 100 rows.  
+
+
+
--- a/sections/6-pocketdata.tex
+++ b/sections/6-pocketdata.tex
@ -1,13 +1,14 @@
-In spite of the prevalence of mobile devices, relatively little attention has been paid to pocket-scale data management.  We believe that this is, in large part, due to the lack of a common, overarching mechanism to evaluate potential solutions to known challenges in the space.  In this section, we first explore some existing research on mobile databases, and in particular focus on how the authors evaluate their solutions.  Then, we turn to existing benchmarking suites and identify specific disconnects that prevent them from being applied directly to model pocket data.  In the process, we also explore aspects of these benchmarks that could be drawn into a benchmark better suited to pocket data.
+In spite of the prevalence of mobile devices, relatively little attention has been paid to pocket-scale data management.  We believe that this is, in large part, due to the lack of a common, overarching mechanism to evaluate potential solutions to known challenges in the space.  In this section, we first explore some existing research on mobile databases, and in particular focus on how the authors evaluate their solutions.  Then, we turn to existing benchmarking suites and identify specific disconnects that prevent them from being applied directly to model pocket data.  In the process, we explore aspects of these benchmarks that could be drawn into a potential pocket-data benchmark.

 \subsection{Pocket Data Management}
+\label{sec:pocketdata:related}

 Kang et al.~\cite{kang2013xftl} explored the design of a flash-aware transactional layer called X-FTL, specifically targeting limitations of SQLite's undo/redo logging on mobile devices.  To evaluate their work, the authors used the TPC-C benchmark in conjunction with a series of micro-benchmarks that evaluate the file system's response to database write operations.  This workload is appropriate for their target optimizations.  However, as we discuss below, TPC-C is not sufficiently representative of a pocket data workload to be used as a general-purpose mobile database benchmark.

-Jeong et. al.~\cite{jeong2013iostack} noted similar limitations in SQLite's transactional layer, and went about streamlining the IO-stack, again primarily for the benefit of mobile devices.  Again, micro-benchmarks played a significant role in the author's evaluation of their work.  Additionally, to evaluate their system's behavior under real-world conditions, the authors ran the \textit{Twitter} and \textit{Facebook} apps, simulating user behavior by replaying a mobility trace generated by MobiGen~\cite{ahmed2009mobigen}.  This is perhaps the most representative benchmarking workload that we encountered in our survey of related work.  However, it too could be improved.
-In our traces, Facebook and Twitter do represent a substantial contribution to the database workload of a typical smartphone, but still perform orders of magnitude less work with SQLite than built-in apps and system services.
+Jeong et al.~\cite{jeong2013iostack} noted similar limitations in SQLite's transactional layer, and went about streamlining the IO-stack, again primarily for the benefit of mobile devices.  Again, micro-benchmarks played a significant role in the authors' evaluation of their work.  To evaluate their system's behavior under real-world conditions, the authors ran the \textit{Twitter} and \textit{Facebook} apps, simulating user behavior using a mobility trace generated by MobiGen~\cite{ahmed2009mobigen}.  This is perhaps the most representative benchmarking workload that we encountered in our survey of related work.  %However, it too could be improved.
+%In our traces, Facebook and Twitter do represent a substantial contribution to the database workload of a typical smartphone, but still perform orders of magnitude less work with SQLite than built-in apps and system services.

-Many of the same issues with IO and power management that now appear in mobile phones have also historically arisen in sensor networks.  Madden et. al.'s work on embedded databases with TinyDB~\cite{madden2005tinydb} is emblematic of this space, where database solutions are driven by one or more specific target application domains.  Naturally, evaluation benchmarks and metrics in sensor networks are typically derived from, and closely tied to the target domain --- for example distributed event monitoring in the case of TinyDB.  
+Many of the same issues with IO and power management that now appear in mobile phones have also historically arisen in sensor networks.  Madden et al.'s work on embedded databases with TinyDB~\cite{madden2005tinydb} is emblematic of this space, where database solutions are driven by one or more specific target application domains.  Naturally, evaluation benchmarks and metrics in sensor networks are typically derived from, and closely tied to, the target domain.% --- for example distributed event monitoring in the case of TinyDB.  

 \subsection{Comparison to Existing Benchmarks}

@ -17,7 +18,7 @@ Given the plethora of available benchmarking software, it is reasonable to ask w

 Although no explicit macro-benchmarks exist for mobile embedded databases, we note two benchmark data generators that do simulate several properties of interest: AndroBench~\cite{kim2012androbench} and MobiGen~\cite{ahmed2009mobigen}.  AndroBench is a micro-benchmark capable of simulating the IO behavior of SQLite under different workloads.  It is primarily designed to evaluate the file-system supporting SQLite, rather than the embedded database itself.  However, the structure of its micro-benchmark workloads can just as effectively be used to compare two embedded database implementations.

-The second benchmark, MobiGen has little to do with data management directly.  Rather, it generates realistic traces of environmental inputs (\textit{e.g.}, signal strength, accelerometer readings, \textit{etc}\ldots), simulating the effects of a phone being carried through a physical space.  Replaying these traces through a virtual machine running a realistic application workload could generate realistic conditions (\textit{e.g.}, as in the evaluation of X-FTL~\cite{jeong2013iostack}).  However, it does not simulate the effects of user interactions with apps running on the device, something that TPC-MOBILE must be able to do.
+The second benchmark, MobiGen, has little to do with data management directly.  Rather, it generates realistic traces of environmental inputs (\textit{e.g.}, signal strength, accelerometer readings, \textit{etc}\ldots), simulating the effects of a phone being carried through a physical space.  Replaying these traces through a virtual machine running a realistic application workload could generate realistic conditions (\textit{e.g.}, as in the evaluation by Jeong et al.~\cite{jeong2013iostack}).  However, it does not simulate the effects of user interactions with apps running on the device.

 \subsubsection{TPC-C} 

--- a/tables/select_functions.tex
+++ b/tables/select_functions.tex
@ -1,5 +1,5 @@

-\begin{tabular}{ccc}
+\begin{tabular}{ccccc}
 \begin{tabular}{c|c}
 \textbf{Function} & \textbf{Call Sites}\\\hline
 \texttt{GROUP\_CONCAT} & \ \ 583,474\ \ \\
@ -8,16 +8,20 @@
 \texttt{COUNT} & \ \ 173,031\ \ \\
 \texttt{LENGTH} & \ \ 102,747\ \ \\
 \texttt{SUBSTR} & \ \ 88,462\ \ \\
+\end{tabular}
+&\ \ \ \ \ &
+\begin{tabular}{c|c}
+\textbf{Function} & \textbf{Call Sites}\\\hline
 \texttt{CAST} & \ \ 38,208\ \ \\
 \texttt{UPPER} & \ \ 20,487\ \ \\
 \texttt{MIN} & \ \ 19,566\ \ \\
-\end{tabular}
-&\ \ \ &
-\begin{tabular}{c|c}
-\textbf{Function} & \textbf{Call Sites}\\\hline
 \texttt{COALESCE} & \ \ 3,494\ \ \\
 \texttt{LOWER} & \ \ 3,110\ \ \\
 \texttt{PHONE\_NUMBERS\_EQUAL} & \ \ 2,017\ \ \\
+\end{tabular}
+&\ \ \ \ \ &
+\begin{tabular}{c|c}
+\ \ \textbf{Function}\ \ & \textbf{Call Sites}\\\hline
 \texttt{STRFTIME} & \ \ 1,147\ \ \\
 \texttt{IFNULL} & \ \ 657\ \ \\
 \texttt{JULIANDAY} & \ \ 587\ \ \\