master
Geoffrey Challen 2015-06-18 17:54:45 -04:00
commit b7a1f6b719
49 changed files with 4926 additions and 0 deletions

21
.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
ABSTRACT
/badwords
/paper.pdf
/.xxxnote
/.spellcheck
/.draft
/.blue
*.swp
*.aux
*.log
*.out
*.bbl
*.blg
*.pyg
*.fdb_latexmk
*.fls
.deps
.~lock*
out
auto/
*.synctex.gz*

34
Makefile Normal file
View File

@ -0,0 +1,34 @@
export PYTEX=$(shell pwd)/pytex/
START = noxxxnote nodraft noblue
END = missing
CLASS = $(PWD)/llncs.cls
all: paper ABSTRACT
open: paper.pdf
@nohup acroread -openInNewWindow paper.pdf 1>/dev/null 2>/dev/null &
figures:
@cd figures ; make
ABSTRACT: $(PYTEX)/bin/clean $(PYTEX)/bin/lib.py sections/0-abstract.tex
@$(PYTEX)/bin/clean sections/0-abstract.tex ABSTRACT
# 16 Nov 2010 : GWA : Add other cleaning rules here.
clean: rulesclean
@rm -f ABSTRACT
include $(PYTEX)/make/Makerules
spellcheck: .spellcheck | silent
.spellcheck: $(PAPER_TEXFILES) .okwords
@hunspell -t -l -p $(PWD)/.okwords $(PAPER_TEXFILES) | sort -f | uniq | tee badwords && touch .spellcheck
silent:
@:
.PHONY: spellcheck

88
aliascnt.sty Normal file
View File

@ -0,0 +1,88 @@
%%
%% This is file `aliascnt.sty',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% aliascnt.dtx (with options: `package')
%%
%% This is a generated file.
%%
%% Project: aliascnt
%% Version: 2009/09/08 v1.3
%%
%% Copyright (C) 2006, 2009 by
%% Heiko Oberdiek <heiko.oberdiek at googlemail.com>
%%
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either
%% version 1.3c of this license or (at your option) any later
%% version. This version of this license is in
%% http://www.latex-project.org/lppl/lppl-1-3c.txt
%% and the latest version of this license is in
%% http://www.latex-project.org/lppl.txt
%% and version 1.3 or later is part of all distributions of
%% LaTeX version 2005/12/01 or later.
%%
%% This work has the LPPL maintenance status "maintained".
%%
%% The Current Maintainer of this work is Heiko Oberdiek.
%%
%% This work consists of the main source file aliascnt.dtx
%% and the derived files
%% aliascnt.sty, aliascnt.pdf, aliascnt.ins, aliascnt.drv.
%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{aliascnt}%
[2009/09/08 v1.3 Alias counter (HO)]%
\newcommand*{\newaliascnt}[2]{%
\begingroup
\def\AC@glet##1{%
\global\expandafter\let\csname##1#1\expandafter\endcsname
\csname##1#2\endcsname
}%
\@ifundefined{c@#2}{%
\@nocounterr{#2}%
}{%
\expandafter\@ifdefinable\csname c@#1\endcsname{%
\AC@glet{c@}%
\AC@glet{the}%
\AC@glet{theH}%
\AC@glet{p@}%
\expandafter\gdef\csname AC@cnt@#1\endcsname{#2}%
\expandafter\gdef\csname cl@#1\expandafter\endcsname
\expandafter{\csname cl@#2\endcsname}%
}%
}%
\endgroup
}
\newcommand*{\aliascntresetthe}[1]{%
\@ifundefined{AC@cnt@#1}{%
\PackageError{aliascnt}{%
`#1' is not an alias counter%
}\@ehc
}{%
\expandafter\let\csname the#1\expandafter\endcsname
\csname the\csname AC@cnt@#1\endcsname\endcsname
}%
}
\newcommand*{\AC@findrootcnt}[1]{%
\@ifundefined{AC@cnt@#1}{%
#1%
}{%
\expandafter\AC@findrootcnt\csname AC@cnt@#1\endcsname
}%
}
\def\AC@patch#1{%
\expandafter\let\csname AC@org@#1reset\expandafter\endcsname
\csname @#1reset\endcsname
\expandafter\def\csname @#1reset\endcsname##1##2{%
\csname AC@org@#1reset\endcsname{##1}{\AC@findrootcnt{##2}}%
}%
}
\RequirePackage{remreset}
\AC@patch{addto}
\AC@patch{removefrom}
\endinput
%%
%% End of file `aliascnt.sty'.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

186
graphs/stats Normal file
View File

@ -0,0 +1,186 @@
----- Global Stats -----
Total Queries: 45090798
OUTER JOIN Queries: 391288
DISTINCT Queries: 1893624
LIMIT Queries: 1165518
ORDER BY Queries: 3169109
Aggregate Queries: 641352
GROUP BY Aggregate Queries: 438944
UNION Queries: 13866
WITH Queries: 0
Average Runtime: 1.3473690060371963 ms
Call Sites per Function:
| CAST -> 38208
| MAX -> 318153
| LENGTH -> 102747
| JULIANDAY -> 587
| SUM -> 321387
| UPPER -> 20487
| DATE -> 44
| COALESCE -> 3494
| AVG -> 15
| MIN -> 19566
| IFNULL -> 657
| PHONE_NUMBERS_EQUAL -> 2017
| LOWER -> 3110
| COUNT -> 173038
| GROUP_CONCAT -> 583474
| SUBSTR -> 88462
| STRFTIME -> 1147
Query Counts by Number of Tables Referenced:
| 0 -> 11574173
| 1 -> 31846350
| 2 -> 1347385
| 3 -> 200258
| 4 -> 32896
| 5 -> 83744
| 6 -> 1765
| 7 -> 4216
| 8 -> 11
Query Counts by Query Nesting Depth:
| 0 -> 11574173
| 1 -> 33021712
| 2 -> 389294
| 3 -> 88951
| 4 -> 16668
Query Runtimes by Query Nesting Depth:
Total Parse Errors: 308752
Percent Success Rate: 99.31992277456494
----- DELETE Stats -----
Total Queries: 1248594
OUTER JOIN Queries: 236
DISTINCT Queries: 5586
LIMIT Queries: 422
ORDER BY Queries: 194
Aggregate Queries: 3190
GROUP BY Aggregate Queries: 0
UNION Queries: 65
WITH Queries: 0
Average Runtime: 3.7761548616708076 ms
Call Sites per Function:
| MAX -> 3183
| COUNT -> 7
Query Counts by Number of Tables Referenced:
| 0 -> 1202472
| 1 -> 42528
| 2 -> 1736
| 3 -> 782
| 4 -> 460
| 7 -> 616
Query Counts by Query Nesting Depth:
| 0 -> 1202472
| 1 -> 46122
Query Runtimes by Query Nesting Depth:
----- SELECT Stats -----
Total Queries: 33470310
OUTER JOIN Queries: 391052
DISTINCT Queries: 1888013
LIMIT Queries: 1165096
ORDER BY Queries: 3168915
Aggregate Queries: 638137
GROUP BY Aggregate Queries: 438919
UNION Queries: 13801
WITH Queries: 0
Average Runtime: 1.1290731669464669 ms
Call Sites per Function:
| CAST -> 38208
| MAX -> 314970
| LENGTH -> 102747
| JULIANDAY -> 587
| SUM -> 321387
| UPPER -> 20487
| DATE -> 44
| COALESCE -> 3494
| AVG -> 15
| MIN -> 19566
| IFNULL -> 657
| PHONE_NUMBERS_EQUAL -> 2017
| LOWER -> 3110
| COUNT -> 173031
| GROUP_CONCAT -> 583474
| SUBSTR -> 88462
| STRFTIME -> 1147
Query Counts by Number of Tables Referenced:
| 1 -> 31803710
| 2 -> 1345568
| 3 -> 199476
| 4 -> 32436
| 5 -> 83744
| 6 -> 1765
| 7 -> 3600
| 8 -> 11
Query Counts by Query Nesting Depth:
| 1 -> 32975453
| 2 -> 389238
| 3 -> 88951
| 4 -> 16668
Query Runtimes by Query Nesting Depth:
SP Queries: 28723834
SP Runtime: 0.6330146528792779 ms
SPJ Queries: 29147210
SPJ Runtime: 0.6557674440088433 ms
SPA Queries: 28947579
SPA Runtime: 0.6698433789751812 ms
SPJA Queries: 29405360
SPJA Runtime: 0.6921697210216438 ms
SPJ Query Counts by Join Width:
| 1 -> 28723834
| 2 -> 375061
| 3 -> 47187
| 4 -> 1125
| 6 -> 3
----- INSERT Stats -----
Total Queries: 1953279
OUTER JOIN Queries: 0
DISTINCT Queries: 0
LIMIT Queries: 0
ORDER BY Queries: 0
Aggregate Queries: 0
GROUP BY Aggregate Queries: 0
UNION Queries: 0
WITH Queries: 0
Average Runtime: 2.3116640830337087 ms
Call Sites per Function:
Query Counts by Number of Tables Referenced:
| 0 -> 1953279
Query Counts by Query Nesting Depth:
| 0 -> 1953279
Query Runtimes by Query Nesting Depth:
----- UPDATE Stats -----
Total Queries: 1041967
OUTER JOIN Queries: 0
DISTINCT Queries: 25
LIMIT Queries: 0
ORDER BY Queries: 0
Aggregate Queries: 25
GROUP BY Aggregate Queries: 25
UNION Queries: 0
WITH Queries: 0
Average Runtime: 6.588152973392632 ms
Call Sites per Function:
Query Counts by Number of Tables Referenced:
| 0 -> 1041774
| 1 -> 112
| 2 -> 81
Query Counts by Query Nesting Depth:
| 0 -> 1041774
| 1 -> 137
| 2 -> 56
Query Runtimes by Query Nesting Depth:
----- UPSERT Stats -----
Total Queries: 7376648
OUTER JOIN Queries: 0
DISTINCT Queries: 0
LIMIT Queries: 0
ORDER BY Queries: 0
Aggregate Queries: 0
GROUP BY Aggregate Queries: 0
UNION Queries: 0
WITH Queries: 0
Average Runtime: 0.9311366858334572 ms
Call Sites per Function:
Query Counts by Number of Tables Referenced:
| 0 -> 7376648
Query Counts by Query Nesting Depth:
| 0 -> 7376648
Query Runtimes by Query Nesting Depth:

Binary file not shown.

Binary file not shown.

Binary file not shown.

1208
llncs.cls Normal file

File diff suppressed because it is too large Load Diff

230
main.bib Normal file
View File

@ -0,0 +1,230 @@
@misc{phones,
key = {One In Every 5 People In The World Own A Smartphone},
title = {{O}ne {I}n {E}very 5 {P}eople {I}n {T}he {W}orld {O}wn {A} {S}martphone},
howpublished = {\url{http://www.businessinsider.com/smartphone-and-tablet-penetration-2013-10}}
}
@inproceedings{Dit2015CIDR,
author = {Jens Dittrich},
title = {{The Case for Small Data Management}},
booktitle = {CIDR},
year = {2015},
}
@inproceedings{phonelab,
author = {Nandugudi, Anandatirtha and Maiti, Anudipa and Ki, Taeyeon and Bulut, Fatih and Demirbas, Murat and Kosar, Tevfik and Qiao, Chunming and Ko, Steven Y. and Challen, Geoffrey},
title = {PhoneLab: A Large Programmable Smartphone Testbed},
booktitle = {Proceedings of First International Workshop on Sensing and Big Data Mining},
series = {SENSEMINE'13},
year = {2013},
isbn = {978-1-4503-2430-4},
location = {Roma, Italy},
pages = {4:1--4:6},
articleno = {4},
numpages = {6},
url = {http://doi.acm.org/10.1145/2536714.2536718},
doi = {10.1145/2536714.2536718},
acmid = {2536718},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Smartphones, mobile devices, testbed},
}
@misc{tpch,
title={{TPC-H} specification},
author={Transaction Processing Performance Council},
howpublished={http://www.tpc.org/tpch/},
}
@misc{tpcc,
title={{TPC-C} specification},
author={Transaction Processing Performance Council},
howpublished={http://www.tpc.org/tpcc/},
}
@misc{tpcds,
title={{TPC-DS} specification},
author={Transaction Processing Performance Council},
howpublished={http://www.tpc.org/tpcds/},
}
@misc{o2007star,
title={The star schema benchmark (SSB)},
author={O'Neil, Patrick E. and O'Neil, Elizabeth J. and Chen, Xuedong},
year={2007}
}
@incollection{ssb,
year={2009},
isbn={978-3-642-10423-7},
booktitle={Performance Evaluation and Benchmarking},
volume={5895},
series={Lecture Notes in Computer Science},
editor={Nambiar, Raghunath and Poess, Meikel},
doi={10.1007/978-3-642-10424-4_17},
title={The Star Schema Benchmark and Augmented Fact Table Indexing},
url={http://dx.doi.org/10.1007/978-3-642-10424-4_17},
publisher={Springer Berlin Heidelberg},
keywords={Benchmark; Star Schema; Data Warehousing; Clustering; Multi-Dimensional Clustering; DB2; Oracle; Vertica},
author={O'Neil, Patrick and O'Neil, Elizabeth and Chen, Xuedong and Revilak, Stephen},
pages={237-252},
language={English}
}
@book{sqlite,
title={SQLite},
author={Owens, Mike and Allen, Grant},
year={2010},
publisher={Springer}
}
@inproceedings{kang2013xftl,
author = {Kang, Woon-Hak and Lee, Sang-Won and Moon, Bongki and Oh, Gi-Hwan and Min, Changwoo},
title = {X-FTL: Transactional FTL for SQLite Databases},
booktitle = {Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data},
series = {SIGMOD '13},
year = {2013},
isbn = {978-1-4503-2037-5},
location = {New York, New York, USA},
pages = {97--108},
numpages = {12},
url = {http://doi.acm.org/10.1145/2463676.2465326},
doi = {10.1145/2463676.2465326},
acmid = {2465326},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {copy-on-write, flash storage devices, flash translation layer, sqlite, transactional atomicity},
}
@inproceedings{jeong2013iostack,
author = {Jeong, Sooman and Lee, Kisung and Lee, Seongjin and Son, Seoungbum and Won, Youjip},
title = {I/O Stack Optimization for Smartphones},
booktitle = {Proceedings of the 2013 USENIX Conference on Annual Technical Conference},
series = {USENIX ATC'13},
year = {2013},
location = {San Jose, CA},
pages = {309--320},
numpages = {12},
url = {http://dl.acm.org/citation.cfm?id=2535461.2535499},
acmid = {2535499},
publisher = {USENIX Association},
address = {Berkeley, CA, USA},
}
@incollection{kim2012androbench,
year={2012},
isbn={978-3-642-27551-7},
booktitle={Frontiers in Computer Education},
volume={133},
series={Advances in Intelligent and Soft Computing},
editor={Sambath, Sabo and Zhu, Egui},
doi={10.1007/978-3-642-27552-4_89},
title={AndroBench: Benchmarking the Storage Performance of Android-Based Mobile Devices},
url={http://dx.doi.org/10.1007/978-3-642-27552-4_89},
publisher={Springer Berlin Heidelberg},
keywords={AndroBench; Android; Storage performance; Benchmark},
author={Kim, Je-Min and Kim, Jin-Soo},
pages={667-674},
language={English}
}
@misc{ahmed2009mobigen,
title={MobiGen: a mobility generator for environment aware mobility model},
howpublished={http://arrow.monash.edu.au/hdl/1959.1/109933},
author={Ahmed, Sabbir},
year={2009}
}
@article{madden2005tinydb,
author = {Madden, Samuel R. and Franklin, Michael J. and Hellerstein, Joseph M. and Hong, Wei},
title = {TinyDB: An Acquisitional Query Processing System for Sensor Networks},
journal = {ACM Trans. Database Syst.},
issue_date = {March 2005},
volume = {30},
number = {1},
month = mar,
year = {2005},
issn = {0362-5915},
pages = {122--173},
numpages = {52},
url = {http://doi.acm.org/10.1145/1061318.1061322},
doi = {10.1145/1061318.1061322},
acmid = {1061322},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Query processing, data acquisition, sensor networks},
}
@inproceedings{ycsb,
author = {Cooper, Brian F. and Silberstein, Adam and Tam, Erwin and Ramakrishnan, Raghu and Sears, Russell},
title = {Benchmarking Cloud Serving Systems with YCSB},
booktitle = {Proceedings of the 1st ACM Symposium on Cloud Computing},
series = {SoCC '10},
year = {2010},
isbn = {978-1-4503-0036-0},
location = {Indianapolis, Indiana, USA},
pages = {143--154},
numpages = {12},
url = {http://doi.acm.org/10.1145/1807128.1807152},
doi = {10.1145/1807128.1807152},
acmid = {1807152},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {benchmarking, cloud serving database},
}
@INPROCEEDINGS{lam2009healthmonitoring,
author={Lam, S.C.K. and Kai Lap Wong and Kwok On Wong and Wenxiu Wong and Wai Ho Mow},
booktitle={Information, Communications and Signal Processing, 2009. ICICS 2009. 7th International Conference on},
title={A smartphone-centric platform for personal health monitoring using wireless wearable biosensors},
year={2009},
month={Dec},
pages={1-7},
keywords={biosensors;body area networks;health care;patient monitoring;personal area networks;plethysmography;wireless sensor networks;battery life;biosignal processing;body area sensor network;closed loop control capability;healthcare;personal health monitoring;photoplethysmographic biosensors;portability;smartphone centric platform;upgradability;wireless wearable biosensors;Aging;Application software;Biomedical monitoring;Biosensors;Costs;Medical services;Operating systems;Smart phones;Wearable sensors;Wireless sensor networks;COTS wearable biosensors;Health monitoring;body area sensor network;pervasive computing},
doi={10.1109/ICICS.2009.5397628},}
@inproceedings{klasnja2009using,
title={Using mobile \& personal sensing technologies to support health behavior change in everyday life: lessons learned},
author={Klasnja, Predrag and Consolvo, Sunny and McDonald, David W and Landay, James A and Pratt, Wanda},
booktitle={AMIA Annual Symposium Proceedings},
volume={2009},
pages={338},
year={2009},
organization={American Medical Informatics Association}
}
@ARTICLE{campbell2008peoplesensing,
author={Campbell, A.T. and Eisenman, S.B. and Lane, N.D. and Miluzzo, E. and Peterson, R.A. and Hong Lu and Xiao Zheng and Musolesi, M. and Fodor, K. and Gahng-Seop Ahn},
journal={Internet Computing, IEEE},
title={The Rise of People-Centric Sensing},
year={2008},
month={July},
volume={12},
number={4},
pages={12-21},
keywords={social aspects of automation;ubiquitous computing;global mobile sensing device;mesh sensor networks;mobile devices;near-ubiquitous mobile phone;people-centric sensing;social sensing;Educational institutions;Humans;Mobile communication;Mobile computing;Mobile handsets;Monitoring;Portable media players;Prototypes;Visualization;Wireless sensor networks;Wi-Fi;mesh networking;people-centric sensing},
doi={10.1109/MIC.2008.90},
ISSN={1089-7801},}
@inproceedings{cheung2013statusquo,
title={StatusQuo: Making Familiar Abstractions Perform Using Program Analysis.},
author={Cheung, Alvin and Arden, Owen and Madden, Samuel and Solar-Lezama, Armando and Myers, Andrew C},
booktitle={CIDR},
year={2013}
}
@inproceedings{wimmer2012truffle,
author = {Wimmer, Christian and W\"{u}rthinger, Thomas},
title = {Truffle: A Self-optimizing Runtime System},
booktitle = {Proceedings of the 3rd Annual Conference on Systems, Programming, and Applications: Software for Humanity},
series = {SPLASH '12},
year = {2012},
isbn = {978-1-4503-1563-0},
location = {Tucson, Arizona, USA},
pages = {13--14},
numpages = {2},
url = {http://doi.acm.org/10.1145/2384716.2384723},
doi = {10.1145/2384716.2384723},
acmid = {2384723},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {dynamic languages, graal, j, java, javascript, language implementation, truffle, virtual machine},
}

71
paper.tex Normal file
View File

@ -0,0 +1,71 @@
\documentclass{llncs}
\usepackage{geometry}
\usepackage{makeidx} % allows for indexgeneration
\usepackage{graphicx}
\usepackage[utf8]{inputenc}
\usepackage{amssymb}
\usepackage{color}
\usepackage{url}
\newcommand{\ask}[1]{\begin{center}
\textcolor{blue}{\textbf{Question: } {#1}}
\end{center}}
\title{Pocket Data: The Need for TPC-MOBILE}
\titlerunning{TPC-MOBILE}
\author{ % Kennedy + Alphabetical by last
Oliver Kennedy \and
Jerry Ajay \and
Geoffrey Challen \and
Lukasz Ziarek
}
\authorrunning{Kennedy et al.}
\institute{SUNY Buffalo; Buffalo, NY 14260; USA\\
\email{\{okennedy,jerryant,challen,ziarek\}@buffalo.edu}\\
Website: \texttt{http://odin.cse.buffalo.edu/research/}
}
\begin{document}
\pagestyle{plain}
\maketitle
\begin{abstract}
\input{sections/0-abstract}
\keywords{SQLite, client-side, Android, smartphone, embedded database}
\end{abstract}
\section{Introduction}
\input{sections/1-introduction}
\section{Overview: Why TPC-MOBILE?}
\label{sec:overview}
\input{sections/2-overview}
\section{Experimental Setup}
\label{sec:experimental}
\input{sections/3-experimental}
\section{Query Complexity}
\label{sec:queryc}
\input{sections/4-queryc}
\section{Database Activity}
\label{sec:dba}
\input{sections/5-dba}
\section{Pocket Data, TPC-MOBILE, and Related Work}
\label{sec:pocketdata}
\input{sections/6-pocketdata}
\section{Conclusions and Future Work}
\label{sec:conc}
\input{sections/7-conclusions}
\bibliographystyle{plain}
\bibliography{main}
\end{document}

4
pytex/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/.pydevproject
/.project
*.pyc
*.swp

2
pytex/bin/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/flatex
*.pyc

7
pytex/bin/blank Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env python
import sys,subprocess
subprocess.check_output("convert -size %dx%d -density 300 -format pdf xc:white -bordercolor black -border 1x1 %s" % \
((int(float(sys.argv[2]) * 300.) - 1),
(int(float(sys.argv[3]) * 300.) - 1),
sys.argv[1]), shell=True)

27
pytex/bin/clean Executable file
View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
import lib, sys
from optparse import OptionParser
import re
parser = OptionParser()
(options, args) = parser.parse_args()
if len(args) < 2:
sys.exit(1)
if args[0] == "-":
dirty_string = sys.stdin.read()
else:
dirty_string = open(args[0], "r").read()
match = re.search(r"""(?ms)<clean:start>\s*(?P<excerpt>.*?)\s*<clean:end>""", dirty_string)
if match != None:
dirty_string = match.group('excerpt')
if args[1] == "-":
outfile = sys.stdout
else:
outfile = open(args[1], "w")
outfile.write(lib.clean(dirty_string).encode('utf8'))

601
pytex/bin/flatex.c Normal file
View File

@ -0,0 +1,601 @@
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
* flatex.c -
* Flatten a latex file into a single file, by explicitly including
* the files inclued by \include and \input commands. Also, if bibtex is
* beeing used, then includes the .bbl file into the resulting file. Thus,
* creating a stand alone latex file that can be emailed to someone else.
*
* Compile : gcc -o flatex flatex.c
* Tested on : Linux + gcc
* By : Sariel Har-Peled
* Email : sariel@math.tau.ac.il
* WEB Page : http://www.math.tau.ac.il/~sariel/flatex.html
* Status : You can do whatever you like with this program. please
* email me bugs & suggestions.
*
* To do : Add support to the includeonly command.
*-----------------------------------------------------------------------
* FLATEX 1.21, 1994, 1996, by Sariel Har-Peled.
*
* flatex - create a single latex file with no include/inputs
*
* flatex [-v] [-x FileName] [files]
* -v Verbose, display file structure.
* -x Unflatex: extract files from archive
* -q Quiet mode. Cleaner output but -x can not be used.
* -b Do not insert bibliography file(.bbl)
*
* Flatex page: http://www.math.tau.ac.il/~sariel/flatex.html
*-----------------------------------------------------------------------
* History:
* 26/8/96, 1.21
* Fixed bug with includegraphics command.
\*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
/*======================================================================
* Static constants.
\*======================================================================*/
#define LINE_SIZE 1000
#define FALSE 0
#define TRUE 1
#define USE_ARGUMENT( X ) ((void)X)
/*======================================================================
* Types
\*======================================================================*/
typedef struct {
char verbose;
char fBibInsert, fQuiet;
int cSpecialInputLevel;
char szFullName[ LINE_SIZE ];
} structFlags;
/*======================================================================
* Static prototypes.
\*======================================================================*/
static void flatIt( FILE * flOut,
char * szInName,
int level,
structFlags * pFlags );
static void replaceExt( char * str, char * ext );
/*======================================================================
* Start of Code
\*======================================================================*/
static void spacesByLevel( int level )
{
while ( level > 0 ) {
printf( " " );
level--;
}
}
static void printHelp( void )
{
printf( "flatex - create a single latex file with no include/inputs\n" );
printf( "\n\tflatex [-v] [-x FileName] [files]\n" );
printf( "\t\t-v\tVerbose, display file structure.\n" );
printf( "\t\t-x\tUnflatex: extract files from archive\n" );
printf( "\t\t-q\tQuiet mode. Cleaner output but -x can not be used.\n" );
printf( "\t\t-b\tDo not insert bibiliography file(.bbl)\n" );
printf( "\nFlatex page: http://www.math.tau.ac.il/~sariel/flatex.html\n" );
printf( "\n" );
}
static void * myMalloc( unsigned int size )
{
void * ptr;
ptr = malloc( size );
if ( ptr == NULL ) {
fprintf( stderr, "Not enough memory" );
exit( -1 );
}
return ptr;
}
static void handleIncludeCommand( char * line,
char * lpszInclude,
FILE * flOut,
int level,
structFlags * pFlags )
{
char * lpszBrace, * lpszName, * lpszEndBrace;
char ch, fInput = 0;
lpszBrace = NULL;
if ( strncmp( lpszInclude, "\\input", 6 ) == 0 ) {
lpszBrace = lpszInclude + 6;
fInput = 1;
} else
if ( strncmp( lpszInclude, "\\include", 8 ) == 0 ) {
lpszBrace = lpszInclude + 8;
}
ch = *lpszInclude;
*lpszInclude = 0;
fputs( line, flOut );
*lpszInclude = ch;
lpszEndBrace = strchr( lpszBrace, '}' );
if ( *lpszBrace != '{' || lpszEndBrace == NULL ) {
fprintf( stderr, "ERROR: Expected brace not found.\n\n\tline:%s\n",
line );
exit( -1 );
}
*lpszEndBrace = 0;
lpszName = (char *)myMalloc( LINE_SIZE );
strcpy( lpszName, lpszBrace + 1 );
if ( ! fInput )
replaceExt( lpszName, ".tex" );
flatIt( flOut, lpszName, level + 1, pFlags );
lpszEndBrace++;
while ( *lpszEndBrace ) {
*line++ = *lpszEndBrace++;
}
*line = 0;
free( lpszName );
}
static char isBefore( char * lpszA, char * lpszB )
{
if ( lpszB == NULL )
return TRUE;
if ( lpszA == NULL )
return FALSE;
if ( (int)( lpszA -lpszB ) < 0 ) {
return TRUE;
}
return FALSE;
}
static FILE * fopenTex( char * file,
char * mode )
{
FILE * fl;
fl = fopen( file, mode );
if ( fl != NULL )
return fl;
replaceExt( file, ".tex" );
fl = fopen( file, mode );
return fl;
}
static char isTexFileExists( char * file )
{
FILE * fl;
fl = fopenTex( file, "rt" );
if ( fl != NULL ) {
fclose( fl );
return 1;
}
return 0;
}
static void addTexExt( char * file )
{
FILE * fl;
fl = fopenTex( file, "rt");
if ( fl != NULL )
fclose( fl );
}
static char is_str_prefix( char * str, char * prefix )
{
int len;
if ( str == NULL || prefix == NULL )
return 0;
len = strlen( prefix );
return (strncmp( str, prefix, len ) == 0);
}
static void flatIt( FILE * flOut,
char * pSzInName,
int level,
structFlags * pFlags )
{
FILE * flIn;
char * str, * lpszInput, * lpszInclude, * line, * lpszRem, *inc;
char * lpszLine, * lpszRemark, * lpszBib, * lpszBibStyle;
char * lpszNewCommand, * lpszName;
char cont;
char repFlag;
char szInName[ 100 ];
char fInclude;
strcpy( szInName, pSzInName );
addTexExt( szInName );
if ( ! pFlags->fQuiet )
fprintf( flOut, "%%%cflatex input: [%s]\n",
pFlags->cSpecialInputLevel > 0? '*' : ' ',
szInName );
if ( pFlags->verbose ) {
printf( "\t" );
spacesByLevel( level );
printf( "%s\n", szInName );
}
line = (char *)myMalloc( LINE_SIZE );
lpszLine = (char *)myMalloc( LINE_SIZE );
lpszRemark = (char *)myMalloc( LINE_SIZE );
flIn = fopenTex( szInName, "rt" );
if ( flIn == NULL ) {
fprintf( stderr, "Unable to open file: %s\n", szInName );
exit( -1 );
}
*lpszRemark = 0;
while ( ! feof( flIn ) ) {
str = fgets( line, LINE_SIZE, flIn );
if ( str == NULL )
break;
fInclude = FALSE;
strcpy( lpszLine, line );
lpszRem = strchr( line, '%' );
if ( lpszRem != NULL ) {
strcpy( lpszRemark, lpszRem );
*lpszRem = 0;
}
do {
cont = 0;
lpszInput = strstr( line, "\\input" );
lpszBib = strstr( line, "\\bibliography" );
lpszBibStyle = strstr( line, "\\bibliographystyle" );
if ( pFlags->fBibInsert &&
( lpszBib != NULL || lpszBibStyle != NULL ) ) {
lpszName = (char *)myMalloc( LINE_SIZE );
strcpy( lpszName, lpszLine );
strcpy( lpszLine, pFlags->fQuiet? "%" : "%FLATEX-REM:" );
strcat( lpszLine, lpszName );
if ( lpszBibStyle != NULL ) {
strcpy( lpszName, pFlags->szFullName );
replaceExt( lpszName, ".bbl" );
pFlags->cSpecialInputLevel++;
flatIt( flOut, lpszName, level + 1, pFlags );
pFlags->cSpecialInputLevel--;
if ( pFlags->verbose ) {
printf( "\t" );
spacesByLevel( level + 1 );
printf( "(Bibiliography)\n" );
}
}
break;
}
inc = line;
do {
repFlag = 0;
lpszInclude = strstr( inc, "\\include" );
if ( is_str_prefix( lpszInclude, "\\includeversion" )
|| is_str_prefix( lpszInclude,
"\\includegraphics" ) ) {
repFlag = 1;
inc = lpszInclude + 1;
continue;
}
if ( is_str_prefix( lpszInclude, "\\includeonly" ) ) {
fprintf( stderr, "WARNING: \"\\includeonly\" command "
"ignored\n" );
inc = lpszInclude + 1;
repFlag = 1;
continue;
}
if ( lpszInclude != NULL && isalpha( lpszInclude[ 8 ] ) ) {
fprintf( stderr,
"\nWarning: include-like(?) command ignored"
" at line:\n\t%s", lpszLine );
inc = lpszInclude + 1;
repFlag = 1;
continue;
}
} while ( repFlag );
if ( isBefore( lpszInput, lpszInclude ) )
lpszInclude = lpszInput;
if ( lpszInclude != NULL ) {
lpszNewCommand = strstr( line, "\\newcommand" );
if ( lpszNewCommand == NULL ) {
handleIncludeCommand( line, lpszInclude, flOut, level,
pFlags );
cont = 1;
fInclude = TRUE;
}
}
} while ( cont );
if ( fInclude ) {
strcat( line, lpszRemark );
fputs( line, flOut );
} else
fputs( lpszLine, flOut );
}
fclose( flIn );
fputs( "\n", flOut );
if ( ! pFlags->fQuiet )
fprintf( flOut, "%% flatex input end: [%s]\n", szInName );
free( line );
free( lpszLine );
free( lpszRemark );
}
static void replaceExt( char * str, char * ext )
{
int len, ind;
len = strlen( str );
ind = len - 1;
while ( ind >= 0 && str[ ind ] != '.' && str[ ind ] != '\\' &&
str[ ind ] != '/' )
ind--;
if ( ind >= 0 && str[ ind ] == '.' ) {
str[ ind ] = 0;
}
strcat( str, ext );
}
static char strCmpPrefixAndCopy( char * line,
char * str,
char * outName )
{
char * pos, * pPreLine;
pPreLine = line;
pos = strstr( line, str );
if ( pos == NULL )
return 0;
line = pos + strlen( str );
strcpy( outName, line );
pos = strchr( outName, ']' );
if ( pos == NULL ) {
fprintf( stderr, "Error encountered in line: [%s]", pPreLine );
exit( -1 );
}
*pos = 0;
return 1;
}
static void writeFile( FILE * flIn,
char * pOutName,
int level )
{
FILE * flOut;
char * lpszLine;
char line[ LINE_SIZE ], outName[ LINE_SIZE ];
char flag;
outName[ 0 ] = 0;
if ( pOutName == NULL ) {
flOut = NULL;
printf( "Scanning for flatex archive start...\n" );
} else {
flOut = fopen( pOutName, "wt" );
if ( flOut == NULL ) {
fprintf( stderr, "Unable to open file: %s", pOutName );
exit( -1 );
}
spacesByLevel( level );
printf( "[%s]\n", pOutName );
}
do {
lpszLine = fgets( line, LINE_SIZE, flIn );
if ( lpszLine == NULL )
break;
flag = strCmpPrefixAndCopy( line, "% flatex input end: [", outName );
if ( flag ) {
if ( flOut == NULL ) {
fprintf( stderr, "Something is wrong!!!!\n" );
exit( -1 );
}
//spacesByLevel( level );
// printf( "/\n" );
//printf( "Writing [%s] done\n", outName );
break;
}
flag = strCmpPrefixAndCopy( line, "% flatex input: [", outName );
if ( flag ) {
writeFile( flIn, outName, level + 1 );
if ( flOut != NULL )
fprintf( flOut, "\\input{%s}\n", outName );
} else {
flag = strCmpPrefixAndCopy( line, "%*flatex input: [", outName );
if ( flag ) {
writeFile( flIn, outName, level + 1 );
} else {
if ( flOut != NULL ) {
if ( strncmp( line, "%FLATEX-REM:", 12 ) == 0 )
fputs( line + 12, flOut );
else
fputs( line, flOut );
}
}
}
} while ( ! feof( flIn ) );
if ( flOut != NULL )
fclose( flOut );
}
static void flatOutFile( char * fileName,
structFlags * pFlags )
{
FILE * flIn;
USE_ARGUMENT( pFlags );
flIn = fopen( fileName, "rt" );
if ( flIn == NULL ) {
fprintf( stderr, "Unable to open file: %s", fileName );
exit( -1 );
}
writeFile( flIn, NULL, 0 );
fclose( flIn );
}
static void flatFile( char * fileName,
structFlags * pFlags )
{
char * szInName, * szOutName;
int inLen;
FILE * flOut;
szInName = (char *)myMalloc( LINE_SIZE );
szOutName = (char *)myMalloc( LINE_SIZE );
strcpy( szInName, fileName );
if ( ! isTexFileExists( szInName ) ) {
fprintf( stderr, "--Unable to open file: [%s]\n", fileName );
exit( -1 );
}
inLen = strlen( szInName );
if ( inLen < 4 || ( szInName[ inLen ] != '.' &&
strcmp( szInName + inLen - 4, ".tex" ) != 0 ) ) {
strcat( szInName, ".tex" );
}
printf( "input file: [%s]\n", szInName );
strcpy( pFlags->szFullName, szInName );
strcpy( szOutName, szInName );
replaceExt( szOutName, ".flt" );
flOut = fopen( szOutName, "wt" );
if ( flOut == NULL ) {
fprintf( stderr, "Unable to open file: %s", szOutName );
exit( -1 );
}
flatIt( flOut, szInName, 0, pFlags );
fclose( flOut );
printf( "\n\tFile: \"%s\" generated\n", szOutName );
}
static char isFlag( char * str, char ch )
{
if ( str[ 0 ] == '-' &&
( str[ 1 ] == ch || str[ 1 ] == toupper( ch ) )
&& ( str[ 2 ] == 0 ) )
return TRUE;
return FALSE;
}
int main( int argc, char * argv[] )
{
int ind;
structFlags sFlags;
printf( "FLATEX 1.21, 1994, 1996, by Sariel Har-Peled.\n\n" );
if ( argc == 1 )
printHelp();
sFlags.verbose = FALSE;
sFlags.fBibInsert = TRUE;
sFlags.cSpecialInputLevel = 0;
*sFlags.szFullName = 0;
sFlags.fQuiet = FALSE;
for ( ind = 1; ind < argc; ind++ ) {
if ( isFlag( argv[ ind ], 'v' ) ) {
sFlags.verbose = TRUE;
continue;
}
if ( isFlag( argv[ ind ], 'b' ) ) {
sFlags.fBibInsert = FALSE;
continue;
}
if ( isFlag( argv[ ind ], 'q' ) ) {
sFlags.fQuiet = TRUE;
continue;
}
if ( isFlag( argv[ ind ], 'x' ) ) {
flatOutFile( argv[ ind + 1 ], &sFlags );
ind++;
continue;
}
flatFile( argv[ ind ], &sFlags );
}
return 0;
}
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
*
* flatex.c - End of File
\*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/

77
pytex/bin/lib.py Normal file
View File

@ -0,0 +1,77 @@
import re
def clean(inlines):
removecomments = re.compile(r"^(%.*)$", re.M)
inlines = removecomments.sub("", inlines)
fixpercents = re.compile(r"\\%", re.M)
inlines = fixpercents.sub("%", inlines)
removetex = re.compile(r"~?\\(((sub)*)section(\*?)|cite|chapter|thispagestyle)\*+\{([^\}]+)\}", re.M)
inlines = removetex.sub("", inlines)
removetex2 = re.compile(r"\\(clearpage)", re.M)
inlines = removetex2.sub("", inlines)
keeptex = re.compile(r"\\(textit|textbf|texttt|textsc|sloppypar)\{([^\}]+)\}", re.M)
while True:
beforelines = inlines
inlines = keeptex.sub(r"\2", inlines)
if inlines == beforelines:
break
keeptex2 = re.compile(r"\{\\scshape\s+([^\}]+)\}", re.S | re.M)
inlines = keeptex2.sub(r"\1", inlines)
quotes = re.compile(r"(``|'')", re.M)
inlines = quotes.sub(r'"', inlines)
phonelab_macro = re.compile(r"\\PhoneLab{}", re.M)
inlines = phonelab_macro.sub("PhoneLab", inlines)
sciwinet_macro = re.compile(r"\\SciWiNet{}", re.M)
inlines = sciwinet_macro.sub("SciWiNet", inlines)
composite_macro = re.compile(r"\\ComPoSiTe{}", re.M)
inlines = composite_macro.sub("ComPoSiTe", inlines)
agiledroid_macro = re.compile(r"\\AG{}", re.M)
inlines = agiledroid_macro.sub("AgileDroid", inlines)
wifi_macro = re.compile(r"\\wifi{}", re.M)
inlines = wifi_macro.sub("Wifi", inlines)
keep_together = re.compile(r"~", re.M)
inlines = keep_together.sub(" ", inlines)
en_dashes = re.compile(r"([^-])--([^-])", re.M)
inlines = en_dashes.sub(u"\\1\u2013\\2", inlines)
em_dashes = re.compile(r"([^-])---([^-])", re.M)
inlines = em_dashes.sub(u"\\1\u2014\\2", inlines)
enum = re.compile(r"\\begin\{enumerate\}(.*?)\\end\{enumerate\}", re.S | re.M)
class Counter:
def __init__(self):
self.count = 0
def reset(self):
self.count = 0
def increment(self, matchObject):
self.count += 1
return str(self.count) + "."
def match(m):
c = Counter()
item = re.compile(r"\\item")
text = item.sub(c.increment, m.group(1))
c.reset()
return text
inlines = enum.sub(match, inlines)
removeitem = re.compile(r"~?\\item", re.M)
inlines = removeitem.sub("", inlines)
removeflushenumbf = re.compile(r"\\begin\{flushenumbf\}\s+(.*?)\s+\\end\{flushenumbf\}", re.S | re.M)
inlines = removeflushenumbf.sub(r"\1", inlines)
removebeginabstract = re.compile(r"\\begin\{abstract\}\s+(.*?)\s+\\end\{abstract\}", re.S | re.M)
inlines = removebeginabstract.sub(r"\1", inlines)
lines = re.split(r'\s{2,}', inlines)
while re.match(r"^\s*$", lines[0]):
lines = lines[1:]
if len(lines) == 0:
return ""
while re.match(r"^\s*$", lines[-1]):
lines = lines[:-1]
if len(lines) == 0:
return ""
output = '\n\n'.join([re.sub(r'\n', ' ', line) for line in lines])
return output

65
pytex/bin/number Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
from optparse import OptionParser
import sys, subprocess, time, re, shlex, tempfile, os
parser = OptionParser()
parser.add_option("-s", "--skip", dest="skip", type=int, default=0, help="number of initial pages to skip (default 0)")
parser.add_option("-a", "--avoid", dest="avoid", type=str, default="", help="pages to avoid, comma separated (default \"\")")
(options, args) = parser.parse_args()
avoid = options.avoid.split(",")
avoid = [int(a) for a in avoid]
infile = args[0]
outfile = args[1]
ininfo = subprocess.Popen("pdfinfo \"%s\"" % (infile), shell=True, stdout=subprocess.PIPE).communicate()[0]
origpages = int(re.search(r'Pages:\s+(\d+)', ininfo).group(1))
numpages = origpages - options.skip
latexstart = r'''\documentclass[11pt]{memoir}
\usepackage{times}
\maxdeadcycles=1000
\setstocksize{11in}{8.5in}
\settrimmedsize{11in}{8.5in}{*}
\settrims{0pt}{0pt}
\setlrmarginsandblock{1in}{1in}{*}
\setulmarginsandblock{1in}{1in}{*}
\setheadfoot{0.1pt}{36pt}
\setmarginnotes{0.5cm}{1.5cm}{0.1cm}
\checkandfixthelayout
\copypagestyle{number}{headings}
\makeoddhead{number}{}{}{}
\makeevenhead{number}{}{}{}
\makeoddfoot{number}{}{\thepage}{}
\makeevenfoot{number}{}{\thepage}{}
\begin{document}
\pagestyle{number}'''
latexend = r'''\end{document}'''
startdir = os.getcwd()
tempdir = tempfile.mkdtemp()
subprocess.call("cp \"%s\" \"%s\"/A.pdf" % (infile, tempdir), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
os.chdir(tempdir)
latexfile = open('B.tex', 'w')
print >>latexfile, latexstart
for a in range(numpages):
print >>latexfile, r'''\mbox{}
\newpage'''
print >>latexfile, latexend
latexfile.close()
subprocess.Popen("pdflatex --interaction=nonstopmode B.tex", shell=True, stdout=subprocess.PIPE).communicate()[0]
subprocess.Popen(r"pdftk A.pdf burst output A%03d.pdf", shell=True, stdout=subprocess.PIPE).communicate()[0]
subprocess.Popen(r"pdftk B.pdf burst output B%03d.pdf", shell=True, stdout=subprocess.PIPE).communicate()[0]
Boffset = options.skip
for Aindex in range(origpages):
Aindex += 1
if (Aindex <= Boffset) or ((Aindex - Boffset) in avoid):
subprocess.Popen(r"cp A%03d.pdf C%03d.pdf" % (Aindex, Aindex), shell=True, stdout=subprocess.PIPE).communicate()[0]
else:
subprocess.Popen(r"pdftk A%03d.pdf background B%03d.pdf output C%03d.pdf" % (Aindex, Aindex - Boffset, Aindex), shell=True, stdout=subprocess.PIPE).communicate()[0]
subprocess.Popen(r"pdftk %s output D.pdf" % (' '.join(["C%03d.pdf" % (i + 1) for i in range(origpages)])), shell=True, stdout=subprocess.PIPE).communicate()[0]
subprocess.call("cp D.pdf \"%s\"/\"%s\"" % (startdir, outfile), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
os.chdir(startdir)

33
pytex/bin/rotateandstitch Executable file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
from optparse import OptionParser
import sys, subprocess, time, re, shlex
parser = OptionParser()
(options, args) = parser.parse_args()
pagesize = re.compile(r'Page size:\s+(\d+)\s+x\s+(\d+)')
files = []
outpdf = args.pop(0)
for arg in args:
output = subprocess.Popen("pdfinfo \"%s\"" % (arg), shell=True, stdout=subprocess.PIPE).communicate()[0]
pagematch = pagesize.search(output)
if pagematch == None:
continue
if int(pagematch.group(1)) < int(pagematch.group(2)):
files.append(arg)
continue
infile = arg
outfile = re.sub(r'.pdf', '.rotate.pdf', infile)
output = subprocess.Popen("pdftk \"%s\" cat 1-endW output \"%s\"" % (infile, outfile), shell=True, stdout=subprocess.PIPE).communicate()[0]
files.append(outfile)
def order(pdf):
nummatch = re.search(r'(\d+)', pdf)
if nummatch == None:
return 0
else:
return int(nummatch.group(1))
files = sorted(files, key=order)
output = subprocess.Popen("pdftk %s cat output \"%s\"" % (' '.join(['"%s"' % (file) for file in files]), outpdf), shell=True, stdout=subprocess.PIPE).communicate()[0]

112
pytex/bin/texincludes Executable file
View File

@ -0,0 +1,112 @@
#!/usr/bin/env python
import sys,re,glob,StringIO,os,tempfile,filecmp,shutil
from optparse import OptionParser
parser = OptionParser()
(options, args) = parser.parse_args()
if len(args) < 1:
sys.exit(1)
files = glob.glob("*.tex")
if len(files) == 0:
sys.exit(0)
outfile = tempfile.NamedTemporaryFile(delete=False)
docfile = re.compile(r"""(?m)^(?!\s*%).*\\begin\{document\}""")
inputs = re.compile(r"""(?m)^(?!\s*%).*\\input{(.*)}""")
bibs = re.compile(r"""(?m)^(?!\s*%).*\\bibliography\{(.*)\}""")
citations = re.compile(r"""^(?m)^(?!\s*%).*\\(?:no)?cite""")
graphics = re.compile(r"""(?m)^(?!\s*%).*\\includegraphics(\[.*?\])?\{(.*?)\}""")
withpdf = re.compile(r"^.*\.pdf$")
nobibtex = re.compile(r"""(?m)^% !NOBIBTEX!""")
nobibtexs = {}
output = StringIO.StringIO()
allnames = []
for f in files:
lines = open(f, "r").read()
if not docfile.search(lines):
continue
input_files = []
bib_files = []
graphic_files = []
toprocess = [f]
docitations = False
dontbibtex = False
fbasename = os.path.splitext(f)[0]
while len(toprocess) > 0:
try:
lines = open(toprocess[0], "r").read()
if nobibtex.search(lines):
nobibtexs[toprocess[0]] = True
else:
nobibtexs[toprocess[0]] = False
if len(citations.findall(lines)) > 0:
docitations = True
input_matches = inputs.findall(lines)
real_inputs = []
for possible_input in input_matches:
if os.path.splitext(possible_input)[1] == '':
possible_input += '.tex'
real_inputs.append(possible_input)
toprocess += real_inputs
b = bibs.finditer(lines)
for m in b:
allbibs = m.group(1).split(",")
for bib in allbibs:
bib_files.append(bib + ".bib")
g = graphics.finditer(lines)
for m in g:
if withpdf.match(m.group(2)):
graphic_files.append(m.group(2))
else:
path, ext = os.path.splitext(m.group(2))
if ext == '':
graphic_files.append(path + ".pdf")
else:
graphic_files.append(m.group(2))
except:
True
input_files.append(toprocess.pop(0))
all_files = input_files
all_files.extend(graphic_files)
all_files.extend(bib_files)
for file in args[1:]:
all_files.append(file)
allnames.append(fbasename)
tex_files = [all_file for all_file in all_files if all_file.endswith(".tex")]
print >>output, "%s_TEXFILES=%s" % (fbasename.upper(), " ".join(tex_files),)
print >>output, "%s : LOG := %s.log" % (fbasename, fbasename)
print >>output, "%s : PDF := %s.pdf" % (fbasename, fbasename)
print >>output, "%s : $(START) %s.pdf $(END)" % (fbasename, fbasename)
print >>output, "%s.ps : %s.pdf" % (fbasename, fbasename)
print >>output, "%s.pdf %s.blg : .deps %s" % (fbasename, fbasename, " ".join(all_files))
if docitations and not nobibtexs[f]:
print >>output, "\tpdflatex -shell-escape %s" % (f)
print >>output, "\tbibtex %s" % (fbasename)
print >>output, "\tpdflatex -shell-escape %s" % (f)
print >>output, "\tpdflatex -shell-escape %s" % (f)
else:
print >>output, "\tpdflatex -shell-escape %s" % (f)
print >>output, "\tpdflatex -shell-escape %s" % (f)
print >>output, "spell-%s : %s" % (fbasename, " ".join(tex_files),)
print >>output, "\tispell %s" % (" ".join(tex_files),)
print >>outfile, output.getvalue(),
print >>outfile, "PDFS = %s" % (" ".join([n + ".pdf" for n in allnames]))
outfile.close()
if not os.path.exists(args[0]) or not filecmp.cmp(outfile.name, args[0], shallow=False):
shutil.move(outfile.name, args[0])
else:
os.unlink(outfile.name)

55
pytex/bin/wc Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
import lib
import sys, re, os
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-o", "--overonly", dest="overonly", action="store_true", default=False, help="only display sections over the word count (default False)")
(options, args) = parser.parse_args()
if len(args) < 2:
sys.exit(1)
if args[0] == "-":
inlines = sys.stdin.read()
else:
try:
inlines = open(args[0], "r").read()
except:
sys.exit(1)
if args[1] == "-":
outfile = sys.stdout
else:
try:
outfile = open(args[1], "w")
except:
sys.exit(1)
clean = re.compile(r'<wc:start description="([^"]*)" max=(\d+)>(.*?)<wc:end>', re.S)
index = 1
for f in clean.finditer(inlines):
description = f.group(1)
max = int(f.group(2))
count = len(lib.clean(f.group(3)).split())
if not options.overonly or count > max:
if count > max:
char = "*"
else:
char = " "
print "%c %2d. M:%3d C:%3d %s" % (char, index, max, count, description)
index += 1
clean = re.compile(r'<cc:start description="([^"]*)" max=(\d+)>(.*?)<cc:end>', re.S)
index = 1
for f in clean.finditer(inlines):
description = f.group(1)
max = int(f.group(2))
count = len(lib.clean(f.group(3)).strip())
if not options.overonly or count > max:
if count > max:
char = "*"
else:
char = " "
print "%c %2d. M:%3d C:%3d %s" % (char, index, max, count, description)
index += 1

109
pytex/make/Makerules Normal file
View File

@ -0,0 +1,109 @@
SHELL := /bin/bash
export TEXINPUTS :=.:$(PYTEX)/cls:
# 16 Nov 2010 : GWA : Watch all .tex files below this directory to determine
# when to rebuild the dependencies.
TEXFILES = $(shell find . -name "*.tex")
# 16 Nov 2010 : GWA : Kind of a nasty hack, but we use a special Python
# script to regenerate make rules which are then loaded by the
# include below. This was the least nasty way of getting
# complex Latex dependencies to rebuild properly, while also
# enabling/disabling Bibtex as needed.
.deps: $(TEXFILES)
@$(PYTEX)/bin/texincludes .deps $(CLASS)
include .deps
%.ps : %.pdf
acroread -toPostScript $<
allclean: rulesclean
@/bin/rm -f .deps
rulesclean:
@/bin/rm -f *.dvi *.aux *.ps *~ *.log *.lot *.lof *.toc *.blg *.bbl url.sty *.out *.bak $(PDFS)
# 16 Nov 2010 : GWA : Special dummy targets below.
xxxnote:
@echo "\\newcommand{\\XXXnote}[1]{\\textcolor{red}{\\bfseries XXX: #1}}" > .xxxnote-new
@if [ -n "`diff -N 2>/dev/null .xxxnote .xxxnote-new`" ]; then\
mv .xxxnote-new .xxxnote; \
else\
rm -f .xxxnote-new; \
fi
noxxxnote:
@echo "\\newcommand{\\XXXnote}[1]{}" > .xxxnote-new
@if [ -n "`diff -N 2>/dev/null .xxxnote .xxxnote-new`" ]; then\
mv .xxxnote-new .xxxnote; \
else\
rm -f .xxxnote-new; \
fi
draft:
@echo "\\def\\isdraft{1}" > .draft-new
@if [ -n "`diff -N 2>/dev/null .draft .draft-new`" ]; then\
mv .draft-new .draft; \
else\
rm -f .draft-new; \
fi
nodraft:
@echo "" > .draft-new
@if [ -n "`diff -N 2>/dev/null .draft .draft-new`" ]; then\
mv .draft-new .draft; \
else\
rm -f .draft-new; \
fi
blue:
@echo "\\def\\isblue{1}" > .blue-new
@if [ -n "`diff -N 2>/dev/null .blue .blue-new`" ]; then\
mv .blue-new .blue; \
else\
rm -f .blue-new; \
fi
noblue:
@echo "" > .blue-new
@if [ -n "`diff -N 2>/dev/null .blue .blue-new`" ]; then\
mv .blue-new .blue; \
else\
rm -f .blue-new; \
fi
.embed.pdf: $(PDF)
gs -dSAFER -dNOPLATFONTS -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sPAPERSIZE=letter -dCompatibilityLevel=1.4 -dPDFSETTINGS=/printer -dCompatibilityLevel=1.4 -dMaxSubsetPct=100 -dSubsetFonts=true -dEmbedAllFonts=true -sOutputFile=.embed.pdf -f $(PDF)
@cp .embed.pdf $(PDF)
embed: .embed.pdf
MISSINGREFERENCES = $(strip $(shell grep Ref $(LOG) | awk '{print substr($$4, 2, length($$4) - 2)}'))
MISSINGCITATIONS = $(strip $(shell grep Cit $(LOG) | awk '{print substr($$4, 2, length($$4) - 2)}'))
missing:
@if [ "$(MISSINGREFERENCES)" != "" ]; then\
echo "-------------------------------------------------------------";\
echo "Missing references:";\
echo "-------------------------------------------------------------";\
echo $(MISSINGREFERENCES);\
fi
@if [ "$(MISSINGCITATIONS)" != "" ]; then\
echo "-------------------------------------------------------------";\
echo "Missing citations:";\
echo "-------------------------------------------------------------";\
echo $(MISSINGCITATIONS);\
fi
missing-fail: missing
@if [ "$(MISSINGREFERENCES)" != "" ]; then false; fi
@if [ "$(MISSINGCITATIONS)" != "" ]; then false; fi
pages: $(PDF)
@pdfinfo $(PDF) 2>/dev/null | grep "Pages" | awk '{print "$(PDF)", $$2;}'
# 16 Nov 2010 : GWA : Phony targets.
.PHONY : pages rulesclean missing-fail missing xxxnote noxxxnote draft nodraft blue noblue clean allclean all figures wc

39
remreset.sty Normal file
View File

@ -0,0 +1,39 @@
% remreset package
%%%%%%%%%%%%%%%%%%
% Copyright 1997 David Carlisle
% This file may be distributed under the terms of the LPPL.
% See 00readme.txt for details.
% 1997/09/28 David Carlisle
% LaTeX includes a command \@addtoreset that is used to declare that
% a counter should be reset every time a second counter is incremented.
% For example the book class has a line
% \@addtoreset{footnote}{chapter}
% So that the footnote counter is reset each chapter.
% If you wish to base a new class on book, but without this counter
% being reset, then standard LaTeX gives no simple mechanism to do
% this.
% This package defines |\@removefromreset| which just undoes the effect
% of \@addtoreset. So for example a class file may be defined by
% \LoadClass{book}
% \@removefromreset{footnote}{chapter}
\def\@removefromreset#1#2{{%
\expandafter\let\csname c@#1\endcsname\@removefromreset
\def\@elt##1{%
\expandafter\ifx\csname c@##1\endcsname\@removefromreset
\else
\noexpand\@elt{##1}%
\fi}%
\expandafter\xdef\csname cl@#2\endcsname{%
\csname cl@#2\endcsname}}}

1
sections/0-abstract.tex Normal file
View File

@ -0,0 +1 @@
Embedded database engines such as SQLite are now found in most major operating systems, where they serve as a persistence layer for user-facing applications. This is especially true for modern mobile operating systems like Android. The performance of these engines directly impacts the power consumption and response time of user-facing applications and the devices on which they are deployed. It is now more important than ever that we understand how such applications interact with their embedded databases, the environment in which these applications run, and other factors, such as power consumption, that both impact and are impacted by database performance. In this paper, we present the results of a long-running case study tracing SQLite access patterns and run-time characteristics for applications on Android smartphones. We outline our findings, present key features that distinguish user-facing smartphone database workloads from canonical server workloads, and propose the foundational characteristics of a benchmarking suite for mobile-device databases based on these findings.

27
sections/1-introduction.tex Normal file
View File

@ -0,0 +1,27 @@
Since the introduction of the smartphone, mobile computing has become pervasive in our society, with one in every five people in the world owning a smartphone~\cite{phones}. Mobile devices, however,
have evolved far beyond the stereotypical smartphone and tablet, and are now employed in a wide variety of domains. Of the currently available mobile systems, Android has seen the most widespread deployment outside of the consumer electronics market. Android's open source nature has prompted its ubiquitous adoption in sensing, medical, command and control, robotics, and automotive applications.
For each of these domains, Android supports specific client-side applications that often utilize application-resident, or embedded, databases such as SQLite~\cite{sqlite} to provide necessary functionality. Even Android
itself, specifically the framework layer, makes use of embedded databases.
The way in which mobile applications use databases is, however, rather different from that of traditional server farms and desktop applications that focus on ``big data.''
Based on the experiments we present in this paper, SQLite on \textit{one} average Android smartphone satisfies over 178 thousand database requests \textit{per day}, or about 2 requests every second.
As such, client-side mobile applications represent a new and rapidly growing class of \textit{pocket-scale} database consumers. The execution context for these client-side application workloads varies wildly. This means that the performance of the database is not only dependent on the query workload being executed by the client-side application, but is also impacted by other applications that may have access to the database, other applications currently using system resources ({\em e.g.} processor, memory,
file system), and the overall energy available and power profile of the system itself ({\em e.g.} voltage scaling, power governors, OS policies).
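As a back-of-the-envelope check of the two-requests-per-second figure above (our own arithmetic, using the trace totals reported in Section~\ref{sec:experimental}, not a number taken directly from the logs):
\[
\frac{45{,}090{,}798~\text{statements}}{254~\text{phone-days}} \approx 177{,}500~\text{statements per phone-day},
\qquad
\frac{177{,}500~\text{statements}}{86{,}400~\text{s}} \approx 2~\text{statements per second}.
\]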
The challenges that arise in handling pocket-scale data --- that is, minimizing power consumption, latency, and I/O --- are familiar ground for database practitioners, but the trade-offs and workload characteristics are far less well understood. In this paper, we present the results of a one-month trace of SQLite activity on 11 Android smartphones, part of a real-world experimental testbed called PhoneLab~\cite{phonelab}. We show that the workload characteristics encountered by SQLite on these phones differ substantially from the characteristics of database workloads expressed in popular database benchmarking suites. We argue that a new benchmark for mobile and embedded databases could spur innovation in this area, and outline the workload characteristics of such a benchmark.
\noindent The contributions of this paper are the following:
\begin{itemize}
\item A detailed examination of SQLite~\cite{sqlite} usage in a real-world mobile context. We compare our gathered results on mobile application and Android framework usage of SQLite to traditional TPC benchmark
workloads.
\item The synthesis of an open-source dataset gathered over one month of ``in the wild'' execution of database queries using eleven test subjects and their personal smartphones. Data was gathered using PhoneLab~\cite{phonelab}.
\item An outline of workload characteristics and other desiderata for a proposed TPC-MOBILE benchmark.
\end{itemize}
The remainder of the paper is organized as follows.
We present our motivating observations in Section~\ref{sec:overview}. We outline our experimental setup and the process of obtaining data from 11 primary-use smartphones in
Section~\ref{sec:experimental}. We characterize the workload in terms of query complexity in Section~\ref{sec:queryc} and database activity in Section~\ref{sec:dba}. Based on this
detailed analysis and a review of related work, we sketch the requirements for a TPC-MOBILE benchmark in Section~\ref{sec:pocketdata}. We wrap up with conclusions in Section~\ref{sec:conc}.

10
sections/2-overview.tex Normal file
View File

@ -0,0 +1,10 @@
Our primary observation is that an embedded database workload in a modern mobile device includes a mix of both OLTP and OLAP characteristics. The majority of operations performed by SQLite are simple key-value manipulations and look-ups. However, a substantial fraction of the (comparatively read-heavy) workload consists of far more complex OLAP-style operations involving wide, multi-table joins, nested sub-queries, complex selection predicates, and aggregation.
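For illustration only (this example is ours and does not come from the trace; the schema and column names are invented), the following Python/sqlite3 sketch contrasts the two ends of this spectrum: a key-value-style point lookup of the kind that dominates the workload, next to an OLAP-style statement combining a join, a nested sub-query, and aggregation.
\begin{verbatim}
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE settings (key TEXT PRIMARY KEY, value TEXT);
    CREATE TABLE threads  (id INTEGER PRIMARY KEY, label TEXT);
    CREATE TABLE messages (id INTEGER PRIMARY KEY, thread_id INTEGER, sent_ms INTEGER);
""")

# OLTP-style: a simple key-value point lookup, the kind of statement
# that dominates the observed workload.
row = conn.execute("SELECT value FROM settings WHERE key = ?",
                   ("ringtone",)).fetchone()

# OLAP-style: one statement combining a multi-table join, a nested
# sub-query, and aggregation (the complex tail of the same workload).
report = conn.execute("""
    SELECT t.label, COUNT(*) AS n, MAX(m.sent_ms) AS latest
    FROM threads t JOIN messages m ON m.thread_id = t.id
    WHERE m.sent_ms > (SELECT AVG(sent_ms) FROM messages)
    GROUP BY t.label
""").fetchall()
\end{verbatim}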
Many of these workload characteristics are motivated by factors unique to embedded databases. For example, SQLite uses single-file databases that have a standard, platform-independent format. As a consequence, it is common to see entire databases, indexes and all, transported in their entirety through web downloads or as attachments to other files~\cite{Dit2015CIDR}. A common pattern we observed was for a cloud service to package a fragment of its state into a SQLite database, which could then be cached locally on the device for lower-latency and offline access.
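A minimal sketch of that packaging pattern (ours, with hypothetical table names and file paths; it is not code from any of the traced apps): the ``server'' side materializes a fragment of its state as a standalone SQLite file, and the ``client'' side simply copies the file and queries it locally, offline.
\begin{verbatim}
import sqlite3, shutil

# "Server" side (hypothetical): write a fragment of cloud state into a
# single, self-contained SQLite file.
def package_state(rows, path="export.db"):
    db = sqlite3.connect(path)
    db.execute("CREATE TABLE IF NOT EXISTS places "
               "(id INTEGER PRIMARY KEY, name TEXT, lat REAL, lon REAL)")
    db.executemany("INSERT OR REPLACE INTO places VALUES (?, ?, ?, ?)", rows)
    db.execute("CREATE INDEX IF NOT EXISTS places_by_name ON places(name)")
    db.commit()
    db.close()
    return path

# "Client" side: downloading the database is just transferring the file,
# indexes and all; queries then run locally, even when offline.
package_state([(1, "Davis Hall", 43.0026, -78.7873)])
shutil.copy("export.db", "cache.db")
cache = sqlite3.connect("cache.db")
print(cache.execute("SELECT name FROM places WHERE id = 1").fetchone())
\end{verbatim}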
Optimization targets also differ substantially. Latency is a primary concern, but at vastly different scales. Over our one-month trial, the average SQL statement took 2 ms to evaluate, and even complex \texttt{SELECT} queries with 4-level deep nesting only took an average of 120 ms. %Flash wear~\cite{SIGMOD2014?} is also a concern.
Finally, unlike typical server-class benchmark workloads where throughput is a key factor, embedded databases have fixed, ``small data''~\cite{Dit2015CIDR} workloads and need to share computing resources fairly with other processes on the same device. This means that in stark contrast to server-class workloads, the database is idle more frequently. Periods of low utilization are opportunities for background optimization, but must be managed against the needs of other applications running on the device, as well as the device's limited power budget. We use the term ``pocket data'' to refer to data management settings that exhibit such characteristics.
Pocket data workloads represent a growing and extremely important class of database consumers. Unfortunately, research and development on embedded databases (\textit{e.g.},~\cite{jeong2013iostack,kang2013xftl}) is presently obligated to rely on micro-benchmarks or anecdotal observations about the needs and requirements of embedded database engines. In this paper, we lay out the characteristics of a one-month trace of SQLite operations performed on eleven Android smartphones participating in the PhoneLab experimental platform~\cite{phonelab}. We believe that a new TPC-MOBILE benchmark that captures these characteristics can provide a principled, standardized way to evaluate advances in mobile database technology, which will, in turn, help to drive the development of such advances.

10
sections/3-experimental.tex Normal file
View File

@ -0,0 +1,10 @@
\subsection{PhoneLab}
PhoneLab~\cite{phonelab} is a large, programmable smartphone testbed deployed at the University at Buffalo. PhoneLab provides researchers with limited access to deploy experiments to participants' smartphones, and gives participants incentives to take part in these experiments. As of June 2015, PhoneLab had 290 total participants, all of whom were using Nexus 5 smartphones running an instrumented branch of Android 4.4.4. The ages of these participants vary between 18 and 70, with the majority between 21 and 40. The gender distribution is approximately 50-50. Participating smartphones log experimental results, which are collected by a centralized service every 24 hours and made available to authorized researchers.
% These logs are then studied and analysed to obtain results such as the ones this script talks about.
\subsection{Data Collection and Analysis}
Our trace dataset is drawn from log data collected by PhoneLab. Eleven PhoneLab participants willingly released\footnote{\url{https://phone-lab.org/static/experiment/sample_dataset.tgz}} complete trace data for their phones for the month of March 2015, a period of 31 days. The publicly released trace data includes a log of all statements evaluated by SQLite\footnote{All features are documented at \url{https://phone-lab.org/experiment/data/}}, as well as the calling application and statement run time. Personally identifying information has been stripped out, and prepared statement arguments are only provided as hash values. Of the eleven participants, seven had phones that were participating in the SQLite experiment every day for the full month. The remaining phones were active for 1, 3, 14, and 19 days, respectively. A total of 254 phone-days of data were collected. The collected data includes 45,399,550 SQL statements. Of these, we were unable to interpret 308,752 statements (representing slightly over half a percent of the trace) due to a combination of data corruption and the use of unusual SQL syntax. Results presented in this paper are based on the 45,090,798 queries that we were able to successfully parse.
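For intuition, the sketch below illustrates the kind of first-pass tally behind counts such as these. The actual PhoneLab log schema is documented at the URL above and is not reproduced here, so the record layout assumed in this sketch (app name, SQL text, runtime in milliseconds) is purely hypothetical.
\begin{verbatim}
import re
from collections import Counter

STMT_KIND = re.compile(
    r"^\s*(SELECT|INSERT\s+OR\s+REPLACE|INSERT|UPDATE|DELETE)", re.I)

def tally(records):
    """records: iterable of (app, sql_text, runtime_ms) tuples; this layout
    is an assumption for illustration, not the PhoneLab log schema."""
    kinds, unparsed = Counter(), 0
    for app, sql, runtime_ms in records:
        m = STMT_KIND.match(sql)
        if m is None:
            unparsed += 1   # analogous to the statements we could not interpret
            continue
        kind = "UPSERT" if "REPLACE" in m.group(1).upper() else m.group(1).upper()
        kinds[kind] += 1
    return kinds, unparsed

kinds, bad = tally([
    ("Gmail", "SELECT * FROM messages WHERE id = 7", 1.2),
    ("Media Storage", "INSERT OR REPLACE INTO files VALUES (1, 'x')", 0.4),
])
\end{verbatim}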

230
sections/4-queryc.tex Normal file
View File

@ -0,0 +1,230 @@
\begin{figure}
\centering
\input{tables/query_breakdown}
\caption{Types and numbers of SQL statements executed during the trace, and query features used in each.}
\label{fig:breakdownByCatAndFeature}
\end{figure}
In this section we discuss the query complexity we observed during our study and illustrate typical workloads over pocket data.
Figure~\ref{fig:breakdownByCatAndFeature} summarizes all 45 million statements executed by SQLite over the 1 month period. As might be expected, \texttt{SELECT} forms almost three quarters of the workload by volume. \texttt{UPSERT} statements (\textit{i.e.}, \texttt{INSERT OR REPLACE}) form a similarly substantial 16\% of the workload --- more than simple \texttt{INSERT} and \texttt{UPDATE} statements combined. Also of note is a surprising level of complexity in \texttt{DELETE} statements, many of which rely on nested sub-queries when determining which records to delete.
\begin{figure}
\centering
\begin{tabular}{ccc}
\begin{tabular}{c|c}
\textbf{Client App} & \textbf{Statements Executed} \\ \hline
Google Play services & 14,813,949 \\
Media Storage & 13,592,982 \\
Gmail & 2,259,907 \\
Google+ & 2,040,793 \\
Facebook & 1,272,779 \\
Hangouts & 974,349 \\
Messenger & 676,993 \\
Calendar Storage & 530,535\\
User Dictionary & 252,650 \\
Android System & 237,154\\
\end{tabular}
& &
\begin{tabular}{c|c}
\textbf{Client App} & \textbf{Statements Executed} \\ \hline
Weather & 12 \\
Speedtest & 11 \\
KakaoStory & 8 \\
MX Player Pro & 4 \\
Quickoffice & 4\\
VLC & 4\\
Barcode Scanner & 2\\
Office Mobile & 2\\
PlayerPro & 2\\
KBS kong & 2 \\
\end{tabular}
\\
(a) & (b)
\end{tabular}
\caption{Apps that executed the (a) 10 most and (b) 10 fewest number of SQL statements.}
\label{fig:topBottom10Apps}
\end{figure}
Figure~\ref{fig:topBottom10Apps} shows the 10 most frequent and 10 least frequent clients of SQLite over the one-month trace. The most active SQLite clients include internal Android services that broker access to data shared between apps, such as personal media, calendars, and address books, as well as pre-installed and popular social media apps. There is less of a pattern at the low end, although several infrequent SQLite clients are themselves apps that may be used only infrequently, especially on a phone-sized device. We suspect that the distribution of apps would differ significantly on a tablet-sized device.
\subsection{Database Reads}
\begin{figure}
\centering
\begin{tabular}{c c}
\includegraphics[width=0.49\textwidth]{graphs/select_breakdown_by_width}\ &
\ \includegraphics[width=0.49\textwidth]{graphs/select_breakdown_by_nesting}\\
(a) & (b) \\
\end{tabular}
\caption{\texttt{SELECT} queries grouped by (a) number of tables accessed and (b) maximum nesting depth.}
\label{fig:coarseSelectComplexity}
\end{figure}
Of the 45 million statements analyzed, 33.47 million were read-only \texttt{SELECT} queries.
Figure~\ref{fig:coarseSelectComplexity} shows the distribution of \texttt{SELECT} queries by number of tables accessed by the query, as well as the maximum level of query nesting. Nesting includes from-nesting (\textit{e.g.}, \texttt{SELECT \ldots\ FROM (SELECT \ldots)}), as well as expression-nesting (\textit{e.g.}, \texttt{SELECT \ldots\ WHERE EXISTS (SELECT \ldots)}).
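As a concrete illustration (with hypothetical table and column names of our own, not drawn from the trace), the following sketch combines one level of from-nesting with one level of expression-nesting:
\begin{verbatim}
-- One from-nested sub-query and one expression-nested
-- sub-query; maximum nesting depth of 2.
SELECT t.account, t.total
FROM (SELECT account, SUM(amount) AS total
      FROM purchases GROUP BY account) AS t
WHERE EXISTS (SELECT 1 FROM flagged_accounts f
              WHERE f.account = t.account);
\end{verbatim}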
Even at this coarse-grained view of query complexity, the read-only portion of the embedded workload distinguishes itself from existing TPC benchmarks.
As in TPC-C~\cite{tpcc}, the vast majority of the workload involves simple, small requests for data that touch a small number of tables.
29.15 million \texttt{SELECT} queries, or about 87 percent, were simple select-project-join queries. Of those, 28.72 million (about 86 percent of all \texttt{SELECT} queries) were simple single-table scans or look-ups. In these queries, which form the bulk of SQLite's read workload, the query engine exists simply to provide an iterator over the relationally structured data it is being used to store.
Conversely, the workload also has a tail of complex, TPC-H-like~\cite{tpch} queries. Several hundred thousand queries involve at least 2 levels of nesting, and over a hundred thousand queries access 5 or more tables. In 10 instances of one recurring query, attributable to the Google Play Games Service\footnote{\url{https://developers.google.com/games/services/}}, a single \texttt{SELECT} accesses as many as 8 distinct tables to combine developer-provided game state, user preferences, device profile meta-data, and historical game-play results for the user.
\subsubsection{Simple \texttt{SELECT} Queries}
\begin{figure}
\centering
\input{tables/spjsort_by_width_and_where}
\caption{Number of simple look-up queries subdivided by join width (number of tables) and number of conjunctive terms in the \texttt{WHERE} clause.}
\label{fig:spjsByWidthAndWhere}
\end{figure}
We next examine more closely a class of \textit{simple look-up} queries, defined as any \texttt{SELECT} query that consists exclusively of selections, projections, joins, \texttt{LIMIT}, and \texttt{ORDER BY} clauses, and which does not contain any nested sub-queries or unions.
Figure~\ref{fig:spjsByWidthAndWhere} shows queries of this class, broken down by the number of tables involved in the query (Join Width) and the complexity of the \texttt{WHERE} clause, as measured by the number of conjunctive terms (Where Clauses). For example, consider a query of the form:
\texttt{SELECT R.A FROM R, S WHERE R.B = S.B AND S.C = 10}
This query would have a join width of 2 (\texttt{R}, \texttt{S}) and 2 conjunctive terms (\texttt{R.B = S.B} and \texttt{S.C = 10}). For uniformity, \texttt{NATURAL JOIN} and \texttt{JOIN ON} (\textit{e.g.}, \texttt{SELECT R.A FROM R JOIN S ON B}) expressions appearing in the \texttt{FROM} clause are rewritten into equivalent expressions in the \texttt{WHERE} clause.
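The following sketch makes this normalization concrete, reusing the hypothetical relations \texttt{R} and \texttt{S} from the example above:
\begin{verbatim}
-- Join written with explicit JOIN ... ON syntax:
SELECT R.A FROM R JOIN S ON R.B = S.B WHERE S.C = 10;
-- Equivalent normalized form used in our analysis
-- (join width 2, two conjunctive WHERE terms):
SELECT R.A FROM R, S WHERE R.B = S.B AND S.C = 10;
\end{verbatim}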
The first column of this table corresponds to queries over a single relation. Just over 1 million queries were full table scans (zero \texttt{WHERE} clause terms), and just under 27 million queries involved only a single conjunctive term. This latter class constitutes the bulk of the simple query workload, at just over 87 percent of the simple look-up queries; single-clause queries appear to be the norm. Recall that an N-way equi-join requires N-1 conjunctive terms; spikes occur in the number of queries with one more term than strictly required to perform the join, suggesting an additional selection constraint on at least one relation.
\begin{figure}
\centering
\input{tables/sp_trivial_condition_breakdown}
\caption{The \texttt{WHERE} clause structure for single-tabled simple lookup queries with a single conjunctive term in the \texttt{WHERE} clause.}
\label{fig:singleClauseExpressions}
\end{figure}
Narrowing further, we examine simple look-up queries referencing only a single source table and a single conjunctive term in the \texttt{WHERE} clause. Figure~\ref{fig:singleClauseExpressions} summarizes the structure of the predicate that appears in each of these queries. In this figure, constant terms (Const) are any primitive value term (\textit{e.g.}, a quoted string, an integer, or a float), or any JDBC-style parameter ($?$). For simple relational comparators, we group together \textit{in}equalities (\textit{i.e.}, $<$, $\leq$, $>$, $\geq$ and $\neq$) under the symbol $\theta$, and explicitly list equalities. Other relational operators such as \texttt{LIKE}, \texttt{BETWEEN}, and \texttt{IN} are also seen with some frequency. However, the majority of look-ups (85\% of all simple look-ups) are exact-match look-ups.
Not surprisingly, this suggests that the most common use-case for SQLite is as a relational key-value store. As we show shortly through a per-app analysis of the data (Section~\ref{sec:select:perapp}), 24 out of the 179 apps that we encountered posed no queries other than exact look-ups and full table scans.
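For concreteness, the most common single-term predicate forms from Figure~\ref{fig:singleClauseExpressions} look roughly like the following sketches (table and column names are hypothetical, not drawn from the trace):
\begin{verbatim}
SELECT * FROM contacts WHERE contact_id = ?;          -- exact match
SELECT * FROM messages WHERE sent_time > ?;           -- inequality
SELECT * FROM contacts WHERE name LIKE 'A%';          -- LIKE
SELECT * FROM events   WHERE day BETWEEN ? AND ?;     -- BETWEEN
SELECT * FROM tracks   WHERE genre IN ('rock','pop'); -- IN
\end{verbatim}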
\subsubsection{Other \texttt{SELECT} Queries}
\begin{figure}
\centering
\input{tables/select_condition_breakdown}
\caption{WHERE clause expression structures, and the number of SELECT queries in which the structure appears as a conjunctive clause.}
\label{fig:allSelectConditionBreakdown}
\end{figure}
Figure~\ref{fig:allSelectConditionBreakdown} shows a similar breakdown for all 33.5 million \texttt{SELECT} queries seen. As before, the table shows the form of all expressions that appear as one of the conjunctive terms of a \texttt{WHERE} clause, alongside the number of queries where the expression appears. 31.0 million of these queries contain an exact lookup.
1.6 million queries contain at least one multi-attribute equality expression such as an equijoin constraint, lining up nicely with the 1.7 million queries that reference at least two tables.
App developers make frequent use of SQLite's dynamic typing: \texttt{WHERE} clauses include bare column references (\textit{e.g.}, \texttt{WHERE A}, implicitly equivalent to \texttt{WHERE A <> 0}), as well as bare bit-wise AND expressions (\textit{e.g.}, \texttt{A\&0xc4}). The latter predicate, which appears in nearly half a million queries, indicates extensive use of bit-arrays packed into integers.
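The following sketch (again with hypothetical column names) illustrates both idioms: a bare column reference used as a boolean, and a bit-wise mask over an integer column used as a packed bit-array:
\begin{verbatim}
-- Bare column reference, implicitly "deleted <> 0":
SELECT * FROM contacts WHERE deleted;
-- Bit-wise AND over an integer used as a packed bit-array:
SELECT * FROM sync_state WHERE flags & 4;
\end{verbatim}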
\subsubsection{Functions}
\begin{figure}
\centering
\input{tables/select_functions}
\caption{Functions appearing in SELECT queries, sorted by number of times the function is used.}
\label{fig:selectFunctions}
\end{figure}
Functions extend the basic SQL syntax, providing for both specialized local data transformations, as well as computation of aggregate values. Figure~\ref{fig:selectFunctions} shows all functions appearing in \texttt{SELECT} queries during our trace, organized by the number of times that each function is used.
All functions that we saw are either built-in SQLite functions or, in the case of \texttt{PHONE\_NUMBERS\_EQUAL}, Android-specific extensions; no user-defined functions appeared in the trace.
Overall, the most common class of functions was aggregates (\textit{e.g.}, \texttt{SUM}, \texttt{MAX}, \texttt{COUNT}), followed by string operations (\textit{e.g.}, \texttt{LENGTH} and \texttt{SUBSTR}).
The most commonly used function was \texttt{GROUP\_CONCAT}, an aggregate operator that constructs a string by concatenating its input rows. This is significant, as it means that the most commonly used aggregate operator is holistic --- its output size is linear in the number of input rows.
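As a hypothetical sketch of this pattern (the schema is ours, not from the trace), a holistic aggregate such as \texttt{GROUP\_CONCAT} produces output whose size grows with the number of input rows:
\begin{verbatim}
-- Concatenate every tag attached to each photo into one
-- comma-separated string; the output size is linear in
-- the number of matching input rows.
SELECT photo_id, GROUP_CONCAT(tag_name)
FROM photo_tags GROUP BY photo_id;
\end{verbatim}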
\subsubsection{Per-Application Analysis}
\label{sec:select:perapp}
\begin{figure}
\centering
\begin{tabular}{cc}
\includegraphics[width=0.49\textwidth]{graphs/select_count_cdf_by_app} &
\includegraphics[width=0.49\textwidth]{graphs/select_percent_simple_cdf_by_app} \\
(a) & (b)
\end{tabular}
\caption{Breakdown of \texttt{SELECT} queries by app. (a) Cumulative distribution of applications by the number of \texttt{SELECT} queries issued (note the logarithmic scale). (b) Cumulative distribution of applications by the percent of the app's \texttt{SELECT} queries that are full table scans or exact look-ups.}
\label{fig:selectByApp}
\end{figure}
We next break the \texttt{SELECT} workload down by the calling application (app).
Due to limitations of the logging infrastructure, 4.32 million queries (just over 12.9\% of the workload) could not be associated with a specific application, and our app-specific analysis excludes these queries.
Additionally, system services in Android are often implemented as independent apps and counted as such in the numbers presented.
Over the course of the one-month trace we observed 179 distinct apps, ranging from built-in Android applications such as \textit{Gmail} or \textit{YouTube}, to video players such as \textit{VLC}, to games such as \textit{3 Kingdoms}. Figure~\ref{fig:selectByApp}.a shows the cumulative distribution of apps sorted by the number of queries that each app performs. The results are highly skewed, with the top 10\% of apps each posing more than 100 thousand queries over the one-month trace, or an average of about 1 query every 4 minutes on any given phone. The most query-intensive system service, \textit{Media Storage}, was responsible for 13.57 million queries, or just shy of 40 queries per minute per phone. The most query-intensive user-facing app was \textit{Google+}, which performed 1.94 million queries over the course of the month, or about 5 queries per minute.
At the other end of the spectrum, the bottom 10\% of apps posed as few as 30 queries over the entire month.
We noted above that a large proportion of \texttt{SELECT} queries were exact look-ups, suggesting that many applications running on the device might be using SQLite as a simple key-value store. This suggestion was confirmed in our app-level analysis. For example, approximately half of one specific app's query workload consisted of the following two queries:
\begin{verbatim}
INSERT OR REPLACE INTO properties(property_key,property_value) VALUES (?,?);
SELECT property_value FROM properties WHERE property_key=?;
\end{verbatim}
Note that \texttt{?} is a prepared statement parameter, which acts as a placeholder for values that are bound when the prepared statement is evaluated.
To broaden the scope of our search for key/value queries, we define a key-value look-up query as a \texttt{SELECT} query over a single relation that either performs a full table scan, or performs an exact look-up on a single attribute.
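Under this definition, a key-value look-up query takes one of the following two forms, sketched here by reusing the \texttt{properties} table from the example above:
\begin{verbatim}
-- Full table scan:
SELECT property_value FROM properties;
-- Exact look-up on a single attribute:
SELECT property_value FROM properties WHERE property_key = ?;
\end{verbatim}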
Figure~\ref{fig:selectByApp}.b shows the cumulative distribution of apps sorted by the percent of their queries that are key-value look-up queries. For 24 apps (13.4\%), we observed only key-value queries during the entire month-long trace.
% Adobe Reader, Barcode Scanner, BuzzFeed, Candy Crush Saga, Discover, Evernote, Foursquare, GPS Status, Google Play Newsstand, Google Sky Map, KBS kong, LTE Discovery, MX Player Pro, Muzei, My Tracks, Office Mobile, PayPal, Quickoffice, SignalCheck Lite, Titanium Backup, TuneIn Radio Pro, VLC, Weather, Wifi Analyzer
\subsection{Database Writes}
Write statements, \texttt{INSERT}, \texttt{INSERT OR REPLACE} (here abbreviated as \texttt{UPSERT}), \texttt{UPDATE}, and \texttt{DELETE}, together constitute 11.6 million statements, or about 25\% of the trace. As shown in Figure~\ref{fig:breakdownByCatAndFeature}, the most prevalent write operation is the \texttt{UPSERT}. \texttt{INSERT} and \texttt{UPSERT} together account for 9.3 million operations, of which 7.4 million are \texttt{UPSERT}s. In many of these cases, the use of \texttt{UPSERT}s appears to be defensive programming on the part of wrapper libraries that make use of SQLite (\textit{e.g.}, Object Relational Mappers, or ORMs). \texttt{UPSERT}s are also the canonical form of update in key-value stores, further supporting the argument that a large fragment of SQLite's traffic follows key-value access patterns.
%\ask{At what frequency do upserts need to replace? Unfortunately, we don't have a number of rows modified to figure this out.}
\subsubsection{\texttt{DELETE} Statements}
\begin{figure}
\centering
\input{tables/delete_condition_breakdown}
\caption{\texttt{WHERE} clause expression structures, and the number of \texttt{DELETE} statements in which the structure appears.}
\label{fig:allDeleteConditionBreakdown}
\end{figure}
The trace includes 1.25 million \texttt{DELETE} statements. This was by far the most expensive class of statement, with an average \texttt{DELETE} taking just under 4 ms to complete. A significant portion of this cost is attributable to the use of \texttt{DELETE} as a form of bulk erasure: 323 thousand \texttt{DELETE}s have no exact-match condition in their \texttt{WHERE} clause, while 528 thousand include a range predicate.
\texttt{DELETE} predicates can become quite complex; 46,122 \texttt{DELETE}s (just under 3.7 percent) use nested \texttt{SELECT} queries, and touch as many as 7 separate tables (in 616 cases).
This suggests extensive use of \texttt{DELETE} as a form of garbage-collection or cache invalidation, where SQL is used to express the corresponding deletion policy.
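The following sketch (with a hypothetical schema) illustrates the two patterns we suspect are at work: a range-predicate bulk erasure that expires old cache entries, and a nested \texttt{SELECT} that removes rows orphaned by deletions in another table:
\begin{verbatim}
-- Bulk erasure / cache expiry via a range predicate:
DELETE FROM thumbnail_cache WHERE last_access < ?;
-- Garbage collection of rows no longer referenced elsewhere:
DELETE FROM attachments
WHERE message_id NOT IN (SELECT message_id FROM messages);
\end{verbatim}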
%\ask{Suggestion is one thing... how might we validate the claim that DELETE is used for cache invalidation?}
\subsubsection{\texttt{UPDATE} Statements}
\begin{figure}
\centering
\input{tables/update_condition_breakdown}
\caption{\texttt{WHERE} clause expression structures, and the number of \texttt{UPDATE} statements in which the structure appears.}
\label{fig:allUpdateConditionBreakdown}
\end{figure}
Slightly over 1 million statements executed by SQLite over the course of the month were \texttt{UPDATE} statements. Figure~\ref{fig:allUpdateConditionBreakdown} breaks down the predicates used to select rows to be updated. Virtually all \texttt{UPDATE} statements involved an exact look-up. Of the million updates, 28 thousand did not include an exact look-up.
193 of the \texttt{UPDATE} statements relied on a nested \texttt{SELECT} statement as part of their \texttt{WHERE} clause, including 56 that involved 2 levels of nesting. Of the 193 \texttt{UPDATE}s with nested subqueries, 25 also involved aggregation.
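A hypothetical sketch of such an \texttt{UPDATE} (the schema is ours, not drawn from the trace) might look as follows:
\begin{verbatim}
-- An UPDATE whose WHERE clause relies on a nested,
-- aggregating sub-query; note the constant setter.
UPDATE threads SET archived = 1
WHERE thread_id IN
  (SELECT thread_id FROM messages
   GROUP BY thread_id
   HAVING MAX(sent_time) < ?);
\end{verbatim}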
Although the \texttt{WHERE} clause of the updates included a variety of expressions, \textit{every single setter} in every \texttt{UPDATE} statement in the trace assigned a constant value, as in the following statement:
\begin{verbatim}
UPDATE ScheduledTaskProto SET value=?,key=?,sortingValue=? WHERE key = ?;
\end{verbatim}
Not a single \texttt{UPDATE} expression attempted to compute new values in the SQL space, suggesting a strong preference for doing so in the application itself. This is not entirely unexpected, as the database lives in the address space of the application, minimizing the round-trip latency of first performing a \texttt{SELECT} to read values out of the database, followed by an \texttt{UPDATE} to write out the changes. However, it also hints that database objects might be getting cached at the application layer unnecessarily, and that language primitives that couple imperative programming languages with declarative query languages (\textit{e.g.}, StatusQuo~\cite{cheung2013statusquo} or Truffle~\cite{wimmer2012truffle}) could provide a significant benefit to mobile developers.
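To make the distinction concrete, the following sketch (with a hypothetical table) contrasts the constant-setter form that we observed universally with a computed-setter form that never appeared in the trace:
\begin{verbatim}
-- Observed pattern: the new value is computed in the app
-- and bound as a constant.
UPDATE counters SET hits = ? WHERE key = ?;
-- Never observed in the trace: computing the new value
-- inside SQL itself.
UPDATE counters SET hits = hits + 1 WHERE key = ?;
\end{verbatim}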
% \begin{figure}
% \centering
% \includegraphics[width=\textwidth]{graphs/update_target_cols}
% \caption{Frequency with which \texttt{UPDATE} statements update a given number of rows.}
% \label{fig:colsUpdated}
% \end{figure}
% Figure~\ref{fig:colsUpdated} breaks down \texttt{UPDATE} statements by the number of columns updated by the statement. Under the hypothesis that SQLite is being used as a pure key-value store, we might expect a strong skew in favor of
\subsubsection{Per-Application Analysis}
\begin{figure}
\centering
\begin{tabular}{cc}
\includegraphics[width=0.49\textwidth]{graphs/data_mod_ops_cdf_by_app} &
\includegraphics[width=0.49\textwidth]{graphs/read_write_ratio_cdf_by_app} \\
(a) & (b)
\end{tabular}
\caption{App-level write behavior. (a) Cumulative distribution of applications by number of data manipulation statements performed (note the logarithmic scale). (b) Cumulative distribution of applications by read/write ratio. }
\label{fig:updateByApp}
\end{figure}
Figure~\ref{fig:updateByApp}.a illustrates app-level write workloads, sorting applications by the number of \texttt{INSERT}, \texttt{UPSERT}, \texttt{UPDATE}, and \texttt{DELETE} operations that could be attributed to each. The CDF is almost perfectly exponential, suggesting that the number of write statements performed by any given app follows a long-tailed distribution, a feature to be considered in the design of a pocket data benchmark.
Figure~\ref{fig:updateByApp}.b breaks apps down by their read/write ratio. Surprisingly, 25 apps (14\% of the apps seen) did not perform a single write over the course of the entire trace. Manual examination of these apps suggested two possible explanations. Several apps have reason to store state that is updated only infrequently; for example, \textit{JuiceSSH} or \textit{Key Chain} appear to use SQLite as a credential store. A second, far more interesting class includes apps like \textit{Google Play Newsstand}, \textit{Eventbrite}, \textit{Wifi Analyzer}, and \textit{TuneIn Radio Pro}, all of which have components that query data stored in the cloud. We suspect that the cloud data is being encapsulated into a pre-constructed SQLite database and pushed to, or downloaded by, the client applications.
This type of behavior might be compared to a bulk ETL process or log shipment in a server-class database workload, except that here, the database has already been constructed. Pre-caching through database encapsulation is a unique feature of embedded databases, and one that is already being used in a substantial number of apps.
% Barcode Scanner, BharatMatrimony, CamCard, CityMaps2Go Pro, Discover, Download Manager, Eventbrite, GPS Status, Google Play Newsstand, JuiceSSH, KBS kong, Key Chain, LTE Discovery, MX Player Pro, My Tracks, PlayerPro, Pushbullet, Quickoffice, SignalCheck Lite, Sound Search for Google Play, Splitwise, TuneIn Radio Pro, VLC, WeChat, Wifi Analyzer

0
sections/5-dba.tex Normal file
View File

37
sections/6-pocketdata.tex Normal file
View File

@ -0,0 +1,37 @@
In spite of the prevalence of mobile devices, relatively little attention has been paid to pocket-scale data management. We believe that this is, in large part, due to the lack of a common, overarching mechanism to evaluate potential solutions to known challenges in the space. In this section, we first explore some existing research on mobile databases, and in particular focus on how the authors evaluate their solutions. Then, we turn to existing benchmarking suites and identify specific disconnects that prevent them from being applied directly to model pocket data. In the process, we also explore aspects of these benchmarks that could be drawn into a benchmark better suited to pocket data.
\subsection{Pocket Data Management}
Kang et al.~\cite{kang2013xftl} explored the design of a flash-aware transactional layer called X-FTL, specifically targeting limitations of SQLite's undo/redo logging on mobile devices. To evaluate their work, the authors used the TPC-C benchmark in conjunction with a series of micro-benchmarks that evaluate the file system's response to database write operations. This workload is appropriate for their target optimizations. However, as we discuss below, TPC-C is not sufficiently representative of a pocket data workload to be used as a general-purpose mobile database benchmark.
Jeong et al.~\cite{jeong2013iostack} noted similar limitations in SQLite's transactional layer, and went about streamlining the IO-stack, again primarily for the benefit of mobile devices. Again, micro-benchmarks played a significant role in the authors' evaluation of their work. Additionally, to evaluate their system's behavior under real-world conditions, the authors ran the \textit{Twitter} and \textit{Facebook} apps, simulating user behavior by replaying a mobility trace generated by MobiGen~\cite{ahmed2009mobigen}. This is perhaps the most representative benchmarking workload that we encountered in our survey of related work. However, it too could be improved.
In our traces, Facebook and Twitter do represent a substantial contribution to the database workload of a typical smartphone, but still perform orders of magnitude less work with SQLite than built-in apps and system services.
Many of the same issues with IO and power management that now appear in mobile phones have also historically arisen in sensor networks. Madden et al.'s work on embedded databases with TinyDB~\cite{madden2005tinydb} is emblematic of this space, where database solutions are driven by one or more specific target application domains. Naturally, evaluation benchmarks and metrics in sensor networks are typically derived from, and closely tied to, the target domain --- for example, distributed event monitoring in the case of TinyDB.
\subsection{Comparison to Existing Benchmarks}
Given the plethora of available benchmarking software, it is reasonable to ask what a new benchmark for pocket-scale data management brings to the table. We next compare the assumptions and workload characteristics behind a variety of popular benchmarking suites against a potential TPC-MOBILE, and identify concerns that this benchmark would need to address in order to accurately capture the workload characteristics that we have observed.
\subsubsection{Existing Mobile Benchmarks and Data Generators}
Although no explicit macro-benchmarks exist for mobile embedded databases, we note two benchmark data generators that do simulate several properties of interest: AndroBench~\cite{kim2012androbench} and MobiGen~\cite{ahmed2009mobigen}. AndroBench is a micro-benchmark capable of simulating the IO behavior of SQLite under different workloads. It is primarily designed to evaluate the file-system supporting SQLite, rather than the embedded database itself. However, the structure of its micro-benchmark workloads can just as effectively be used to compare two embedded database implementations.
The second benchmark, MobiGen, has little to do with data management directly. Rather, it generates realistic traces of environmental inputs (\textit{e.g.}, signal strength, accelerometer readings, \textit{etc}\ldots), simulating the effects of a phone being carried through a physical space. Replaying these traces through a virtual machine running a realistic application workload could generate realistic conditions (\textit{e.g.}, as in the evaluation of the streamlined IO-stack of Jeong et al.~\cite{jeong2013iostack}). However, it does not simulate the effects of user interactions with apps running on the device, something that TPC-MOBILE must be able to do.
\subsubsection{TPC-C}
One macro-benchmark suite that bears a close resemblance to the trace workload is TPC-C~\cite{tpcc}, which simulates a supply-chain management system. It includes a variety of transactional tasks ranging from low-latency user interactions for placing and querying orders, to longer-running batch processes that simulate order fulfilment. A key feature of this benchmark workload is the level of concurrency expected and required of the system. Much of the data is neatly partitioned, but the workload is designed to force a non-trivial level of cross-talk between partitions, making concurrency a bottleneck at higher throughputs. Conversely, mobile SQLite databases are isolated into specialized app-specific silos. In our experiments, throughput remained at very manageable levels from a concurrency standpoint. The most intensive database user, \textit{Google Play services}, had 14.8 million statements attributable to it, just under half of which were writes. This equates to about one write every 3 seconds per phone, which is substantial from a power-management and latency perspective, but not from the standpoint of concurrency.
\subsubsection{YCSB}
We observed many applications using SQLite as a simple key/value store. Indeed, 13\% of the applications we observed had a read workload that consisted exclusively of key/value queries, and over half of the applications we observed had a workload that consisted of at least 80\% key/value queries.
The Yahoo Cloud Services benchmark~\cite{ycsb} is designed to capture a variety of key/value query workloads, and could provide a foundation for a pocket-scale data benchmark in this capacity. However, it would need to be extended with support for more complex queries over the same data.
\subsubsection{Analytics}
These more complex queries include multiple levels of query nesting, wide joins, and extensive use of aggregation. As such, they more closely resemble analytics workload benchmarks such as TPC-H~\cite{tpch}, the Star Schema Benchmark~\cite{ssb}, and TPC-DS~\cite{tpcds}. This resemblance is more than passing; many of the more complex queries we encountered appeared to be preparing application run-time state for presentation to the user. For example, the \textit{Google Play Games} service tracks so-called \textit{events}, \textit{quests}, and participating \textit{apps}. One of the most complex queries that we encountered appeared to be linking and summarizing these features together for presentation in a list view. Additionally, we note that the presence of analytics queries in pocket data management workloads is likely to increase further, as interest grows in smartphones as a platform for personal sensing~\cite{campbell2008peoplesensing,klasnja2009using,lam2009healthmonitoring}.
\subsubsection{TPC-E}
The TPC-E benchmark emulates a brokerage firm, and includes a mix of reporting and data mining queries alongside stream-monitoring queries. It models decision support systems that involve a high level of CPU and IO load, and that examine large volumes of rapidly changing data. SQLite does not presently target or support streaming or active database applications, although such functionality may become available as personal sensing becomes more prevalent.

View File

1519
splncs03.bst Normal file

File diff suppressed because it is too large Load Diff

4
sprmindx.sty Normal file
View File

@ -0,0 +1,4 @@
delim_0 "\\idxquad "
delim_1 "\\idxquad "
delim_2 "\\idxquad "
delim_n ",\\,"

View File

@ -0,0 +1,16 @@
\begin{tabular}{c|c|c}
\textbf{Expression Type} & \textbf{Expression Form} & \textbf{Count}\\\hline
Exact Lookups & \texttt{Const $=$ Expr} & \ \ 926,042\ \ \\
Other Inequality & \texttt{Expr $\theta$ Expr} & \ \ 527,517\ \ \\
Membership Test & \texttt{Expr [NOT] IN (List or Query)} & \ \ 190,695\ \ \\
Disjunction & \texttt{[NOT] Expr $\vee$ Expr} & \ \ 48,534\ \ \\
Inequality on 1 constant & \texttt{Const $\theta$ Expr} & \ \ 31,128\ \ \\
Other Equality & \texttt{Expr $=$ Expr} & \ \ 10,037\ \ \\
Subquery Membership & \texttt{[NOT] EXISTS (Query)} & \ \ 9,079\ \ \\
Boolean Column Cast & \texttt{[NOT] Column} & \ \ 6,490\ \ \\
Patterned String Lookup & \texttt{Expr [NOT] LIKE Pattern} & \ \ 6,109\ \ \\
Validity Test & \texttt{Expr IS [NOT] NULL} & \ \ 2,693\ \ \\
Functional If-Then-Else & \texttt{CASE WHEN \ldots} & \ \ 390\ \ \\
No-op Clause & \texttt{Const or (Const = Const)} & \ \ 249\ \ \\
Range Test & \texttt{Expr BETWEEN Const AND Const} & \ \ 18\ \ \\
\end{tabular}

View File

@ -0,0 +1,13 @@
\begin{tabular}{|r|c|c|c|c|c|c|}\hline
\ \ \textbf{Operation}\ \ & \texttt{SELECT} & \texttt{INSERT} & \texttt{UPSERT} & \texttt{UPDATE} & \texttt{DELETE} & \textbf{Total}\\\hline
\textbf{\ \ Count } & \ \ 33,470,310\ \ & \ \ 1,953,279\ \ & \ \ 7,376,648\ \ & \ \ 1,041,967\ \ & \ \ 1,248,594\ \ & \ \ 45,090,798\ \ \\\hline
\textbf{\ \ Runtime (ms) } & \ \ 1.13\ \ & \ \ 2.31\ \ & \ \ 0.93\ \ & \ \ 6.59\ \ & \ \ 3.78\ \ & \\\hline
\multicolumn{7}{|c|}{\textbf{Features Used}}\\\hline
\texttt{OUTER JOIN} & \ \ 391,052\ \ & & & & \ \ 236\ \ & \ \ 391,288\ \ \\\hline
\texttt{DISTINCT} & \ \ 1,888,013\ \ & & & \ \ 25\ \ & \ \ 5,586\ \ & \ \ 1,893,624\ \ \\\hline
\texttt{LIMIT} & \ \ 1,165,096\ \ & & & & \ \ 422\ \ & \ \ 1,165,518\ \ \\\hline
\texttt{ORDER BY} & \ \ 3,168,915\ \ & & & & \ \ 194\ \ & \ \ 3,169,109\ \ \\\hline
\texttt{Aggregate} & \ \ 638,137\ \ & & & \ \ 25\ \ & \ \ 3,190\ \ & \ \ 641,352\ \ \\\hline
\texttt{GROUP BY} & \ \ 438,919\ \ & & & \ \ 25\ \ & & \ \ 438,944\ \ \\\hline
\texttt{UNION} & \ \ 13,801\ \ & & & & \ \ 65\ \ & \ \ 13,866\ \ \\\hline
\end{tabular}

View File

@ -0,0 +1,18 @@
\begin{tabular}{c|c|c}
\textbf{Expression Type} & \textbf{Expression Form} & \textbf{Count}\\\hline
Exact Lookups & \texttt{Const $=$ Expr} & \ \ 30,974,814\ \ \\
Other Equality & \texttt{Expr $=$ Expr} & \ \ 1,621,556\ \ \\
Membership Test & \texttt{Expr [NOT] IN (List or Query)} & \ \ 1,041,611\ \ \\
Inequality on 1 constant & \texttt{Const $\theta$ Expr} & \ \ 677,259\ \ \\
Disjunction & \texttt{[NOT] Expr $\vee$ Expr} & \ \ 631,404\ \ \\
Bitwise AND & \texttt{Expr \& Expr} & \ \ 480,921\ \ \\
Other Inequality & \texttt{Expr $\theta$ Expr} & \ \ 442,164\ \ \\
Boolean Column Cast & \texttt{[NOT] Column} & \ \ 302,014\ \ \\
No-op Clause & \texttt{Const or (Const = Const)} & \ \ 229,247\ \ \\
Patterned String Lookup & \texttt{Expr [NOT] LIKE Pattern} & \ \ 156,309\ \ \\
Validity Test & \texttt{Expr IS [NOT] NULL} & \ \ 87,873\ \ \\
Functional If-Then-Else & \texttt{CASE WHEN \ldots} & \ \ 2,428\ \ \\
Range Test & \texttt{Expr BETWEEN Const AND Const} & \ \ 2,393\ \ \\
Function Call & \texttt{Function(Expr)} & \ \ 1,965\ \ \\
Subquery Membership & \texttt{[NOT] EXISTS (Query)} & \ \ 1,584\ \ \\
\end{tabular}

View File

@ -0,0 +1,28 @@
\begin{tabular}{ccc}
\begin{tabular}{c|c}
\textbf{Function} & \textbf{Call Sites}\\\hline
\texttt{GROUP\_CONCAT} & \ \ 583,474\ \ \\
\texttt{SUM} & \ \ 321,387\ \ \\
\texttt{MAX} & \ \ 314,970\ \ \\
\texttt{COUNT} & \ \ 173,031\ \ \\
\texttt{LENGTH} & \ \ 102,747\ \ \\
\texttt{SUBSTR} & \ \ 88,462\ \ \\
\texttt{CAST} & \ \ 38,208\ \ \\
\texttt{UPPER} & \ \ 20,487\ \ \\
\texttt{MIN} & \ \ 19,566\ \ \\
\end{tabular}
&\ \ \ &
\begin{tabular}{c|c}
\textbf{Function} & \textbf{Call Sites}\\\hline
\texttt{COALESCE} & \ \ 3,494\ \ \\
\texttt{LOWER} & \ \ 3,110\ \ \\
\texttt{PHONE\_NUMBERS\_EQUAL} & \ \ 2,017\ \ \\
\texttt{STRFTIME} & \ \ 1,147\ \ \\
\texttt{IFNULL} & \ \ 657\ \ \\
\texttt{JULIANDAY} & \ \ 587\ \ \\
\texttt{DATE} & \ \ 44\ \ \\
\texttt{AVG} & \ \ 15\ \ \\
\ & \
\end{tabular}
\end{tabular}

View File

@ -0,0 +1,15 @@
\begin{tabular}{c|c|c}
\textbf{Expression Type} & \textbf{Expression Form} & \textbf{Count}\\\hline
Exact Lookups & \texttt{Const $=$ Expr} & \ \ 26,303,579\ \ \\
Membership Test & \texttt{Expr [NOT] IN (List)} & \ \ 331,788\ \ \\
Inequality on 1 constant & \texttt{Const $\theta$ Expr} & \ \ 93,816\ \ \\
Patterned String Lookup & \texttt{Expr [NOT] LIKE Pattern} & \ \ 72,289\ \ \\
Disjunction & \texttt{[NOT] Expr $\vee$ Expr} & \ \ 61,541\ \ \\
Other Inequality & \texttt{Expr $\theta$ Expr} & \ \ 38,714\ \ \\
Validity Test & \texttt{Expr IS [NOT] NULL} & \ \ 17,305\ \ \\
No-op Clause & \texttt{Const or (Const = Const)} & \ \ 6,710\ \ \\
Boolean Column Cast & \texttt{[NOT] Column} & \ \ 5,358\ \ \\
Other Equality & \texttt{Expr $=$ Expr} & \ \ 1,471\ \ \\
Function Call & \texttt{Function(Expr)} & \ \ 43\ \ \\
Range Test & \texttt{Expr BETWEEN Const AND Const} & \ \ 18\ \ \\
\end{tabular}

View File

@ -0,0 +1,17 @@
\begin{tabular}{|c|c|c|c|c|c|c|}\hline
& \multicolumn{5}{|c|}{\textbf{Join Width}} & \\\hline
\textbf{Where Clauses} & 1 & 2 & 3 & 4 & 6 & \textbf{Total}\\\hline
0 & \ \ 1,085,154\ \ & & & & & \textbf{\ \ 1,085,154\ \ }\\\hline
1 & \ \ 26,932,632\ \ & \ \ 9,105\ \ & & & & \textbf{\ \ 26,941,737\ \ }\\\hline
2 & \ \ 1,806,843\ \ & \ \ 279,811\ \ & \ \ 5,970\ \ & & & \textbf{\ \ 2,092,624\ \ }\\\hline
3 & \ \ 384,406\ \ & \ \ 80,183\ \ & \ \ 29,101\ \ & \ \ 1\ \ & & \textbf{\ \ 493,691\ \ }\\\hline
4 & \ \ 115,107\ \ & \ \ 70,891\ \ & \ \ 10,696\ \ & \ \ 939\ \ & & \textbf{\ \ 197,633\ \ }\\\hline
5 & \ \ 28,347\ \ & \ \ 15,061\ \ & \ \ 1,162\ \ & \ \ 17\ \ & \ \ 11\ \ & \textbf{\ \ 44,598\ \ }\\\hline
6 & \ \ 212\ \ & \ \ 524\ \ & \ \ 591\ \ & \ \ 471\ \ & \ \ 3\ \ & \textbf{\ \ 1,801\ \ }\\\hline
7 & \ \ 349\ \ & \ \ 22,574\ \ & \ \ 333\ \ & \ \ 1,048\ \ & \ \ 8\ \ & \textbf{\ \ 24,312\ \ }\\\hline
8 & \ \ 35\ \ & \ \ 18\ \ & & & \ \ 6\ \ & \textbf{\ \ 59\ \ }\\\hline
9 & & \ \ 541\ \ & \ \ 2,564\ \ & \ \ 4\ \ & & \textbf{\ \ 3,109\ \ }\\\hline
10 & \ \ 159\ \ & & & & & \textbf{\ \ 159\ \ }\\\hline
11 & \ \ 545\ \ & & & & & \textbf{\ \ 545\ \ }\\\hline
\textbf{Total} & \textbf{\ \ 30,353,789\ \ } & \textbf{\ \ 478,708\ \ } & \textbf{\ \ 50,417\ \ } & \textbf{\ \ 2,480\ \ } & \textbf{\ \ 28\ \ } & \textbf{\ \ 30,885,422\ \ }\\\hline
\end{tabular}

View File

@ -0,0 +1,12 @@
\begin{tabular}{c|c|c}
\textbf{Expression Type} & \textbf{Expression Form} & \textbf{Count}\\\hline
Exact Lookups & \texttt{Const $=$ Expr} & \ \ 1,013,697\ \ \\
Disjunction & \texttt{[NOT] Expr $\vee$ Expr} & \ \ 84,937\ \ \\
Inequality on 1 constant & \texttt{Const $\theta$ Expr} & \ \ 18,146\ \ \\
Membership Test & \texttt{Expr [NOT] IN (List or Query)} & \ \ 14,146\ \ \\
Other Inequality & \texttt{Expr $\theta$ Expr} & \ \ 9,443\ \ \\
Boolean Column Cast & \texttt{[NOT] Column} & \ \ 1,640\ \ \\
Validity Test & \texttt{Expr IS [NOT] NULL} & \ \ 1,517\ \ \\
Other Equality & \texttt{Expr $=$ Expr} & \ \ 221\ \ \\
Patterned String Lookup & \texttt{Expr [NOT] LIKE Pattern} & \ \ 59\ \ \\
\end{tabular}