Switched to \documentclass[sigconf]{acmart} for PODS submission.

master
Aaron Huber 2022-03-14 12:29:22 -04:00
parent 580e6876fb
commit af69943c00
169 changed files with 44315 additions and 377 deletions

15
.gitignore vendored
View File

@ -1,15 +0,0 @@
*.aux
*.log
*.bbl
*.pdf
*.out
*.bak
*.fdb_latexmk
*.fls
*.synctex.gz
*.swp
*.blg
*.xoj
*.auxlock
*.vtc
auto

31
.gitignore.gitignore Normal file
View File

@ -0,0 +1,31 @@
acmart.cls
acmart.pdf
acmguide.pdf
samples/sample-*.pdf
*.log
*.aux
*.cfg
*.glo
*.idx
*.toc
*.ilg
*.ind
*.out
*.lof
*.lot
*.bbl
*.blg
*.gls
*.cut
*.hd
*.dvi
*.ps
*.thm
*.tgz
*.zip
*.rpi
*~
*.bcf
*.run.xml
samples/ACM-Reference-Format.bst
samples/*.tex

View File

@ -263,7 +263,18 @@
\usebibmacro{issue}%
\newunit}
% Redefine the doi+eprint+url bibmacro so the fields come out in the order
% url, eprint, doi, each guarded by its bbx toggle (bbx:url, bbx:eprint,
% bbx:doi).  The url (with urldate) is printed only when the entry has no
% doi field.
\renewbibmacro*{doi+eprint+url}{%
\iftoggle{bbx:url}
{\iffieldundef{doi}{\usebibmacro{url+urldate}}{}}
{}%
\newunit\newblock
\iftoggle{bbx:eprint}
{\usebibmacro{eprint}}
{}%
\newunit\newblock
\iftoggle{bbx:doi}
{\printfield{doi}}
{}}
%%% Definitions for drivers (alphabetical)

View File

@ -64,6 +64,7 @@ ENTRY
isbn-13 % UTAH
issn % UTAH
lccn % UTAH
distinctURL % whether to print url if doi is present
}
{}
{ label.year extra.label sort.year sort.label basic.label.year}
@ -150,6 +151,16 @@ FUNCTION { empty.or.unknown }
if$
}
FUNCTION { empty.or.zero }
{
%% Consume the string on top of the stack and push an integer in its place:
%% 1 if the string is empty (as tested by empty$) or equals "0", else 0.
%% The string is duplicated first because empty$ pops its argument; in the
%% empty case the remaining copy is dropped before pushing 1.
duplicate$ empty$
{ pop$ #1 }
{ "0" = }
if$
}
FUNCTION { writeln }
{
%% In BibTeX style files, the sequences
@ -737,10 +748,14 @@ FUNCTION { output.eprint } %
% Changes by BV 2011/04/15. Do not output
% url if doi is defined
%
%
% Changes by BV 2021/11/26. Output url even if doi is defined
% if distinctURL is not zero.
%
FUNCTION { output.url } % UTAH
{ % return with stack untouched
% output URL and associated lastaccessed fields
doi empty.or.unknown
doi empty.or.unknown distinctURL empty.or.zero not or
{
url empty.or.unknown
{ }
@ -2048,10 +2063,12 @@ FUNCTION { calc.label }
FUNCTION { output.bibitem }
{
newline$
"\bibitem[\protect\citeauthoryear{" write$
calc.label write$
"\bibitem[" write$
calc.basic.label write$
"(" write$
sort.year write$
"}]%" writeln
")" write$
"]%" writeln
" {" write$
cite$ write$
"}" writeln

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,104 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\babel@aux[2]{}
\@nameuse{bbl@beforestart}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\bibstyle{plainurl}
\gdef\@authornum{1}
\gdef\@authornum{2}
\babel@aux{UKenglish}{}
\gdef\@pageNumberEndAbstract{1}
\@writefile{toc}{\contentsline {section}{\numberline {1}Typesetting instructions -- Summary}{1}{section.1}\protected@file@percent }
\newlabel{sec:typesetting-summary}{{1}{1}{Typesetting instructions -- Summary}{section.1}{}}
\newlabel{sec:typesetting-summary@cref}{{[section][1][]1}{[1][1][]1}}
\citation{DBLP:journals/cacm/Knuth74}
\@writefile{toc}{\contentsline {section}{\numberline {2}Lorem ipsum dolor sit amet}{2}{section.2}\protected@file@percent }
\citation{DBLP:books/mk/GrayR93}
\citation{DBLP:journals/cacm/Dijkstra68a}
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{list:8-6}{{1}{3}{Useless code}{lstlisting.1}{}}
\newlabel{list:8-6@cref}{{[listing][1][]1}{[1][3][]3}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Useless code.}{3}{lstlisting.1}\protected@file@percent }
\@writefile{loe}{\contentsline {lemma}{\ifthmt@listswap Lemma~1\else \numberline {1}Lemma\fi \thmtformatoptarg {Lorem ipsum}}{3}{lemma.1}\protected@file@percent }
\newlabel{lemma:lorem}{{1}{3}{Lorem ipsum}{lemma.1}{}}
\newlabel{lemma:lorem@cref}{{[lemma][1][]1}{[1][3][]3}}
\@writefile{loe}{\contentsline {claim}{\ifthmt@listswap Claim~2\else \numberline {2}Claim\fi }{3}{claim.2}\protected@file@percent }
\@writefile{loe}{\contentsline {corollary}{\ifthmt@listswap Corollary~3\else \numberline {3}Corollary\fi \thmtformatoptarg {Curabitur pulvinar, \cite {DBLP:books/mk/GrayR93}}}{3}{corollary.3}\protected@file@percent }
\newlabel{lemma:curabitur}{{3}{3}{Curabitur pulvinar, \cite {DBLP:books/mk/GrayR93}}{corollary.3}{}}
\newlabel{lemma:curabitur@cref}{{[corollary][3][]3}{[1][3][]3}}
\@writefile{loe}{\contentsline {proposition}{\ifthmt@listswap Proposition~4\else \numberline {4}Proposition\fi }{3}{proposition.4}\protected@file@percent }
\newlabel{prop1}{{4}{3}{}{proposition.4}{}}
\newlabel{prop1@cref}{{[proposition][4][]4}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Curabitur dictum felis id sapien}{3}{subsection.2.1}\protected@file@percent }
\citation{DBLP:conf/focs/HopcroftPV75}
\bibdata{lipics-v2021-sample-article}
\gdef\@pageNumberStartAppendix{4}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Proin ac fermentum augue}{4}{subsection.2.2}\protected@file@percent }
\@writefile{loe}{\contentsline {remark}{\ifthmt@listswap Remark~5\else \numberline {5}Remark\fi }{4}{remark.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Pellentesque quis tortor}{4}{section.3}\protected@file@percent }
\@writefile{loe}{\contentsline {lemma}{\ifthmt@listswap Lemma~6\else \numberline {6}Lemma\fi \thmtformatoptarg {Quisque blandit tempus nunc}}{4}{lemma.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Morbi eros magna}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {A}Styles of lists, enumerations, and descriptions}{4}{appendix.A}\protected@file@percent }
\newlabel{sec:itemStyles}{{A}{4}{Styles of lists, enumerations, and descriptions}{appendix.A}{}}
\newlabel{sec:itemStyles@cref}{{[appendix][1][2147483647]A}{[1][4][]4}}
\@writefile{toc}{\contentsline {section}{\numberline {B}Theorem-like environments}{5}{appendix.B}\protected@file@percent }
\newlabel{sec:theorem-environments}{{B}{5}{Theorem-like environments}{appendix.B}{}}
\newlabel{sec:theorem-environments@cref}{{[appendix][2][2147483647]B}{[1][5][]5}}
\@writefile{loe}{\contentsline {theorem}{\ifthmt@listswap Theorem~7\else \numberline {7}Theorem\fi }{5}{theorem.7}\protected@file@percent }
\newlabel{testenv-theorem}{{7}{5}{}{theorem.7}{}}
\newlabel{testenv-theorem@cref}{{[theorem][7][2147483647]7}{[1][5][]5}}
\@writefile{loe}{\contentsline {lemma}{\ifthmt@listswap Lemma~8\else \numberline {8}Lemma\fi }{5}{lemma.8}\protected@file@percent }
\newlabel{testenv-lemma}{{8}{5}{}{lemma.8}{}}
\newlabel{testenv-lemma@cref}{{[lemma][8][2147483647]8}{[1][5][]5}}
\@writefile{loe}{\contentsline {corollary}{\ifthmt@listswap Corollary~9\else \numberline {9}Corollary\fi }{5}{corollary.9}\protected@file@percent }
\newlabel{testenv-corollary}{{9}{5}{}{corollary.9}{}}
\newlabel{testenv-corollary@cref}{{[corollary][9][2147483647]9}{[1][5][]5}}
\@writefile{loe}{\contentsline {proposition}{\ifthmt@listswap Proposition~10\else \numberline {10}Proposition\fi }{5}{proposition.10}\protected@file@percent }
\newlabel{testenv-proposition}{{10}{5}{}{proposition.10}{}}
\newlabel{testenv-proposition@cref}{{[proposition][10][2147483647]10}{[1][5][]5}}
\@writefile{loe}{\contentsline {conjecture}{\ifthmt@listswap Conjecture~11\else \numberline {11}Conjecture\fi }{5}{conjecture.11}\protected@file@percent }
\newlabel{testenv-conjecture}{{11}{5}{}{conjecture.11}{}}
\newlabel{testenv-conjecture@cref}{{[conjecture][11][2147483647]11}{[1][5][]5}}
\@writefile{loe}{\contentsline {observation}{\ifthmt@listswap Observation~12\else \numberline {12}Observation\fi }{6}{observation.12}\protected@file@percent }
\newlabel{testenv-observation}{{12}{6}{}{observation.12}{}}
\newlabel{testenv-observation@cref}{{[observation][12][2147483647]12}{[1][5][]6}}
\@writefile{loe}{\contentsline {exercise}{\ifthmt@listswap Exercise~13\else \numberline {13}Exercise\fi }{6}{exercise.13}\protected@file@percent }
\newlabel{testenv-exercise}{{13}{6}{}{exercise.13}{}}
\newlabel{testenv-exercise@cref}{{[exercise][13][2147483647]13}{[1][5][]6}}
\@writefile{loe}{\contentsline {definition}{\ifthmt@listswap Definition~14\else \numberline {14}Definition\fi }{6}{definition.14}\protected@file@percent }
\newlabel{testenv-definition}{{14}{6}{}{definition.14}{}}
\newlabel{testenv-definition@cref}{{[definition][14][2147483647]14}{[1][6][]6}}
\@writefile{loe}{\contentsline {example}{\ifthmt@listswap Example~15\else \numberline {15}Example\fi }{6}{example.15}\protected@file@percent }
\newlabel{testenv-example}{{15}{6}{}{example.15}{}}
\newlabel{testenv-example@cref}{{[example][15][2147483647]15}{[1][6][]6}}
\@writefile{loe}{\contentsline {note}{\ifthmt@listswap Note~16\else \numberline {16}Note\fi }{6}{note.16}\protected@file@percent }
\newlabel{testenv-note}{{16}{6}{}{note.16}{}}
\newlabel{testenv-note@cref}{{[note][16][2147483647]16}{[1][6][]6}}
\@writefile{loe}{\contentsline {note*}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Note}{6}{thmt@dummyctr.dummy.17}\protected@file@percent }
\@writefile{loe}{\contentsline {remark}{\ifthmt@listswap Remark~17\else \numberline {17}Remark\fi }{6}{remark.17}\protected@file@percent }
\newlabel{testenv-remark}{{17}{6}{}{remark.17}{}}
\newlabel{testenv-remark@cref}{{[remark][17][2147483647]17}{[1][6][]6}}
\@writefile{loe}{\contentsline {remark*}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Remark}{6}{thmt@dummyctr.dummy.19}\protected@file@percent }
\@writefile{loe}{\contentsline {claim}{\ifthmt@listswap Claim~18\else \numberline {18}Claim\fi }{6}{claim.18}\protected@file@percent }
\newlabel{testenv-claim}{{18}{6}{}{claim.18}{}}
\newlabel{testenv-claim@cref}{{[claim][18][2147483647]18}{[1][6][]6}}
\@writefile{loe}{\contentsline {claim*}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Claim}{6}{thmt@dummyctr.dummy.21}\protected@file@percent }
\newlabel{testenv-claim2}{{}{6}{}{thmt@dummyctr.dummy.21}{}}
\newlabel{testenv-claim2@cref}{{[thmt@dummyctr][21][2147483647]}{[1][6][]6}}
\newlabel{TotPages}{{6}{6}{}{page.6}{}}
\gdef \@abspage@last{6}

View File

@ -0,0 +1,951 @@
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020/W32TeX) (preloaded format=pdflatex 2021.3.6) 6 MAR 2021 20:19
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**lipics-v2021-sample-article.tex
(./lipics-v2021-sample-article.tex
LaTeX2e <2020-10-01> patch level 4
L3 programming layer <2021-02-18> (./lipics-v2021.cls
Document Class: lipics-v2021 2021/02/25 v3.1.1 LIPIcs articles
(c:/texlive/2020/texmf-dist/tex/latex/base/article.cls
Document Class: article 2020/04/10 v1.4m Standard LaTeX document class
(c:/texlive/2020/texmf-dist/tex/latex/base/fleqn.clo
File: fleqn.clo 2016/12/29 v1.2b Standard LaTeX option (flush left equations)
\mathindent=\skip47
Applying: [2015/01/01] Make \[ robust on input line 50.
LaTeX Info: Redefining \[ on input line 51.
Already applied: [0000/00/00] Make \[ robust on input line 62.
Applying: [2015/01/01] Make \] robust on input line 74.
LaTeX Info: Redefining \] on input line 75.
Already applied: [0000/00/00] Make \] robust on input line 83.
)
(c:/texlive/2020/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2020/04/10 v1.4m Standard LaTeX file (size option)
)
\c@part=\count179
\c@section=\count180
\c@subsection=\count181
\c@subsubsection=\count182
\c@paragraph=\count183
\c@subparagraph=\count184
\c@figure=\count185
\c@table=\count186
\abovecaptionskip=\skip48
\belowcaptionskip=\skip49
\bibindent=\dimen138
)
\tocfile=\write3
(c:/texlive/2020/texmf-dist/tex/latex/microtype/microtype.sty
Package: microtype 2021/02/25 v2.8b Micro-typographical refinements (RS)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
\KV@toks@=\toks15
)
\MT@toks=\toks16
\MT@count=\count187
LaTeX Info: Redefining \textls on input line 788.
\MT@outer@kern=\dimen139
LaTeX Info: Redefining \textmicrotypecontext on input line 1358.
\MT@listname@count=\count188
(c:/texlive/2020/texmf-dist/tex/latex/microtype/microtype-pdftex.def
File: microtype-pdftex.def 2021/02/25 v2.8b Definitions specific to pdftex (RS)
LaTeX Info: Redefining \lsstyle on input line 915.
LaTeX Info: Redefining \lslig on input line 915.
\MT@outer@space=\skip50
)
Package microtype Info: Loading configuration file microtype.cfg.
(c:/texlive/2020/texmf-dist/tex/latex/microtype/microtype.cfg
File: microtype.cfg 2021/02/25 v2.8b microtype main configuration file (RS)
))
(c:/texlive/2020/texmf-dist/tex/latex/base/inputenc.sty
Package: inputenc 2020/08/01 v1.3d Input encoding file
\inpenc@prehook=\toks17
\inpenc@posthook=\toks18
)
(c:/texlive/2020/texmf-dist/tex/generic/pdftex/glyphtounicode.tex)
(c:/texlive/2020/texmf-dist/tex/latex/lm/lmodern.sty
Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
(Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22.
LaTeX Font Info: Overwriting symbol font `letters' in version `normal'
(Font) OML/cmm/m/it --> OML/lmm/m/it on input line 23.
LaTeX Font Info: Overwriting symbol font `symbols' in version `normal'
(Font) OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24.
LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal'
(Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 25.
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26.
LaTeX Font Info: Overwriting symbol font `letters' in version `bold'
(Font) OML/cmm/b/it --> OML/lmm/b/it on input line 27.
LaTeX Font Info: Overwriting symbol font `symbols' in version `bold'
(Font) OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28.
LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold'
(Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 29.
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal'
(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal'
(Font) OT1/cmss/m/n --> OT1/lmss/m/n on input line 32.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal'
(Font) OT1/cmr/m/it --> OT1/lmr/m/it on input line 33.
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal'
(Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34.
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold'
(Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold'
(Font) OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold'
(Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37.
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold'
(Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38.
)
(c:/texlive/2020/texmf-dist/tex/latex/fontawesome5/fontawesome5.sty
(c:/texlive/2020/texmf-dist/tex/latex/l3kernel/expl3.sty
Package: expl3 2021-02-18 L3 programming layer (loader)
(c:/texlive/2020/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2021-03-02 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count189
\l__pdf_internal_box=\box47
))
Package: fontawesome5 2020/03/24 v5.13.0 Font Awesome 5
(c:/texlive/2020/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty
Package: l3keys2e 2021-02-02 LaTeX2e option processing using LaTeX3 keys
)
(c:/texlive/2020/texmf-dist/tex/latex/l3packages/xparse/xparse.sty
(c:/texlive/2020/texmf-dist/tex/latex/l3packages/xparse/xparse-2020-10-01.sty
(c:/texlive/2020/texmf-dist/tex/latex/l3packages/xparse/xparse-generic.tex)))
(c:/texlive/2020/texmf-dist/tex/latex/fontawesome5/fontawesome5-generic-helper.
sty
Package: fontawesome5-generic-helper 2020/03/24 v5.13.0 non-uTeX helper for fon
tawesome5
(c:/texlive/2020/texmf-dist/tex/latex/fontawesome5/fontawesome5-mapping.def)))
(c:/texlive/2020/texmf-dist/tex/latex/base/fontenc.sty
Package: fontenc 2020/08/10 v2.0s Standard LaTeX package
LaTeX Font Info: Trying to load font information for T1+lmr on input line 11
2.
(c:/texlive/2020/texmf-dist/tex/latex/lm/t1lmr.fd
File: t1lmr.fd 2009/10/30 v1.6 Font defs for Latin Modern
))
(c:/texlive/2020/texmf-dist/tex/latex/base/textcomp.sty
Package: textcomp 2020/02/02 v2.0n Standard LaTeX package
)
(c:/texlive/2020/texmf-dist/tex/latex/amsfonts/eucal.sty
Package: eucal 2009/06/22 v3.00 Euler Script fonts
LaTeX Font Info: Overwriting math alphabet `\EuScript' in version `bold'
(Font) U/eus/m/n --> U/eus/b/n on input line 33.
)
(c:/texlive/2020/texmf-dist/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
(c:/texlive/2020/texmf-dist/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\@emptytoks=\toks19
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
))
(c:/texlive/2020/texmf-dist/tex/generic/soul/soul.sty
Package: soul 2003/11/17 v2.4 letterspacing/underlining (mf)
\SOUL@word=\toks20
\SOUL@lasttoken=\toks21
\SOUL@cmds=\toks22
\SOUL@buffer=\toks23
\SOUL@token=\toks24
\SOUL@spaceskip=\skip51
\SOUL@ttwidth=\dimen140
\SOUL@uldp=\dimen141
\SOUL@ulht=\dimen142
)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/color.sty
Package: color 2020/02/24 v1.2b Standard LaTeX Color (DPC)
(c:/texlive/2020/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package color Info: Driver file: pdftex.def on input line 147.
(c:/texlive/2020/texmf-dist/tex/latex/graphics-def/pdftex.def
File: pdftex.def 2020/10/05 v1.2a Graphics/color driver for pdftex
))
(c:/texlive/2020/texmf-dist/tex/generic/babel/babel.sty
Package: babel 2021/03/03 3.55 The Babel package
(c:/texlive/2020/texmf-dist/tex/generic/babel/babel.def
File: babel.def 2021/03/03 3.55 Babel common definitions
\babel@savecnt=\count190
\U@D=\dimen143
\l@babelnohyphens=\language86
(c:/texlive/2020/texmf-dist/tex/generic/babel/txtbabel.def)
\bbl@readstream=\read2
)
\bbl@dirlevel=\count191
(c:/texlive/2020/texmf-dist/tex/generic/babel-english/UKenglish.ldf
Language: UKenglish 2017/06/06 v3.3r English support from the babel system
(c:/texlive/2020/texmf-dist/tex/generic/babel-english/english.ldf
Language: english 2017/06/06 v3.3r English support from the babel system
Package babel Info: \l@canadian = using hyphenrules for english
(babel) (\language0) on input line 102.
Package babel Info: \l@australian = using hyphenrules for ukenglish
(babel) (\language21) on input line 105.
Package babel Info: \l@newzealand = using hyphenrules for ukenglish
(babel) (\language21) on input line 108.
)))
(c:/texlive/2020/texmf-dist/tex/latex/amsmath/amsmath.sty
Package: amsmath 2020/09/23 v2.17i AMS math features
\@mathmargin=\skip52
For additional information on amsmath, use the `?' option.
(c:/texlive/2020/texmf-dist/tex/latex/amsmath/amstext.sty
Package: amstext 2000/06/29 v2.01 AMS text
(c:/texlive/2020/texmf-dist/tex/latex/amsmath/amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks25
\ex@=\dimen144
))
(c:/texlive/2020/texmf-dist/tex/latex/amsmath/amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen145
)
(c:/texlive/2020/texmf-dist/tex/latex/amsmath/amsopn.sty
Package: amsopn 2016/03/08 v2.02 operator names
)
\inf@bad=\count192
LaTeX Info: Redefining \frac on input line 234.
\uproot@=\count193
\leftroot@=\count194
LaTeX Info: Redefining \overline on input line 399.
\classnum@=\count195
\DOTSCASE@=\count196
LaTeX Info: Redefining \ldots on input line 496.
LaTeX Info: Redefining \dots on input line 499.
LaTeX Info: Redefining \cdots on input line 620.
\Mathstrutbox@=\box48
\strutbox@=\box49
\big@size=\dimen146
LaTeX Font Info: Redeclaring font encoding OML on input line 743.
LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
\macc@depth=\count197
\c@MaxMatrixCols=\count198
\dotsspace@=\muskip16
\c@parentequation=\count199
\dspbrk@lvl=\count266
\tag@help=\toks26
\row@=\count267
\column@=\count268
\maxfields@=\count269
\andhelp@=\toks27
\eqnshift@=\dimen147
\alignsep@=\dimen148
\tagshift@=\dimen149
\tagwidth@=\dimen150
\totwidth@=\dimen151
\lineht@=\dimen152
\@envbody=\toks28
\multlinegap=\skip53
\multlinetaggap=\skip54
\mathdisplay@stack=\toks29
LaTeX Info: Redefining \[ on input line 2923.
LaTeX Info: Redefining \] on input line 2924.
)
(c:/texlive/2020/texmf-dist/tex/latex/tools/enumerate.sty
Package: enumerate 2015/07/23 v3.00 enumerate extensions (DPC)
\@enLab=\toks30
)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2020/09/09 v1.2b Enhanced LaTeX Graphics (DPC,SPQR)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2020/08/30 v1.4c Standard LaTeX Graphics (DPC,SPQR)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2016/01/03 v1.10 sin cos tan (DPC)
)
(c:/texlive/2020/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 105.
)
\Gin@req@height=\dimen153
\Gin@req@width=\dimen154
)
(c:/texlive/2020/texmf-dist/tex/latex/tools/array.sty
Package: array 2020/10/01 v2.5c Tabular extension package (FMi)
\col@sep=\dimen155
\ar@mcellbox=\box50
\extrarowheight=\dimen156
\NC@list=\toks31
\extratabsurround=\skip55
\backup@length=\skip56
\ar@cellbox=\box51
)
(c:/texlive/2020/texmf-dist/tex/latex/multirow/multirow.sty
Package: multirow 2021/01/29 v2.7 Span multiple rows of a table
\multirow@colwidth=\skip57
\multirow@cntb=\count270
\multirow@dima=\skip58
\bigstrutjot=\dimen157
)
(c:/texlive/2020/texmf-dist/tex/latex/tools/tabularx.sty
Package: tabularx 2020/01/15 v2.11c `tabularx' package (DPC)
\TX@col@width=\dimen158
\TX@old@table=\dimen159
\TX@old@col=\dimen160
\TX@target=\dimen161
\TX@delta=\dimen162
\TX@cols=\count271
\TX@ftn=\toks32
)
(c:/texlive/2020/texmf-dist/tex/latex/threeparttable/threeparttable.sty
Package: threeparttable 2003/06/13 v 3.0
\@tempboxb=\box52
)
(c:/texlive/2020/texmf-dist/tex/latex/listings/listings.sty
\lst@mode=\count272
\lst@gtempboxa=\box53
\lst@token=\toks33
\lst@length=\count273
\lst@currlwidth=\dimen163
\lst@column=\count274
\lst@pos=\count275
\lst@lostspace=\dimen164
\lst@width=\dimen165
\lst@newlines=\count276
\lst@lineno=\count277
\lst@maxwidth=\dimen166
(c:/texlive/2020/texmf-dist/tex/latex/listings/lstmisc.sty
File: lstmisc.sty 2020/03/24 1.8d (Carsten Heinz)
\c@lstnumber=\count278
\lst@skipnumbers=\count279
\lst@framebox=\box54
)
(c:/texlive/2020/texmf-dist/tex/latex/listings/listings.cfg
File: listings.cfg 2020/03/24 1.8d listings configuration
))
Package: listings 2020/03/24 1.8d (Carsten Heinz)
(c:/texlive/2020/texmf-dist/tex/latex/lineno/lineno.sty
Package: lineno 2005/11/02 line numbers on paragraphs v4.41
\linenopenalty=\count280
\output=\toks34
\linenoprevgraf=\count281
\linenumbersep=\dimen167
\linenumberwidth=\dimen168
\c@linenumber=\count282
\c@pagewiselinenumber=\count283
\c@LN@truepage=\count284
\c@internallinenumber=\count285
\c@internallinenumbers=\count286
\quotelinenumbersep=\dimen169
\bframerule=\dimen170
\bframesep=\dimen171
\bframebox=\box55
LaTeX Info: Redefining \\ on input line 3056.
)
(c:/texlive/2020/texmf-dist/tex/latex/totpages/totpages.sty
Package: totpages 2005/09/19 v2.00 Totpages Package (muewi)
(c:/texlive/2020/texmf-dist/tex/latex/base/everyshi-ltx.sty
Package: everyshi-ltx 2020/08/17 v1.0a Emulation of the original everyshi packa
ge
with kernel methods
))
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/hyperref.sty
Package: hyperref 2021-02-27 v7.00k Hypertext links for LaTeX
(c:/texlive/2020/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
)
(c:/texlive/2020/texmf-dist/tex/generic/iftex/iftex.sty
Package: iftex 2020/03/06 v1.0d TeX engine tests
)
(c:/texlive/2020/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO
)
(c:/texlive/2020/texmf-dist/tex/generic/infwarerr/infwarerr.sty
Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
)
Package pdftexcmds Info: \pdf@primitive is available.
Package pdftexcmds Info: \pdf@ifprimitive is available.
Package pdftexcmds Info: \pdfdraftmode found.
)
(c:/texlive/2020/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty
Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)
)
(c:/texlive/2020/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
)
(c:/texlive/2020/texmf-dist/tex/generic/pdfescape/pdfescape.sty
Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
)
(c:/texlive/2020/texmf-dist/tex/latex/hycolor/hycolor.sty
Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
)
(c:/texlive/2020/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
)
(c:/texlive/2020/texmf-dist/tex/latex/auxhook/auxhook.sty
Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
)
(c:/texlive/2020/texmf-dist/tex/latex/kvoptions/kvoptions.sty
Package: kvoptions 2020-10-07 v3.14 Key value format for package options (HO)
)
\@linkdim=\dimen172
\Hy@linkcounter=\count287
\Hy@pagecounter=\count288
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/pd1enc.def
File: pd1enc.def 2021-02-27 v7.00k Hyperref: PDFDocEncoding definition (HO)
Now handling font encoding PD1 ...
... no UTF-8 mapping file for font encoding PD1
)
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/hyperref-langpatches.def
File: hyperref-langpatches.def 2021-02-27 v7.00k Hyperref: patches for babel la
nguages
)
(c:/texlive/2020/texmf-dist/tex/generic/intcalc/intcalc.sty
Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
)
(c:/texlive/2020/texmf-dist/tex/generic/etexcmds/etexcmds.sty
Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
)
\Hy@SavedSpaceFactor=\count289
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/puenc.def
File: puenc.def 2021-02-27 v7.00k Hyperref: PDF Unicode definition (HO)
Now handling font encoding PU ...
... no UTF-8 mapping file for font encoding PU
)
Package hyperref Info: Option `unicode' set `true' on input line 4073.
Package hyperref Info: Hyper figures OFF on input line 4192.
Package hyperref Info: Link nesting OFF on input line 4197.
Package hyperref Info: Hyper index ON on input line 4200.
Package hyperref Info: Plain pages OFF on input line 4207.
Package hyperref Info: Backreferencing OFF on input line 4212.
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
Package hyperref Info: Bookmarks ON on input line 4445.
\c@Hy@tempcnt=\count290
(c:/texlive/2020/texmf-dist/tex/latex/url/url.sty
\Urlmuskip=\muskip17
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
)
LaTeX Info: Redefining \url on input line 4804.
\XeTeXLinkMargin=\dimen173
(c:/texlive/2020/texmf-dist/tex/generic/bitset/bitset.sty
Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
(c:/texlive/2020/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO
)
))
\Fld@menulength=\count291
\Field@Width=\dimen174
\Fld@charsize=\dimen175
Package hyperref Info: Hyper figures OFF on input line 6075.
Package hyperref Info: Link nesting OFF on input line 6080.
Package hyperref Info: Hyper index ON on input line 6083.
Package hyperref Info: backreferencing OFF on input line 6090.
Package hyperref Info: Link coloring OFF on input line 6095.
Package hyperref Info: Link coloring with OCG OFF on input line 6100.
Package hyperref Info: PDF/A mode OFF on input line 6105.
LaTeX Info: Redefining \ref on input line 6145.
LaTeX Info: Redefining \pageref on input line 6149.
(c:/texlive/2020/texmf-dist/tex/latex/base/atbegshi-ltx.sty
Package: atbegshi-ltx 2020/08/17 v1.0a Emulation of the original atbegshi packa
ge
with kernel methods
)
\Hy@abspage=\count292
\c@Item=\count293
\c@Hfootnote=\count294
)
Package hyperref Info: Driver (autodetected): hpdftex.
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/hpdftex.def
File: hpdftex.def 2021-02-27 v7.00k Hyperref driver for pdfTeX
(c:/texlive/2020/texmf-dist/tex/latex/base/atveryend-ltx.sty
Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atvery packag
e
with kernel methods
)
\Fld@listcount=\count295
\c@bookmark@seq@number=\count296
(c:/texlive/2020/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO)
(c:/texlive/2020/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
)
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
86.
)
\Hy@SectionHShift=\skip59
)
Package hyperref Info: Option `breaklinks' set `true' on input line 656.
Package hyperref Info: Option `unicode' set `true' on input line 656.
Package hyperref Info: Option `bookmarksnumbered' set `true' on input line 656.
(c:/texlive/2020/texmf-dist/tex/latex/caption/caption.sty
Package: caption 2020/10/26 v3.5g Customizing captions (AR)
(c:/texlive/2020/texmf-dist/tex/latex/caption/caption3.sty
Package: caption3 2020/10/21 v2.2e caption3 kernel (AR)
\captionmargin=\dimen176
\captionmargin@=\dimen177
\captionwidth=\dimen178
\caption@tempdima=\dimen179
\caption@indent=\dimen180
\caption@parindent=\dimen181
\caption@hangindent=\dimen182
Package caption Info: Standard document class detected.
)
\c@caption@flags=\count297
\c@continuedfloat=\count298
Package caption Info: hyperref package is loaded.
Package caption Info: listings package is loaded.
Package caption Info: threeparttable package is loaded.
)
(c:/texlive/2020/texmf-dist/tex/latex/graphics/rotating.sty
Package: rotating 2016/08/11 v2.16d rotated objects in LaTeX
(c:/texlive/2020/texmf-dist/tex/latex/base/ifthen.sty
Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
)
\c@r@tfl@t=\count299
\rotFPtop=\skip60
\rotFPbot=\skip61
\rot@float@box=\box56
\rot@mess@toks=\toks35
)
(c:/texlive/2020/texmf-dist/tex/latex/caption/subcaption.sty
Package: subcaption 2020/10/07 v1.3j Sub-captions (AR)
\c@subfigure=\count300
\c@subtable=\count301
)
\c@author=\count302
\c@currentauthor=\count303
(c:/texlive/2020/texmf-dist/tex/generic/xstring/xstring.sty
(c:/texlive/2020/texmf-dist/tex/generic/xstring/xstring.tex
\integerpart=\count304
\decimalpart=\count305
)
Package: xstring 2019/02/06 v1.83 String manipulations (CT)
)
(c:/texlive/2020/texmf-dist/tex/latex/comment/comment.sty
\CommentStream=\write4
Excluding comment 'comment') Excluding comment 'CCSXML'
(c:/texlive/2020/texmf-dist/tex/latex/amscls/amsthm.sty
Package: amsthm 2020/05/29 v2.20.6
\thm@style=\toks36
\thm@bodyfont=\toks37
\thm@headfont=\toks38
\thm@notefont=\toks39
\thm@headpunct=\toks40
\thm@preskip=\skip62
\thm@postskip=\skip63
\thm@headsep=\skip64
\dth@everypar=\toks41
)
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-restate.sty
Package: thm-restate 2020/08/01 v0.72
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thmtools.sty
Package: thmtools 2020/08/01 v0.72
\thmt@toks=\toks42
\c@thmt@dummyctr=\count306
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-patch.sty
Package: thm-patch 2020/08/01 v0.72
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/parseargs.sty
Package: parseargs 2020/08/01 v0.72
\@parsespec=\toks43
))
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-kv.sty
Package: thm-kv 2020/08/01 v0.72
Package thm-kv Info: Theorem names will be uppercased on input line 42.
Package thm-kv Info: kvsetkeys patch (v1.16 or later) on input line 158.
)
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-autoref.sty
Package: thm-autoref 2020/08/01 v0.72
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/aliasctr.sty
Package: aliasctr 2020/08/01 v0.72
))
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-listof.sty
Package: thm-listof 2020/08/01 v0.72
)
(c:/texlive/2020/texmf-dist/tex/latex/thmtools/thm-amsthm.sty
Package: thm-amsthm 2020/08/01 v0.72
\thmt@style@headstyle=\toks44
)))
(c:/texlive/2020/texmf-dist/tex/latex/cleveref/cleveref.sty
Package: cleveref 2018/03/27 v0.21.4 Intelligent cross-referencing
Package cleveref Info: `hyperref' support loaded on input line 2370.
LaTeX Info: Redefining \cref on input line 2370.
LaTeX Info: Redefining \Cref on input line 2370.
LaTeX Info: Redefining \crefrange on input line 2370.
LaTeX Info: Redefining \Crefrange on input line 2370.
LaTeX Info: Redefining \cpageref on input line 2370.
LaTeX Info: Redefining \Cpageref on input line 2370.
LaTeX Info: Redefining \cpagerefrange on input line 2370.
LaTeX Info: Redefining \Cpagerefrange on input line 2370.
LaTeX Info: Redefining \labelcref on input line 2370.
LaTeX Info: Redefining \labelcpageref on input line 2370.
Package cleveref Info: `amsthm' support loaded on input line 3026.
Package cleveref Info: `listings' support loaded on input line 3131.
Package cleveref Info: always capitalise cross-reference names on input line 78
25.
Package cleveref Info: always capitalise cross-reference names on input line 78
52.
Package cleveref Info: no abbreviation of names on input line 7852.
)
(c:/texlive/2020/texmf-dist/tex/latex/oberdiek/aliascnt.sty
Package: aliascnt 2018/09/07 v1.5 Alias counters (HO)
)
\c@theorem=\count307
)
(./lipics-v2021-sample-article.aux)
\openout1 = `lipics-v2021-sample-article.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 65.
LaTeX Font Info: ... okay on input line 65.
LaTeX Info: Redefining \microtypecontext on input line 65.
Package microtype Info: Generating PDF output.
Package microtype Info: Character protrusion enabled (level 2).
Package microtype Info: Using default protrusion set `alltext'.
Package microtype Info: Automatic font expansion enabled (level 2),
(microtype) stretch: 20, shrink: 20, step: 1, non-selected.
Package microtype Info: Using default expansion set `alltext-nott'.
LaTeX Info: Redefining \showhyphens on input line 65.
Package microtype Info: No adjustment of tracking.
Package microtype Info: No adjustment of interword spacing.
Package microtype Info: No adjustment of character kerning.
(c:/texlive/2020/texmf-dist/tex/latex/microtype/mt-cmr.cfg
File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman
(RS)
)
(c:/texlive/2020/texmf-dist/tex/latex/xcolor/xcolor.sty
Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK)
(c:/texlive/2020/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package xcolor Info: Driver file: pdftex.def on input line 225.
LaTeX Info: Redefining \color on input line 709.
Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348.
Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352.
Package xcolor Info: Model `RGB' extended on input line 1364.
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366.
Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367.
Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368.
Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369.
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370.
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371.
)
(c:/texlive/2020/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count308
\scratchdimen=\dimen183
\scratchbox=\box57
\nofMPsegments=\count309
\nofMParguments=\count310
\everyMPshowfont=\toks45
\MPscratchCnt=\count311
\MPscratchDim=\dimen184
\MPnumerator=\count312
\makeMPintoPDFobject=\count313
\everyMPtoPDFconversion=\toks46
) (c:/texlive/2020/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
85.
(c:/texlive/2020/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
e
))
\c@lstlisting=\count314
Package hyperref Info: Link coloring OFF on input line 65.
(c:/texlive/2020/texmf-dist/tex/latex/hyperref/nameref.sty
Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section
(c:/texlive/2020/texmf-dist/tex/latex/refcount/refcount.sty
Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
)
(c:/texlive/2020/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
)
\c@section@level=\count315
)
LaTeX Info: Redefining \ref on input line 65.
LaTeX Info: Redefining \pageref on input line 65.
LaTeX Info: Redefining \nameref on input line 65.
(./lipics-v2021-sample-article.out) (./lipics-v2021-sample-article.out)
\@outlinefile=\write5
\openout5 = `lipics-v2021-sample-article.out'.
Package caption Info: Begin \AtBeginDocument code.
Package caption Info: rotating package is loaded.
Package caption Info: End \AtBeginDocument code.
LaTeX Font Info: Trying to load font information for T1+lmss on input line 6
8.
(c:/texlive/2020/texmf-dist/tex/latex/lm/t1lmss.fd
File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
Package microtype Info: Loading generic protrusion settings for font family
(microtype) `lmss' (encoding: T1).
(microtype) For optimal results, create family-specific settings.
(microtype) See the microtype manual for details.
LaTeX Font Info: Trying to load font information for OT1+lmr on input line 6
8.
(c:/texlive/2020/texmf-dist/tex/latex/lm/ot1lmr.fd
File: ot1lmr.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
LaTeX Font Info: Trying to load font information for OML+lmm on input line 6
8.
(c:/texlive/2020/texmf-dist/tex/latex/lm/omllmm.fd
File: omllmm.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
LaTeX Font Info: Trying to load font information for OMS+lmsy on input line
68.
(c:/texlive/2020/texmf-dist/tex/latex/lm/omslmsy.fd
File: omslmsy.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
LaTeX Font Info: Trying to load font information for OMX+lmex on input line
68.
(c:/texlive/2020/texmf-dist/tex/latex/lm/omxlmex.fd
File: omxlmex.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <10> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <7> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <5> on input line 68.
LaTeX Font Info: Trying to load font information for U+msa on input line 68.
(c:/texlive/2020/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
)
(c:/texlive/2020/texmf-dist/tex/latex/microtype/mt-msa.cfg
File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
)
LaTeX Font Info: Trying to load font information for U+msb on input line 68.
(c:/texlive/2020/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
)
(c:/texlive/2020/texmf-dist/tex/latex/microtype/mt-msb.cfg
File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
)
LaTeX Font Info: Trying to load font information for U+fontawesomefree1 on i
nput line 68.
(c:/texlive/2020/texmf-dist/tex/latex/fontawesome5/ufontawesomefree1.fd)
<orcid.pdf, id=39, 256.96pt x 256.96pt>
File: orcid.pdf Graphic file (type pdf)
<use orcid.pdf>
Package pdftex.def Info: orcid.pdf used on input line 68.
(pdftex.def) Requested size: 8.99843pt x 9.0pt.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <12> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <8> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <6> on input line 68.
LaTeX Font Info: Calculating math sizes for size <8.5> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <8.5> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <5.94997> on input line 68.
LaTeX Font Info: External font `lmex10' loaded for size
(Font) <4.25> on input line 68.
File: orcid.pdf Graphic file (type pdf)
<use orcid.pdf>
Package pdftex.def Info: orcid.pdf used on input line 68.
(pdftex.def) Requested size: 8.99843pt x 9.0pt.
\openout3 = `lipics-v2021-sample-article.vtc'.
LaTeX Font Info: Trying to load font information for T1+lmtt on input line 8
8.
(c:/texlive/2020/texmf-dist/tex/latex/lm/t1lmtt.fd
File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
Package microtype Info: Loading generic protrusion settings for font family
(microtype) `lmtt' (encoding: T1).
(microtype) For optimal results, create family-specific settings.
(microtype) See the microtype manual for details.
Underfull \hbox (badness 1072) in paragraph at lines 88--89
[]\T1/lmr/m/n/10 (+20) Use the provided sec-tion-ing mac-ros: [][]\T1/lmtt/m/n/
10 \section[]\T1/lmr/m/n/10 (+20) , [][]\T1/lmtt/m/n/10 \subsection[]\T1/lmr/m/
n/10 (+20) , [][]\T1/lmtt/m/n/10 \subsubsection[]\T1/lmr/m/n/10 (+20) ,
[]
<cc-by.pdf, id=43, 88.33pt x 31.11626pt>
File: cc-by.pdf Graphic file (type pdf)
<use cc-by.pdf>
Package pdftex.def Info: cc-by.pdf used on input line 95.
(pdftex.def) Requested size: 39.74274pt x 14.0pt.
LaTeX Font Info: Trying to load font information for TS1+lmr on input line 9
5.
(c:/texlive/2020/texmf-dist/tex/latex/lm/ts1lmr.fd
File: ts1lmr.fd 2009/10/30 v1.6 Font defs for Latin Modern
)
<lipics-logo-bw.pdf, id=46, 591.44762pt x 144.42657pt>
File: lipics-logo-bw.pdf Graphic file (type pdf)
<use lipics-logo-bw.pdf>
Package pdftex.def Info: lipics-logo-bw.pdf used on input line 95.
(pdftex.def) Requested size: 64.00354pt x 14.0pt.
[1.1
{c:/texlive/2020/texmf-var/fonts/map/pdftex/updmap/pdftex.map} <./cc-by.pdf> <.
/orcid.pdf> <./lipics-logo-bw.pdf>]
LaTeX Font Info: Font shape `T1/lmtt/bx/n' in size <10> not available
(Font) Font shape `T1/lmtt/b/n' tried instead on input line 111.
LaTeX Warning: Citation `DBLP:journals/cacm/Knuth74' on page 2 undefined on inp
ut line 132.
[2.2]
Package hyperref Info: bookmark level for unknown lemma defaults to 0 on input
line 134.
Package hyperref Info: bookmark level for unknown claim defaults to 0 on input
line 146.
LaTeX Warning: Citation `DBLP:books/mk/GrayR93' on page 3 undefined on input li
ne 157.
Package hyperref Info: bookmark level for unknown corollary defaults to 0 on in
put line 157.
Package hyperref Info: bookmark level for unknown proposition defaults to 0 on
input line 162.
LaTeX Warning: Citation `DBLP:journals/cacm/Dijkstra68a' on page 3 undefined on
input line 170.
[3.3]
Package hyperref Info: bookmark level for unknown remark defaults to 0 on input
line 190.
LaTeX Warning: Citation `DBLP:conf/focs/HopcroftPV75' on page 4 undefined on in
put line 195.
No file lipics-v2021-sample-article.bbl.
[4.4]
Package hyperref Info: bookmark level for unknown theorem defaults to 0 on inpu
t line 270.
Package hyperref Info: bookmark level for unknown conjecture defaults to 0 on i
nput line 286.
Package hyperref Info: bookmark level for unknown observation defaults to 0 on
input line 290.
Package hyperref Info: bookmark level for unknown exercise defaults to 0 on inp
ut line 294.
[5.5]
Package hyperref Info: bookmark level for unknown definition defaults to 0 on i
nput line 298.
Package hyperref Info: bookmark level for unknown example defaults to 0 on inpu
t line 302.
Package hyperref Info: bookmark level for unknown note defaults to 0 on input l
ine 306.
Package hyperref Info: bookmark level for unknown note* defaults to 0 on input
line 311.
Package hyperref Info: bookmark level for unknown remark* defaults to 0 on inpu
t line 319.
Package hyperref Info: bookmark level for unknown claim* defaults to 0 on input
line 326.
[6.6] (./lipics-v2021-sample-article.aux)
LaTeX Warning: There were undefined references.
Package rerunfilecheck Info: File `lipics-v2021-sample-article.out' has not cha
nged.
(rerunfilecheck) Checksum: 5BA7AFE1D7627323F9231F3AC0825B3F;1713.
)
Here is how much of TeX's memory you used:
22248 strings out of 479023
370909 string characters out of 5863351
727080 words of memory out of 5000000
38986 multiletter control sequences out of 15000+600000
542726 words of font info for 244 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
98i,11n,102p,1049b,1201s stack positions out of 5000i,500n,10000p,200000b,80000s
{c:/texlive/2020/texmf-dist/fonts/enc/dvips/lm/lm-ec.enc}{c:/texlive/2020/tex
mf-dist/fonts/enc/dvips/lm/lm-mathit.enc}{c:/texlive/2020/texmf-dist/fonts/enc/
dvips/lm/lm-ts1.enc}{c:/texlive/2020/texmf-dist/fonts/enc/dvips/fontawesome5/fa
5free1.enc}<c:/texlive/2020/texmf-dist/fonts/type1/public/fontawesome5/FontAwes
ome5Free-Regular.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/fontawesome
5/FontAwesome5Free-Solid.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/
lmbx10.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/lmbx12.pfb><c:/tex
live/2020/texmf-dist/fonts/type1/public/lm/lmmi10.pfb><c:/texlive/2020/texmf-di
st/fonts/type1/public/lm/lmr10.pfb><c:/texlive/2020/texmf-dist/fonts/type1/publ
ic/lm/lmr6.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/lmr7.pfb><c:/t
exlive/2020/texmf-dist/fonts/type1/public/lm/lmr8.pfb><c:/texlive/2020/texmf-di
st/fonts/type1/public/lm/lmr9.pfb><c:/texlive/2020/texmf-dist/fonts/type1/publi
c/lm/lmri10.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/lmri9.pfb><c:
/texlive/2020/texmf-dist/fonts/type1/public/lm/lmss10.pfb><c:/texlive/2020/texm
f-dist/fonts/type1/public/lm/lmss8.pfb><c:/texlive/2020/texmf-dist/fonts/type1/
public/lm/lmssbx10.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/lmtk10
.pfb><c:/texlive/2020/texmf-dist/fonts/type1/public/lm/lmtt10.pfb><c:/texlive/2
020/texmf-dist/fonts/type1/public/lm/lmtt9.pfb><c:/texlive/2020/texmf-dist/font
s/type1/public/amsfonts/symbols/msam10.pfb>
Output written on lipics-v2021-sample-article.pdf (6 pages, 572347 bytes).
PDF statistics:
294 PDF objects out of 1000 (max. 8388607)
233 compressed objects within 3 object streams
77 named destinations out of 1000 (max. 500000)
81494 words of extra memory for PDF output out of 89155 (max. 10000000)

View File

@ -0,0 +1,8 @@
\BOOKMARK [1][-]{section.1}{\376\377\0001\000\040\000T\000y\000p\000e\000s\000e\000t\000t\000i\000n\000g\000\040\000i\000n\000s\000t\000r\000u\000c\000t\000i\000o\000n\000s\000\040\040\023\000\040\000S\000u\000m\000m\000a\000r\000y}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\0002\000\040\000L\000o\000r\000e\000m\000\040\000i\000p\000s\000u\000m\000\040\000d\000o\000l\000o\000r\000\040\000s\000i\000t\000\040\000a\000m\000e\000t}{}% 2
\BOOKMARK [2][-]{subsection.2.1}{\376\377\0002\000.\0001\000\040\000C\000u\000r\000a\000b\000i\000t\000u\000r\000\040\000d\000i\000c\000t\000u\000m\000\040\000f\000e\000l\000i\000s\000\040\000i\000d\000\040\000s\000a\000p\000i\000e\000n}{section.2}% 3
\BOOKMARK [2][-]{subsection.2.2}{\376\377\0002\000.\0002\000\040\000P\000r\000o\000i\000n\000\040\000a\000c\000\040\000f\000e\000r\000m\000e\000n\000t\000u\000m\000\040\000a\000u\000g\000u\000e}{section.2}% 4
\BOOKMARK [1][-]{section.3}{\376\377\0003\000\040\000P\000e\000l\000l\000e\000n\000t\000e\000s\000q\000u\000e\000\040\000q\000u\000i\000s\000\040\000t\000o\000r\000t\000o\000r}{}% 5
\BOOKMARK [1][-]{section.4}{\376\377\0004\000\040\000M\000o\000r\000b\000i\000\040\000e\000r\000o\000s\000\040\000m\000a\000g\000n\000a}{}% 6
\BOOKMARK [1][-]{appendix.A}{\376\377\000A\000\040\000S\000t\000y\000l\000e\000s\000\040\000o\000f\000\040\000l\000i\000s\000t\000s\000,\000\040\000e\000n\000u\000m\000e\000r\000a\000t\000i\000o\000n\000s\000,\000\040\000a\000n\000d\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000s}{}% 7
\BOOKMARK [1][-]{appendix.B}{\376\377\000B\000\040\000T\000h\000e\000o\000r\000e\000m\000-\000l\000i\000k\000e\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000s}{}% 8

View File

@ -0,0 +1 @@
mktexpk --mfmode / --bdpi 600 --mag 0+540/600 --dpi 540 fa5free0solid

Binary file not shown.

417
README
View File

@ -12,154 +12,141 @@ The production version is the one on CTAN and ACM sites.
Changes
version 1.08 SIGPLAN reformatting (Matthew Fluet); bug fixes
Version 1.83 Support for multilanguage papers
ISSN changes for some journals
version 1.09 SIGPLAN: revert caption rules (Matthew Fluet)
Version 1.82 Bug fixes.
New command \anon for anonymization of short strings.
Documentation update.
version 1.10 Bug fixes
Version 1.81 Bug fixes
New bib field distinctURL to print URL even if doi is present.
Reworded samples
version 1.11 Customization of ACM theorem styles and proof
environment (Matthew Fluet).
Version 1.80 New journals: DLT, FAC
version 1.12 Bug fixes and documentation updates.
Footnotes rearranged.
Option natbib is now mostly superfluous: the class
makes a guess based on the format chosen.
Version 1.79 Fixed pages with index
(https://github.com/borisveytsman/acmart/issues/440)
Updated information for TAP, TCPS, TEAC
version 1.13 Formatting changes: headers, folios etc.
Bibliography changes.
Version 1.78 Documentation update.
Magic texcount comments for samples.
Title page now is split if there are too many authors
Bug fixes.
version 1.14 Warn about undefined citation styles; move definitions
of acmauthoryear and acmnumeric citation styles before
use.
Version 1.77 Changed the way to typeset multiple affiliations (Christoph Sommer)
version 1.15 New structured affiliation command.
New commands for acknowledgements.
Version 1.76 Added many journal abbreviations to the bst.
New experimental option: pbalance
ORCID linking code
version 1.16 Formatting changes for headers and footers.
Version 1.75 \country is now obligatory for addresses.
Added \AtBeginMaketitle
version 1.17 Formatting changes for margins and lists. Bug fixes.
Version 1.74 Bug fixes. A regression introduced in the font changes
is reverted.
version 1.18 Natbib is now the default for all versions. A unified bib
file is used for all styles. Better treatment
of multiple affiliations.
Version 1.73 Bug fixes
The elements institution, city and country are now obligatory
for affiliations
version 1.19 Include 'Abstract', 'Acknowledgements', and 'References'
in PDF bookmarks.
Version 1.72 Bug fixes. Better handling of metadata.
version 1.20 Bug fixes, documentation updates
Version 1.71 Bug fixes
Formats sigchi and sigchi-a are retired
Bibliography formatting changes for @inproceedings entries
having both series and volume
LuaLaTeX now uses the same OTF fonts as XeLaTeX
version 1.21 Bibliography changes: added arXiv, some cleanup
Version 1.70 Title change for ACM/IMS Transactions on Data Science
Bug fixes for bibliography
version 1.22 Bibliography changes for Aptara backend; should be
invisible for the users.
Version 1.69 Bug fixes
Compatibility with LaTeX 2020-02-02 release
version 1.23 Added PACM PL journal option.
Version 1.68 Bug fixes
BST now recognizes words `Paper' or 'Article' in
eid or articleno
version 1.24 Added IMWUT journal option.
Version 1.67 Urgent bug fixes:
BibTeX style bug fixed (Michael D. Adams)
Sigplan special section bugfix
version 1.25 Updated PACMPL journal option.
Version 1.66 Bug fixes
BibTeX change: location is now a synonym for city (Feras Saad)
ACM reference format is now mandatory for papers over one page.
CCS concepts and keywords are now mandatory for
papers over two pages.
Authors' addresses are mandatory for journal articles.
version 1.26 Bug fixes
Version 1.65 Bug fixes
New journal: DGOV
DTRAP and HEALTH are now using acmlarge format
version 1.27 Bug fixes
Version 1.64 Produce error if abstract is entered after maketitle
(previously abstract was silently dropped)
Bug fixes for line numbering
version 1.28 Bug fixes: natbib=false now behaves correctly.
Version 1.63a Moved TQUANT to TQC
version 1.29 Documentation changes. Head height increased from 12pt to 13pt.
Removed spurious indent at start of abstract.
Improved kerning in CCS description list.
Version 1.63 New journals: TQUANT, FACMP
version 1.30 Bibtex style now recognizes https:// in doi.
Added \frenchspacing.
\department now has an optional hierarchy level.
Switched to T1 encoding
Updated IMWUT and PACMPL
Version 1.62 Documentation update
New journal: TELO
Bug fixes
version 1.31 Changed default year and month to the current ones
(thanks to Matteo Riondato)
Table of contents now works
Marginalia now work in all formats
New command \additionalaffiliation
Documentation changes
Version 1.61 Bug fixes
New bibtex types for artifacts
version 1.32 New DOI formatting.
Format siggraph is now obsolete, and sigconf
is used instead.
New proceedings title: POMACS.
Version 1.60 New option: urlbreakonhyphens (thanks to Peter Kemp)
Smaller header size for acmsmall
version 1.33 New option `timestamp' (Michael D. Adams)
New option `authordraft'
Documentation updates
Bug fixes
We now use Type 1 versions of Libertine fonts even with XeTeX.
New hook acmart-preload-hook.tex (wizards only!)
Added new options `obeypunctuation' for \affiliation command
Added SubmissionID
Added right line count ruler for two-column formats
Added workaround for Adobe Acrobat bugs in selection
Added eid field to the bibliography
Version 1.59 Now a journal format can be used for conference proceedings
All samples are now generated from the same .dtx file
Bug fixes
version 1.34 Deleted DOI from doi numbers
Changed bibstrip formatting
The command \terms is now obsolete
The rulers in review mode now have continuous numbering
Version 1.58 Suppressed spurious warnings.
New journal: HEALTH.
TDSCI is renamed to TDS.
version 1.35 Author-year bib style now uses square brackets.
Changed defaults for TOG sample
Price is suppressed for usgov and rightsretained modes.
Bugs fixed
Version 1.57 Change of \baselinestretch now produces an error
Booktabs is now always loaded
Added option `balance' to balance last page in two-column mode
E-mail is no longer split in addresses
New samples (Stephen Spencer)
Version 1.36 Bug fixes
Moved PACMPL to acmlarge format
New journal: PACMHCI
Added the possibility to adjust number of author
boxes per row in conference formats
Version 1.56 Bug fixes
Added \flushbottom to two column formats (Philip Quinn)
The final punctuation for the list of concepts
is now a period instead of a semicolon (Philip Quinn)
New command \Description to describe images for visually
impaired users.
Version 1.37 Reduce list indentation (Matthew Fluet)
Version 1.55 Bug fixes
Font changes for SIGCHI table captions
Version 1.38 Increase default font size for SIGPLAN
Version 1.39 Added \authornotemark command
Version 1.40 Bibliography changes
Added processing of one-component ccsdesc nodes
Bug fixes.
Made the height a multiple of \baselineskip + \topskip
Added cleveref
We no longer print street address in SIGs
Version 1.41 Rearranged bib files
Added new badges
Version 1.42 Deleted ACM badges
Version 1.54 New option: 'nonacm' (Gabriel Scherer)
Deleted indent for subsubsection (suggested by Ross Moore)
Suppressed some obscure warnings in BibTeX processing
Suppressed hyperref warnings (Paolo G. Giarrusso)
New code for sections to help with accessibility patches
(Ross Moore)
Submission id, if present, is printed in anon mode
Bug fixes
Version 1.43 Bug fixes
Version 1.53 New journals: PACMCGIT, TIOT, TDSCI
Version 1.44 Bug fixes.
Empty DOI and ISBN suppress printing DOI or ISBN lines
Separated theorem code into acmthm.sty, loaded by default.
Article number can be set for proceedings.
New commands: \acmBooktitle, \editor.
Reference citation format updated.
Version 1.52 Another rewording of licenses
Version 1.45 Workaround for a Libertine bug. Thanks to LianTze Lim
from Overleaf
Version 1.51 Journal footers now use abbreviated journal titles.
Corrected the bug with acmPrice.
Do not show price when copyright is set to iw3c2w3 and iw3c2w3g.
The package now is compatible with polyglossia (Joachim Breitner).
Slightly reworded copyright statements.
Version 1.46 Bug fixes for bibliography: label width is now calculated
correctly.
All PACM now use screen option. This requires etoolbox.
Added subtitle to ACM reference format.
Now acmart is compatible with fontspec.
\thanks is now obsolete. The addresses are automatically
added to the journal version; this can be overridden with
\authorsaddresses command.
Deleted the rule at the end of frontmatter for all formats.
Deleted new line before doi in the reference format.
Reintegrated theorem code into acmart.dtx (Matthew Fluet)
Version 1.50 Changes in iw3c2w3 and iw3c2w3g
Version 1.47 New journal: THRI
Version 1.49 New journal: DTRAP
Version 1.48 Bug fixes
Review mode now switches on folios
@ -173,128 +160,152 @@ Version 1.48 Bug fixes
Added initial support for Biblatex (Daniel Thomas)
Added support for IW3C2 conferences
Version 1.49 New journal: DTRAP
Version 1.47 New journal: THRI
Version 1.50 Changes in iw3c2w3 and iw3c2w3g
Version 1.46 Bug fixes for bibliography: label width is now calculated
correctly.
All PACM now use screen option. This requires etoolbox.
Added subtitle to ACM reference format.
Now acmart is compatible with fontspec.
\thanks is now obsolete. The addresses are automatically
added to the journal version; this can be overridden with
\authorsaddresses command.
Deleted the rule at the end of frontmatter for all formats.
Deleted new line before doi in the reference format.
Reintegrated theorem code into acmart.dtx (Matthew Fluet)
Version 1.51 Journal footers now use abbreviated journal titles.
Corrected the bug with acmPrice.
Do not show price when copyright is set to iw3c2w3 and iw3c2w3g.
The package now is compatible with polyglossia (Joachim Breitner).
Slightly reworded copyright statements.
Version 1.45 Workaround for a Libertine bug. Thanks to LianTze Lim
from Overleaf
Version 1.52 Another rewording of licenses
Version 1.44 Bug fixes.
Empty DOI and ISBN suppress printing DOI or ISBN lines
Separated theorem code into acmthm.sty, loaded by default.
Article number can be set for proceedings.
New commands: \acmBooktitle, \editor.
Reference citation format updated.
Version 1.53 New journals: PACMCGIT, TIOT, TDSCI
Version 1.43 Bug fixes
Version 1.54 New option: 'nonacm' (Gabriel Scherer)
Deleted indent for subsubsection (suggested by Ross Moore)
Suppressed some obscure warnings in BibTeX processing
Suppressed hyperref warnings (Paolo G. Giarrusso)
New code for sections to help with accessibility patches
(Ross Moore)
Submission id, if present, is printed in anon mode
Version 1.42 Deleted ACM badges
Bug fixes
Version 1.55 Bug fixes
Font changes for SIGCHI table captions
Version 1.41 Rearranged bib files
Added new badges
Version 1.56 Bug fixes
Added \flushbottom to two column formats (Philip Quinn)
The final punctuation for the list of concepts
is now a period instead of a semicolon (Philip Quinn)
New command \Description to describe images for visually
impaired users.
Version 1.40 Bibliography changes
Added processing of one-component ccsdesc nodes
Bug fixes.
Made the height a multiple of \baselineskip + \topskip
Added cleveref
We no longer print street address in SIGs
Version 1.57 Change of \baselinestretch now produces an error
Booktabs is now always loaded
Added option `balance' to balance last page in two-column mode
E-mail is no longer split in addresses
New samples (Stephen Spencer)
Version 1.39 Added \authornotemark command
Version 1.58 Suppressed spurious warnings.
New journal: HEALTH.
TDSCI is renamed to TDS.
Version 1.38 Increase default font size for SIGPLAN
Version 1.59 Now a journal format can be used for conference proceedings
All samples are now generated from the same .dtx file
Bug fixes
Version 1.37 Reduce list indentation (Matthew Fluet)
version 1.60 New option: urlbreakonhyphens (thanks to Peter Kemp)
Smaller header size for acmsmall
Version 1.36 Bug fixes
Moved PACMPL to acmlarge format
New journal: PACMHCI
Added the possibility to adjust number of author
boxes per row in conference formats
Version 1.61 Bug fixes
New bibtex types for artifacts
Version 1.35 Author-year bib style now uses square brackets.
Changed defaults for TOG sample
Price is suppressed for usgov and rightsretained modes.
Bugs fixed
Version 1.62 Documentation update
New journal: TELO
Bug fixes
Version 1.34 Deleted DOI from doi numbers
Changed bibstrip formatting
The command \terms is now obsolete
The rulers in review mode now have continuous numbering
Version 1.63 New journals: TQUANT, FACMP
Version 1.33 New option `timestamp' (Michael D. Adams)
New option `authordraft'
Documentation updates
Bug fixes
We now use Type 1 versions of Libertine fonts even with XeTeX.
New hook acmart-preload-hook.tex (wizards only!)
Added new options `obeypunctuation' for \affiliation command
Added SubmissionID
Added right line count ruler for two-column formats
Added workaround for Adobe Acrobat bugs in selection
Added eid field to the bibliography
Version 1.63a Moved TQUANT to TQC
Version 1.32 New DOI formatting.
Format siggraph is now obsolete, and sigconf
is used instead.
New proceedings title: POMACS.
Version 1.64 Produce error if abstract is entered after maketitle
(previously abstract was silently dropped)
Bug fixes for line numbering
Version 1.31 Changed default year and month to the current ones
(thanks to Matteo Riondato)
Table of contents now works
Marginalia now work in all formats
New command \additionalaffiliation
Documentation changes
Version 1.65 Bug fixes
New journal: DGOV
DTRAP and HEALTH are now using acmlarge format
Version 1.30 Bibtex style now recognizes https:// in doi.
Added \frenchspacing.
\department now has an optional hierarchy level.
Switched to T1 encoding
Updated IMWUT and PACMPL
Version 1.66 Bug fixes
BibTeX change: location is now a synonym for city (Feras Saad)
ACM reference format is now mandatory for papers over one page.
CCS concepts and keywords are now mandatory for
papers over two pages.
Authors' addresses are mandatory for journal articles.
Version 1.29 Documentation changes. Head height increased from 12pt to 13pt.
Removed spurious indent at start of abstract.
Improved kerning in CCS description list.
Version 1.67 Urgent bug fixes:
BibTeX style bug fixed (Michael D. Adams)
Sigplan special section bugfix
Version 1.28 Bug fixes: natbib=false now behaves correctly.
Version 1.68 Bug fixes
BST now recognizes words `Paper' or 'Article' in
eid or articleno
Version 1.27 Bug fixes
Version 1.69 Bug fixes
Compatibility with LaTeX 2020-02-02 release
Version 1.26 Bug fixes
Version 1.70 Title change for ACM/IMS Transactions on Data Science
Bug fixes for bibliography
Version 1.25 Updated PACMPL journal option.
Version 1.24 Added IMWUT journal option.
Version 1.23 Added PACM PL journal option.
Version 1.22 Bibliography changes for Aptara backend; should be
invisible for the users.
Version 1.21 Bibliography changes: added arXiv, some cleanup
Version 1.20 Bug fixes, documentation updates
Version 1.19 Include 'Abstract', 'Acknowledgements', and 'References'
in PDF bookmarks.
Version 1.18 Natbib is now the default for all versions. A unified bib
file is used for all styles. Better treatment
of multiple affiliations.
Version 1.71 Bug fixes
Formats sigchi and sigchi-a are retired
Bibliography formatting changes for @inproceedings entries
having both series and volume
LuaLaTeX now uses the same OTF fonts as XeLaTeX
Version 1.17 Formatting changes for margins and lists. Bug fixes.
Version 1.72 Bug fixes. Better handling of metadata.
Version 1.16 Formatting changes for headers and footers.
Version 1.73 Bug fixes
The elements institution, city and country are now obligatory
for affiliations
Version 1.15 New structured affiliation command.
New commands for acknowledgements.
Version 1.74 Bug fixes. A regression introduced in the font changes
is reverted.
Version 1.14 Warn about undefined citation styles; move definitions
of acmauthoryear and acmnumeric citation styles before
use.
Version 1.75. \country is now obligatory for addresses.
Added \AtBeginMaketitle
Version 1.13 Formatting changes: headers, folios etc.
Bibliography changes.
Version 1.76. Added many journal abbreviations to the bst.
New experimental option: pbalance
ORCID linking code
Version 1.12 Bug fixes and documentation updates.
Footnotes rearranged.
Option natbib is now mostly superfluous: the class
makes a guess based on the format chosen.
Version 1.77. Changed the way to typeset multiple affiliations (Christoph Sommer)
Version 1.11 Customization of ACM theorem styles and proof
environment (Matthew Fluet).
Version 1.78. Documentation update.
Magic texcount comments for samples.
Title page now is split if there are too many authors
Bug fixes.
Version 1.10 Bug fixes
Version 1.79. Fixed pages with index
(https://github.com/borisveytsman/acmart/issues/440)
Updated information for TAP, TCPS, TEAC
Version 1.09 SIGPLAN: revert caption rules (Matthew Fluet)
Version 1.80. New journals: DLT, FAC
Version 1.08 SIGPLAN reformatting (Matthew Fluet); bug fixes

Binary file not shown.

View File

@ -1,13 +1,11 @@
%root: main.tex
%!TEX root=./main.tex
\begin{abstract}
In this work, we study the problem of computing a tuple's expected multiplicity over probabilistic databases with bag semantics (where each tuple is associated with a multiplicity) exactly and approximately.
We consider bag-\abbrTIDB\xplural where we have a bound $\bound$ on the maximum multiplicity of each tuple and tuples are independent probabilistic events (we refer to such databases as \abbrCTIDB\xplural).
We are specifically interested in the fine-grained complexity of computing expected multiplicities and how it compares to the complexity of deterministic query evaluation algorithms --- if these complexities are comparable, it opens the door to practical deployment of probabilistic databases.
Unfortunately, our results imply that computing expected multiplicities for \abbrCTIDB\xplural based on the results produced by such query evaluation algorithms introduces super-linear overhead (under parameterized complexity hardness assumptions/conjectures).
We proceed to study approximation of expected result tuple multiplicities for positive relational algebra queries ($\raPlus$) over \abbrCTIDB\xplural and for a non-trivial subclass of block-independent databases (\abbrBIDB\xplural).
We develop a sampling algorithm that computes a $(1 \pm \epsilon)$-approximation of the expected multiplicity of an output tuple in time linear in the runtime of the corresponding deterministic query for any $\raPlus$ query.
\end{abstract}
%%% Local Variables:
%%% mode: latex

View File

@ -87,3 +87,9 @@
note = {\url{http://www.ctan.org/pkg/textcase}}
}
@Manual{Braams22:Babel,
title = {Babel},
author = {Johannes L. Braams and Javier Bezos},
year = 2022,
note = {\url{http://www.ctan.org/pkg/babel}}}

View File

@ -37,7 +37,7 @@
%% Right brace \} Tilde \~}
\NeedsTeXFormat{LaTeX2e}
\ProvidesClass{acmart}
[2020/11/15 v1.75 Typesetting articles for the Association for Computing Machinery]
[2022/02/19 v1.83 Typesetting articles for the Association for Computing Machinery]
\def\@classname{acmart}
\InputIfFileExists{acmart-preload-hook.tex}{%
\ClassWarning{\@classname}{%
@ -120,6 +120,10 @@
\PackageError{\@classname}{The option balance can be either true or
false}}
\ExecuteOptionsX{balance}
% Class option `pbalance' (boolean): stored in the switch \if@ACM@pbalance
% and initialised to false below.  The second handler runs when the value
% given is neither true nor false.  \PackageError takes three arguments
% (package, message, help text); the help text was missing, so the error
% call would have grabbed whatever token happened to follow it.
\define@boolkey+{acmart.cls}[@ACM@]{pbalance}[true]{}{%
  \PackageError{\@classname}{The option pbalance can be either true or
    false}{Use pbalance=true or pbalance=false.}}
\ExecuteOptionsX{pbalance=false}
\define@boolkey+{acmart.cls}[@ACM@]{natbib}[true]{%
\if@ACM@natbib
\PackageInfo{\@classname}{Explicitly selecting natbib mode}%
@ -159,6 +163,11 @@
\DeclareOptionX{10pt}{\edef\ACM@fontsize{\CurrentOption}}
\DeclareOptionX{11pt}{\edef\ACM@fontsize{\CurrentOption}}
\DeclareOptionX{12pt}{\edef\ACM@fontsize{\CurrentOption}}
% Class option `language=<name>': collects the requested languages in
% \ACM@languages as a comma-separated list.  The list starts empty; the
% first use of the option seeds it with `english', and every use appends
% `, <name>'.  An empty list therefore means that no language was requested.
\def\ACM@languages{}
\DeclareOptionX{language}{%
\ifx\ACM@languages\@empty
\gdef\ACM@languages{english}\fi
\g@addto@macro\ACM@languages{, #1}}
\DeclareOptionX{draft}{\PassOptionsToClass{\CurrentOption}{amsart}}
\DeclareOptionX{*}{\PassOptionsToClass{\CurrentOption}{amsart}}
\ProcessOptionsX
@ -353,6 +362,77 @@
\if@ACM@natbib
\citestyle{acmnumeric}
\fi
% Heading used for the keywords section: journal formats use the long
% form, all other formats the short one.
\if@ACM@journal
\renewcommand\keywordsname{Additional Key Words and Phrases}%
\else
\renewcommand\keywordsname{Keywords}%
\fi
% When at least one language was requested (\ACM@languages is non-empty),
% load babel with that list and register per-language values of
% \keywordsname and \acksname for English, French, German and Spanish.
% Each caption hook repeats the journal/non-journal distinction above.
\ifx\ACM@languages\@empty
\else
\RequirePackage[\ACM@languages]{babel}%
% English captions
\addto\captionsenglish{%
\if@ACM@journal
\renewcommand\keywordsname{Additional Key Words and Phrases}%
\else
\renewcommand\keywordsname{Keywords}%
\fi
\renewcommand\acksname{Acknowledgements}%
}%
% French captions
\addto\captionsfrench{%
\if@ACM@journal
\renewcommand\keywordsname{Mots Clés et Phrases Supplémentaires}%
\else
\renewcommand\keywordsname{Mots clés}%
\fi
\renewcommand\acksname{Remerciements}%
}%
% German captions
\addto\captionsgerman{%
\if@ACM@journal
\renewcommand\keywordsname{Zusätzliche Schlüsselwörter und Phrasen}%
\else
\renewcommand\keywordsname{Schlüsselwörter}%
\fi
\renewcommand\acksname{Danksagungen}%
}%
% Spanish captions
\addto\captionsspanish{%
\if@ACM@journal
\renewcommand\keywordsname{Palabras y Frases Claves Adicionales}%
\else
\renewcommand\keywordsname{Palabras claves}%
\fi
\renewcommand\acksname{Expresiones de gratitud}%
}%
\fi
% \ACM@lang@check{<command>}
% Guard used by the translation commands: raises a class error naming
% <command> when no language was requested, i.e. while \ACM@languages is
% still empty.  The help text names the class option that fills that list
% (`language', one per requested language).
\newcommand\ACM@lang@check[1]{%
  \ifx\ACM@languages\@empty\relax
    \ClassError{\@classname}{%
      Command \string#1 \MessageBreak is used in monolingual document}{%
      You used a command (\string#1) \MessageBreak
      that does not have a meaning \MessageBreak
      unless languages are defined. \MessageBreak
      Please choose the languages in \string\documentclass
      \MessageBreak
      (e.g. \string\documentclass[language=french, language=english]{acmart}),
      \MessageBreak
      or delete the command.}%
  \fi}
% \translatedtitle{<language>}{<title>}
% Appends a new paragraph with <title>, typeset via \foreignlanguage in
% <language>, to the accumulator \@translatedtitle (initially empty).
% Raises an error through \ACM@lang@check when no language was requested.
\def\@translatedtitle{}
\newcommand\translatedtitle[2]{\ACM@lang@check{\translatedtitle}%
\g@addto@macro\@translatedtitle{\par\foreignlanguage{#1}{#2}}}
% \translatedsubtitle{<language>}{<subtitle>}
% Same as \translatedtitle, but accumulates into \@translatedsubtitle.
\def\@translatedsubtitle{}
\newcommand\translatedsubtitle[2]{\ACM@lang@check{\translatedsubtitle}%
\g@addto@macro\@translatedsubtitle{\par\foreignlanguage{#1}{#2}}}
% \translatedkeywords{<language>}{<keywords>}
% Appends a call \@mktranslatedkeywords{<language>}{<keywords>} to the
% accumulator \@translatedkeywords (initially empty); the keywords are
% typeset only when that accumulator is executed.
\def\@translatedkeywords{}
\newcommand\translatedkeywords[2]{\ACM@lang@check{\translatedkeywords}%
\g@addto@macro\@translatedkeywords{\@mktranslatedkeywords{#1}{#2}}}
% Environment translatedabstract{<language>}
% Hands its contents to \@savetranslatedabstract via \Collect@Body instead
% of typesetting them in place.  \@translatedabstracts (initially empty)
% is the accumulator that \@savetranslatedabstract appends to.
% NOTE(review): the tokens \@mktranslatedabstract{<language>} follow the
% \Collect@Body call, so presumably they end up at the front of the
% collected body -- confirm against the definition of \Collect@Body.
\def\@translatedabstracts{}
\newenvironment{translatedabstract}[1]{\Collect@Body
\@savetranslatedabstract\@mktranslatedabstract{#1}}{}
% \@savetranslatedabstract{<material>}
% Stores <material>, wrapped in \bgroup...\egroup, at the end of the
% accumulator \@translatedabstracts.  Raises an error if the title has
% already been typeset (\if@ACM@maketitle@typeset) and, through
% \ACM@lang@check, if no language was requested.
% \ClassError takes three arguments (class, message, help text); the help
% text is supplied explicitly so that the error call cannot swallow the
% \fi that closes the conditional.
\long\def\@savetranslatedabstract#1{\if@ACM@maketitle@typeset
  \ClassError{\@classname}{Abstract must be defined before maketitle
    command. Please move it!}{Move the translatedabstract environment
    before \string\maketitle.}\fi
  \ACM@lang@check{translatedabstract}%
  \g@addto@macro\@translatedabstracts{\bgroup#1\egroup}}
\def\@startsection#1#2#3#4#5#6{%
\if@noskipsec \leavevmode \fi
\par
@ -487,6 +567,7 @@
\rule\z@\footnotesep\ignorespaces#1\@finalstrut\strutbox}%
\color@endgroup}}
\def\@makefnmark{\hbox{\@textsuperscript{\normalfont\@thefnmark}}}
\RequirePackage{hyperxmp}
\let\@footnotemark@nolink\@footnotemark
\let\@footnotetext@nolink\@footnotetext
\RequirePackage[bookmarksnumbered,unicode]{hyperref}
@ -526,7 +607,6 @@
\fi
\hypersetup{pdflang={en},
pdfdisplaydoctitle}}
\RequirePackage{hyperxmp}
\if@ACM@natbib
\let\citeN\cite
\let\cite\citep
@ -935,7 +1015,9 @@
CIE,%
CSUR,%
DGOV,%
DLT,%
DTRAP,%
FAC,%
HEALTH,%
IMWUT,%
JACM,%
@ -1007,10 +1089,19 @@
\def\@journalName{Digital Government: Research and Practice}%
\def\@journalNameShort{Digit. Gov. Res. Pract.}%
\def\@permissionCodeOne{2639-0175}%
\or % DLT
\def\@journalName{Distributed Ledger Technologies: Research and Practice}%
\def\@journalNameShort{Distrib. Ledger Technol.}%
\def\@permissionCodeOne{2769-6472}%
\or % DTRAP
\def\@journalName{Digital Threats: Research and Practice}%
\def\@journalNameShort{Digit. Threat. Res. Pract.}%
\def\@permissionCodeOne{2576-5337}%
\or % FAC
\def\@journalName{Formal Aspects of Computing}%
\def\@journalNameShort{Form. Asp. Comput.}%
\def\@permissionCodeOne{0934-5043}%
\def\@permissionCodeTwo{1433-299X}%
\or % HEALTH
\def\@journalName{ACM Transactions on Computing for Healthcare}%
\def\@journalNameShort{ACM Trans. Comput. Healthcare}%
@ -1045,6 +1136,8 @@
\or % JOCCH
\def\@journalName{ACM Journal on Computing and Cultural Heritage}%
\def\@journalNameShort{ACM J. Comput. Cult. Herit.}%
\def\@permissionCodeOne{1556-4673}%
\def\@permissionCodeTwo{1556-4711}%
\or % PACMCGIT
\def\@journalName{Proceedings of the ACM on Computer Graphics and Interactive Techniques}%
\def\@journalNameShort{Proc. ACM Comput. Graph. Interact. Tech.}%
@ -1080,6 +1173,8 @@
\or % TACO
\def\@journalName{ACM Transactions on Architecture and Code Optimization}%
\def\@journalNameShort{ACM Trans. Arch. Code Optim.}%
\def\@permissionCodeOne{1544-3566}%
\def\@permissionCodeTwo{1544-3973}%
\or % TALG
\def\@journalName{ACM Transactions on Algorithms}%
\def\@journalNameShort{ACM Trans. Algor.}%
@ -1090,14 +1185,20 @@
\def\@permissionCodeOne{2375-4699}%
\or % TAP
\def\@journalName{ACM Transactions on Applied Perception}%
\def\@journalNameShort{ACM Trans. Appl. Percept.}%
\def\@permissionCodeOne{1544-3558}%
\or % TCPS
\def\@journalName{ACM Transactions on Cyber-Physical Systems}%
\def\@journalNameShort{ACM Trans. Cyber-Phys. Syst.}%
\def\@permissionCodeOne{2378-962X}%
\or % TDS
\def\@journalName{ACM/IMS Transactions on Data Science}%
\def\@journalNameShort{ACM/IMS Trans. Data Sci.}%
\def\@permissionCodeOne{2577-3224}%
\or % TEAC
\def\@journalName{ACM Transactions on Economics and Computation}%
\def\@journalNameShort{ACM Trans. Econ. Comput.}%
\def\@permissionCodeOne{2167-8375}%
\or % TECS
\def\@journalName{ACM Transactions on Embedded Computing Systems}%
\def\@journalNameShort{ACM Trans. Embedd. Comput. Syst.}%
@ -1168,6 +1269,7 @@
\def\@permissionCodeOne{0730-0301}
\or % TOIS
\def\@journalName{ACM Transactions on Information Systems}%
\def\@journalNameShort{ACM Trans. Inf. Syst.}%
\def\@permissionCodeOne{1046-8188}%
\or % TOIT
\def\@journalName{ACM Transactions on Internet Technology}%
@ -1176,11 +1278,13 @@
\or % TOMACS
\def\@journalName{ACM Transactions on Modeling and Computer Simulation}%
\def\@journalNameShort{ACM Trans. Model. Comput. Simul.}%
\def\@permissionCodeOne{1049-3301}%
\def\@permissionCodeTwo{1558-1195}%
\or % TOMM
\def\@journalName{ACM Transactions on Multimedia Computing, Communications and Applications}%
\def\@journalNameShort{ACM Trans. Multimedia Comput. Commun. Appl.}%
\def\@permissionCodeOne{1551-6857}%
\def\@permissionCodeTwo{0100}%
\def\@permissionCodeTwo{1551-6865}%
\or % TOMPECS
\def\@journalName{ACM Transactions on Modeling and Performance Evaluation of Computing Systems}%
\def\@journalNameShort{ACM Trans. Model. Perform. Eval. Comput. Syst.}%
@ -1238,8 +1342,8 @@
\def\@journalNameShort{ACM Trans. Web}%
\def\@permissionCodeOne{1559-1131}%
\else % FACMP, a dummy journal
\def\@journalName{Forthcoming ACM Publication}%
\def\@journalNameShort{ACM Forthcoming}%
\def\@journalName{ACM Just Accepted}%
\def\@journalNameShort{ACM Accepted}%
\def\@permissionCodeOne{XXXX-XXXX}%
\fi
\ClassInfo{\@classname}{Using journal code \@journalCode}%
@ -1268,9 +1372,12 @@
DC, USA}%
\fi
\def\acmBooktitle#1{\gdef\@acmBooktitle{#1}}
\acmBooktitle{}
\ifx\acmConference@name\@undefined\else
\acmBooktitle{Proceedings of \acmConference@name
\ifx\acmConference@name\acmConference@shortname\else
\ (\acmConference@shortname)\fi}
\ (\acmConference@shortname)\fi}
\fi
\def\@editorsAbbrev{(Ed.)}
\def\@acmEditors{}
\def\editor#1{\ifx\@acmEditors\@empty
@ -1302,12 +1409,16 @@
\@acmSubmissionID\fi}}%
\gdef\authors{Anonymous Author(s)}%
\else
\gdef\addresses{\@author{#2}}%
\expandafter\gdef\expandafter\addresses\expandafter{%
\expandafter\@author\expandafter{%
\csname typeset@author\the\num@authors\endcsname{#2}}}%
\gdef\authors{#2}%
\fi
\else
\if@ACM@anonymous\else
\g@addto@macro\addresses{\and\@author{#2}}%
\expandafter\g@addto@macro\expandafter\addresses\expandafter{%
\expandafter\and\expandafter\@author\expandafter{%
\csname typeset@author\the\num@authors\endcsname{#2}}}%
\g@addto@macro\authors{\and#2}%
\fi
\fi
@ -1358,9 +1469,20 @@
\if@ACM@anonymous\else
\g@addto@macro\addresses{\email{#1}{#2}}%
\fi}
\def\orcid#1{\unskip\ignorespaces}
% \orcid{<id or URL>}
% Defines, globally, the one-argument macro \typeset@author<n>, where <n>
% is the current value of \num@authors.  That macro wraps its argument in
% a hyperlink: to <id or URL> itself when it starts with `http', otherwise
% to https://orcid.org/<id>.
% NOTE(review): \IfBeginWith is presumably the xstring test -- confirm
% that the package is loaded before this point.
\def\orcid#1{\unskip\ignorespaces%
\IfBeginWith{#1}{http}{%
\expandafter\gdef\csname
typeset@author\the\num@authors\endcsname##1{%
\href{#1}{##1}}}{%
\expandafter\gdef\csname
typeset@author\the\num@authors\endcsname##1{%
\href{https://orcid.org/#1}{##1}}}}
\def\authorsaddresses#1{\def\@authorsaddresses{#1}}
\authorsaddresses{\@mkauthorsaddresses}
% \@mktranslatedkeywords{<language>}{<keywords>}
% Typesets one translated keywords block inside a group: switches to
% <language>, prints the heading \keywordsname through \@specialsection,
% then the keywords as an unindented paragraph.
\newcommand\@mktranslatedkeywords[2]{\bgroup
\selectlanguage{#1}%
{\@specialsection{\keywordsname}%
\noindent#2\par}\egroup}
\def\@titlenotes{}
\def\titlenote#1{%
\g@addto@macro\@title{\footnotemark}%
@ -1466,8 +1588,11 @@
\fi}{\ClassError{\@classname}{The option printacmref can be either true or false}}
\AtEndDocument{\if@ACM@nonacm\else\if@ACM@printacmref\else
\ifnum\getrefnumber{TotPages}>1\relax
\ClassWarningNoLine{\@classname}{ACM reference format is mandatory
for papers over one page}%
\ClassWarningNoLine{\@classname}{%
ACM reference format is mandatory \MessageBreak
for papers over one page. \MessageBreak
Please add printacmref=true to the \MessageBreak
\string\settopmatter\space command.}%
\fi\fi\fi}
\define@boolkey+{@ACM@topmatter@}[@ACM@]{printfolios}[true]{%
\if@ACM@printfolios
@ -1778,6 +1903,12 @@
\else
\g@addto@macro\thankses{\thanks{#1}}%
\fi}}
% \anon[<substitute>]{<text>}
% In anonymous mode (\if@ACM@anonymous) prints <substitute> -- by default
% the word ANONYMIZED -- in the colour ACMOrange; otherwise prints <text>.
\newcommand{\anon}[2][ANONYMIZED]{%
\if@ACM@anonymous%
{\color{ACMOrange}#1}%
\else%
#2%
\fi}
\ifx\@beginmaketitlehook\@undefined
\let\@beginmaketitlehook\@empty
\fi
@ -1875,6 +2006,8 @@
\endgroup
\setcounter{footnote}{0}%
\@mkabstract
\ifx\@translatedabstracts\@empty\else
\@translatedabstracts\fi
\if@ACM@printccs
\ifx\@concepts\@empty\else\bgroup
{\@specialsection{CCS Concepts}%
@ -1882,13 +2015,12 @@
\fi
\fi
\ifx\@keywords\@empty\else\bgroup
{\if@ACM@journal
\@specialsection{Additional Key Words and Phrases}%
\else
\@specialsection{Keywords}%
\fi
\noindent\@keywords}\par\egroup
{\@specialsection{\keywordsname}%
\noindent\@keywords\par}\egroup
\fi
\ifx\@translatedkeywords\@empty\else
\@translatedkeywords
\fi
\let\metadata@authors=\authors
\nxandlist{, }{, }{, }\metadata@authors
\def\@ACM@checkaffil{}%
@ -1916,6 +2048,8 @@
\@afterheading
}
\def\@specialsection#1{%
\let\@vspace\@vspace@orig
\let\@vspacer\@vspacer@orig
\ifcase\ACM@format@nr
\relax % manuscript
\par\medskip\small\noindent#1: %
@ -1936,6 +2070,8 @@
\or % sigchi-a
\section*{#1}%
\fi
\let\@vspace\@vspace@acm
\let\@vspacer\@vspacer@acm
}
\def\@printtopmatter{%
\ifx\@startPage\@empty
@ -1943,6 +2079,18 @@
\else
\setcounter{page}{\@startPage}%
\fi
\@tempdima=\ht\mktitle@bx
\advance\@tempdima by \dp\mktitle@bx
\ifdim\@tempdima>0.9\textheight
\loop
\setbox\@tempboxa=\vsplit \mktitle@bx to 0.9\textheight
\thispagestyle{firstpagestyle}%
\noindent\unvbox\@tempboxa
\clearpage
\@tempdima=\ht\mktitle@bx
\advance\@tempdima by \dp\mktitle@bx
\ifdim\@tempdima>0.9\textheight\repeat
\fi
\thispagestyle{firstpagestyle}%
\noindent
\ifcase\ACM@format@nr
@ -2060,9 +2208,9 @@
\fi
\parbox[t]{\@ACM@title@width}{\raggedright
\@titlefont\noindent
\@title
\@title\@translatedtitle%
\ifx\@subtitle\@empty\else
\par\noindent{\@subtitlefont\@subtitle}
\par\noindent{\@subtitlefont\@subtitle\@translatedsubtitle}%
\fi}%
\ifx\@acmBadgeR@image\@empty\else
\hskip\@ACM@badge@skip
@ -2084,9 +2232,9 @@
\hskip\@ACM@badge@skip
\fi
\parbox[t]{\@ACM@title@width}{\centering\@titlefont
\@title
\@title\@translatedtitle%
\ifx\@subtitle\@empty\else
\par\noindent{\@subtitlefont\@subtitle}
\par\noindent{\@subtitlefont\@subtitle\@translatedsubtitle}
\fi
}%
\if@ACM@badge
@ -2101,9 +2249,9 @@
\def\@mktitle@iv{\hsize=\textwidth
\setbox\mktitle@bx=\vbox{\raggedright\leftskip5pc\@titlefont
\noindent\leavevmode\leaders\hrule height 2pt\hfill\kern0pt\par
\noindent\@title
\noindent\@title\@translatedtitle%
\ifx\@subtitle\@empty\else
\par\noindent\@subtitlefont\@subtitle
\par\noindent\@subtitlefont\@subtitle\@translatedsubtitle%
\fi
\par\bigskip}}%
\newbox\@ACM@commabox
@ -2245,7 +2393,7 @@
\fi
\fi
\global\let\and\@typeset@author@line}%
\global\setbox\mktitle@bx=\vbox{\noindent\box\mktitle@bx\par\medskip
\global\setbox\mktitle@bx=\vbox{\noindent\unvbox\mktitle@bx\par\medskip
\noindent\addresses\@typeset@author@line
\par\medskip}%
}
@ -2311,7 +2459,7 @@
}%
\hsize=\textwidth
\global\setbox\mktitle@bx=\vbox{\noindent
\box\mktitle@bx\par\medskip\leavevmode
\unvbox\mktitle@bx\par\medskip\leavevmode
\lineskip=1pc\relax\centering\hspace*{-1em}%
\addresses\let\and\@typeset@author@bx\and\par\bigskip}}
\def\@mkauthors@iv{%
@ -2366,12 +2514,13 @@
\def\streetaddress##1{\unskip, ##1}%
\def\postcode##1{\unskip, ##1}%
\def\position##1{\unskip\ignorespaces}%
\def\institution##1{\unskip, ##1}%
\gdef\@ACM@institution@separator{, }%
\def\institution##1{\unskip\@ACM@institution@separator ##1\gdef\@ACM@institution@separator{ and }}%
\def\city##1{\unskip, ##1}%
\def\state##1{\unskip, ##1}%
\renewcommand\department[2][0]{\unskip\@addpunct, ##2}%
\def\country##1{\unskip, ##1}%
\def\and{\unskip; }%
\def\and{\unskip; \gdef\@ACM@institution@separator{, }}%
\def\@author##1{##1}%
\def\email##1##2{\unskip, \nolinkurl{##2}}%
\addresses
@ -2388,7 +2537,7 @@
\ifx\@teaserfigures\@empty\else
\def\@teaser##1{\par\bigskip\bgroup
\captionsetup{type=figure}##1\egroup\par}
\global\setbox\mktitle@bx=\vbox{\noindent\box\mktitle@bx\par
\global\setbox\mktitle@bx=\vbox{\noindent\unvbox\mktitle@bx\par
\noindent\@Description@presentfalse
\@teaserfigures\par\if@Description@present\else
\global\@undescribed@imagestrue
@ -2398,7 +2547,7 @@
\fi}
\def\@mkabstract{\bgroup
\ifx\@abstract\@lempty\else
{\phantomsection\addcontentsline{toc}{section}{Abstract}%
{\phantomsection\addcontentsline{toc}{section}{\abstractname}%
\if@ACM@journal
\everypar{\setbox\z@\lastbox\everypar{}}\small
\else
@ -2406,6 +2555,13 @@
\fi
\ignorespaces\@abstract\par}%
\fi\egroup}
% \@mktranslatedabstract{<language>}
% Opens a translated abstract: switches to <language>, then either (journal
% formats) switches to \small and installs an \everypar hook that removes
% the first paragraph's indentation box, or (other formats) starts an
% unnumbered section headed \abstractname.  Ends with \ignorespaces so
% that the abstract text can follow directly.
\def\@mktranslatedabstract#1{\selectlanguage{#1}%
\if@ACM@journal
\everypar{\setbox\z@\lastbox\everypar{}}\small
\else
\section*{\abstractname}%
\fi
\ignorespaces}
\def\@mkbibcitation{\bgroup
\let\@vspace\@vspace@orig
\let\@vspacer\@vspacer@orig
@ -2587,7 +2743,10 @@
\fancyfoot[C]{\if@ACM@printfolios\footnotesize\thepage\fi}%
\fancyhead[LO]{\ACM@linecountL\@headfootfont\shorttitle}%
\fancyhead[RE]{\@headfootfont\@shortauthors\ACM@linecountR}%
\if@ACM@nonacm\else%
\if@ACM@nonacm
\fancyhead[LE]{\ACM@linecountL}%
\fancyhead[RO]{\ACM@linecountR}%
\else%
\fancyhead[LE]{\ACM@linecountL\@headfootfont\footnotesize
\acmConference@shortname,
\acmConference@date, \acmConference@venue}%
@ -2600,7 +2759,10 @@
\fancyfoot[C]{\if@ACM@printfolios\footnotesize\thepage\fi}%
\fancyhead[LO]{\ACM@linecountL\@headfootfont\shorttitle}%
\fancyhead[RE]{\@headfootfont\@shortauthors\ACM@linecountR}%
\if@ACM@nonacm\else%
\if@ACM@nonacm
\fancyhead[LE]{\ACM@linecountL}%
\fancyhead[RO]{\ACM@linecountR}%
\else%
\fancyhead[LE]{\ACM@linecountL\@headfootfont
\acmConference@shortname,
\acmConference@date, \acmConference@venue}%
@ -2931,6 +3093,25 @@
\popQED\endtrivlist\@endpefalse
}
\AtEndPreamble{%
\if@ACM@pbalance
\global\@ACM@balancefalse
\ifcase\ACM@format@nr
\relax % manuscript
\or % acmsmall
\or % acmlarge
\or % acmtog
\RequirePackage{pbalance}%
\or % sigconf
\RequirePackage{pbalance}%
\or % siggraph
\RequirePackage{pbalance}%
\or % sigplan
\RequirePackage{pbalance}%
\or % sigchi
\RequirePackage{pbalance}%
\or % sigchi-a
\fi
\fi
\if@ACM@balance
\ifcase\ACM@format@nr
\relax % manuscript
@ -2988,9 +3169,24 @@
\def\@tempa{#1}%
\ifx\@tempa\@empty\def\@tempa{arxiv}\fi
\def\@tempb{arxiv}%
\ifx\@tempa\@tempb
arXiv:\href{https://arxiv.org/abs/#2}{#2}\else arXiv:#2%
\ifx\@tempa\@tempb\relax
arXiv:\href{https://arxiv.org/abs/#2}{#2}%
\else
\def\@tempb{arXiv}%
\ifx\@tempa\@tempb\relax
arXiv:\href{https://arxiv.org/abs/#2}{#2}%
\else
#1:#2%
\fi
\fi}
% Start of the theindex environment.
% Sets \if@restonecol to true, or to false when the document is already in
% two-column mode; uses no column rule and a 35pt column separation; prints
% the index title through \@indextitlestyle; makes \item behave as
% \@idxitem; and typesets entries without paragraph indentation, ragged
% right, with \hyphenpenalty set to \@M, in \footnotesize.
\def\theindex{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi
\columnseprule\z@ \columnsep 35\p@
\@indextitlestyle
\let\item\@idxitem
\parindent\z@ \parskip\z@\@plus.3\p@\relax
\raggedright
\hyphenpenalty\@M
\footnotesize}
\let\@vspace@orig=\@vspace
\let\@vspacer@orig=\@vspacer
\apptocmd{\@vspace}{\ClassWarning{\@classname}{\string\vspace\space should

File diff suppressed because it is too large Load Diff

BIN
acmart.pdf Normal file

Binary file not shown.

BIN
acmart/acmart-primary.zip Normal file

Binary file not shown.

31
acmart/acmart-primary/.gitignore vendored Normal file
View File

@ -0,0 +1,31 @@
acmart.cls
acmart.pdf
acmguide.pdf
samples/sample-*.pdf
*.log
*.aux
*.cfg
*.glo
*.idx
*.toc
*.ilg
*.ind
*.out
*.lof
*.lot
*.bbl
*.blg
*.gls
*.cut
*.hd
*.dvi
*.ps
*.thm
*.tgz
*.zip
*.rpi
*~
*.bcf
*.run.xml
samples/ACM-Reference-Format.bst
samples/*.tex

View File

@ -0,0 +1,830 @@
\ProvidesFile{ACM-Reference-Format.bbx}[2017-09-27 v0.1 biblatex bibliography style]
% Inherit a default style
\RequireBibliographyStyle{trad-plain}
%%% Localisation strings for ACM
% Overrides for the `american' localisation: thesis type names, the
% parenthesised editor abbreviations (Ed.)/(Eds.), and the edition
% abbreviation.  \adddot and \adddotspace are used instead of literal
% periods for the abbreviation dots.
\DefineBibliographyStrings{american}{%
mathesis = {Master's thesis},
phdthesis = {Ph\adddot{}D\adddotspace Dissertation},
editor = {(Ed\adddot)},
editors = {(Eds\adddot)},
edition = {ed\adddot},
}
%%% Formatting for fields
%\DeclareFieldFormat
% [article,inbook,incollection,inproceedings,patent,thesis,unpublished]
% {title}{#1}
% Field formats.  pages, number and key are printed verbatim; numpages gets
% the suffix `pages' and articleno the prefix `Article'.
\DeclareFieldFormat{pages}{#1}
\DeclareFieldFormat{numpages}{#1 pages}
\DeclareFieldFormat{number}{#1}
\DeclareFieldFormat{articleno}{Article #1}
\DeclareFieldFormat{key}{#1}
% The access date is printed as `Retrieved <date> from'; lastaccessed is an
% alias for the same format.
\DeclareFieldFormat{urldate}{Retrieved\space{}#1\space{}from}
\DeclareFieldAlias{lastaccessed}{urldate}
\DeclareFieldFormat{url}{\url{#1}}
% edition: printed in parentheses; an integer value is turned into an
% ordinal followed by the `edition' string, any other value is printed as
% given, followed by the same string.
\DeclareFieldFormat{edition}{%
\printtext[parens]{\ifinteger{#1}
{\mkbibordedition{#1}~\bibstring{edition}}
{#1\isdot~\bibstring{edition}}}}
% Handle urls field containing 'and' separated list of URLs
% https://github.com/plk/biblatex/issues/229
% Each item of the urls list is printed with \url; items are separated by
% a comma and a space, with nothing after the last one.
\DeclareListFormat{urls}{%
\url{#1}%
\ifthenelse{\value{listcount}<\value{liststop}}
{\addcomma\space}
{}}
% url macro: print the url field when it is defined, otherwise fall back
% to the urls list.
\renewbibmacro*{url}{\iffieldundef{url}{\printlist{urls}}{\printfield{url}}}
%%% Bibmacro definitions
% translator+others: when translators are in use and the translator name
% list is defined, print the translators followed by their role string and
% clear the list so that it is not printed again; otherwise print the key
% field in their place.
\renewbibmacro*{translator+others}{%
\ifboolexpr{
test \ifusetranslator
and
not test {\ifnameundef{translator}}
}
{\printnames{translator}%
\setunit{\addcomma\space}%
\usebibmacro{translator+othersstrg}%
\clearname{translator}}
{\printfield{key}}}
% year: print the year field, or the placeholder `[n. d.]' when the entry
% has no year.
\newbibmacro*{year}{%
\iffieldundef{year}%
{\printtext{[n.\ d.]}}%
{\printfield{year}}%
}
% date: print the entry date in parentheses.
\renewbibmacro*{date}{\printtext[parens]{\printdate}}
% url+urldate: when an access date is available (urlyear is defined), print
% it first, separated from the URL by a space; then print the URL.
\renewbibmacro*{url+urldate}{\iffieldundef{urlyear}
{}
{\usebibmacro{urldate}%
\setunit*{\addspace}}%
\usebibmacro{url}%
}
% journal+issuetitle: journal name, optional series, then the locator
% produced by volume+number+date+pages+eid, followed by the issue
% information (issue-issue) and the issue title (issue).
\renewbibmacro*{journal+issuetitle}{%
\usebibmacro{journal}%
\setunit*{\addcomma\space}%
\iffieldundef{series}
{}
{\newunit%
\printfield{series}%
\setunit{\addspace}}%
\usebibmacro{volume+number+date+pages+eid}%
\newcommaunit%
% \setunit{\addspace}%
\usebibmacro{issue-issue}%
\setunit*{\addcolon\space}%
\usebibmacro{issue}%
\newunit}
% volume+number+date+pages+eid: journal locator.  Prints volume, number and
% article number, the date (only when the entry has a month, see
% date-ifmonth), then pages -- or numpages when pages is missing -- and
% finally eid.  Every line ends in `%' so that no stray space tokens are
% inserted between the fields.
\newbibmacro*{volume+number+date+pages+eid}{%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \usebibmacro{date-ifmonth}%
  \setunit{\addcomma\space}%
  \iffieldundef{pages}%
    {\printfield{numpages}}%
    {\printfield{pages}}%
  \newcommaunit%
  \printfield{eid}}%
% chapter+pages: chapter, then pages -- or numpages when the entry has no
% pages field.
\renewbibmacro*{chapter+pages}{%
\printfield{chapter}%
\setunit{\bibpagespunct}%
\iffieldundef{pages}%
{\printfield{numpages}}%
{\printfield{pages}}%
\newunit}
% editor+others: when editors are in use and the editor name list is
% defined, print the editors followed by their role string and clear the
% list; otherwise print the organization list, if there is one.
\renewbibmacro*{editor+others}{%
\ifboolexpr{
test \ifuseeditor
and
not test {\ifnameundef{editor}}
}
{\printnames{editor}%
\setunit{\addcomma\space}%
\usebibmacro{editor+othersstrg}%
\clearname{editor}}
{\iflistundef{organization}{}{\printlist{organization}}}}
% issue-issue: when the issue field is defined, print it followed by the
% date (the date only when the entry has a month, see date-ifmonth).
\newbibmacro*{issue-issue}{%
\iffieldundef{issue}%
{}%
{\printfield{issue}%
\setunit*{\addcomma\space}%
\usebibmacro{date-ifmonth}%
}%
\newunit}
% maintitle+booktitle+series+number: for entries published in a
% collection.  Prints the main title (with volume and part when a volume is
% given), the book title, the series in parentheses, the number and the
% article number.  Line ends after \setunit* and \printfield are protected
% with `%' so that no stray space tokens are inserted.
\newbibmacro*{maintitle+booktitle+series+number}{%
  \iffieldundef{maintitle}
    {}
    {\usebibmacro{maintitle}%
     \newunit\newblock
     \iffieldundef{volume}
       {}
       {\printfield{volume}%
        \printfield{part}%
        \setunit{\addcolon\space}}}%
  \usebibmacro{booktitle}%
  \setunit*{\addspace}%
  \printfield[parens]{series}%
  \setunit*{\addspace}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \newunit
}
% booktitle: when booktitle or booksubtitle is defined, print the book
% title in the booktitle format, followed -- if a subtitle exists -- by the
% subtitle punctuation and the subtitle; then print booktitleaddon.
% The subtitle branch now opens with `{%' so that it no longer starts with
% a stray space token.
\renewbibmacro*{booktitle}{%
  \ifboolexpr{
    test {\iffieldundef{booktitle}}
    and
    test {\iffieldundef{booksubtitle}}
  }
    {}
    {\printtext[booktitle]{%
       \printfield[titlecase]{booktitle}%
       \iffieldundef{booksubtitle}{}{%
         \setunit{\subtitlepunct}%
         \printfield[titlecase]{booksubtitle}}%
     }%
    }%
  \printfield{booktitleaddon}}
% volume+number+eid: volume, number, article number and eid, separated by
% commas.  The line after articleno is protected with `%' so that no stray
% space token follows the field.
\renewbibmacro*{volume+number+eid}{%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \printfield{eid}}
% publisher+location+date: publisher and location lists, then the date
% (only when the entry has a month, see date-ifmonth), separated by commas.
\renewbibmacro*{publisher+location+date}{%
\printlist{publisher}%
\setunit*{\addcomma\space}%
\printlist{location}%
\setunit*{\addcomma\space}%
\usebibmacro{date-ifmonth}%
\newunit}
% date-ifmonth: print the date (via the date macro) only when the entry has
% a month field; entries with just a year print nothing here.  The line
% after the date is protected with `%' so that no stray space token follows
% it.
\newbibmacro{date-ifmonth}{%
  \iffieldundef{month}{}{%
    \usebibmacro{date}%
  }%
}
% institution+location+date: school, institution and location lists, then
% the date (only when the entry has a month, see date-ifmonth), separated
% by commas.
\renewbibmacro*{institution+location+date}{%
\printlist{school}%
\setunit*{\addcomma\space}%
\printlist{institution}%
\setunit*{\addcomma\space}%
\printlist{location}%
\setunit*{\addcomma\space}%
\usebibmacro{date-ifmonth}%
\newunit}
% periodical: when a title is defined, print title and subtitle in the
% title format; then print the journal name.
\renewbibmacro*{periodical}{%
\iffieldundef{title}
{}
{\printtext[title]{%
\printfield[titlecase]{title}%
\setunit{\subtitlepunct}%
\printfield[titlecase]{subtitle}}}%
\newunit%
\usebibmacro{journal}}
% issue+date: the issue field (when defined) followed by the date; without
% an issue only the date is printed.
\renewbibmacro*{issue+date}{%
\iffieldundef{issue}
{\usebibmacro{date}}
{\printfield{issue}%
\setunit*{\addspace}%
\usebibmacro{date}}%
\newunit}
% title+issuetitle: heading of a periodical entry.  Prints the periodical
% title, optional series, volume, number, article number and eid, then the
% issue with its date and the issue title.  The line after articleno is
% protected with `%' so that no stray space token follows the field.
\renewbibmacro*{title+issuetitle}{%
  \usebibmacro{periodical}%
  \setunit*{\addspace}%
  \iffieldundef{series}
    {}
    {\newunit
     \printfield{series}%
     \setunit{\addspace}}%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \printfield{eid}%
  \setunit{\addspace}%
  \usebibmacro{issue+date}%
  \setunit{\addcolon\space}%
  \usebibmacro{issue}%
  \newunit}
% doi+eprint+url: electronic locators, each guarded by its bbx toggle.
% The URL (with access date) is printed only when the entry has no DOI;
% then the eprint information; then the DOI itself.
\renewbibmacro*{doi+eprint+url}{%
\iftoggle{bbx:url}
{\iffieldundef{doi}{\usebibmacro{url+urldate}}{}}
{}%
\newunit\newblock
\iftoggle{bbx:eprint}
{\usebibmacro{eprint}}
{}%
\newunit\newblock
\iftoggle{bbx:doi}
{\printfield{doi}}
{}}
%%% Definitions for drivers (alphabetical)
% Driver for @article.  Order of output: authors/translators, year, title,
% language, byauthor/bytranslator statements, version, journal block
% (journal+issuetitle), editors, note, ISSN (when the bbx:isbn toggle is
% set), DOI/eprint/URL, addendum and publication state, back references,
% related entries.
\DeclareBibliographyDriver{article}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/translator+others}%
\setunit{\labelnamepunct}\newblock%
\usebibmacro{year}%
\newunit%
\usebibmacro{title}%
\newunit%
\printlist{language}%
\newunit\newblock%
\usebibmacro{byauthor}%
\newunit\newblock%
\usebibmacro{bytranslator+others}%
\newunit\newblock%
\printfield{version}%
\newunit\newblock%
\usebibmacro{journal+issuetitle}%
\newunit%
\usebibmacro{byeditor+others}%
\newunit%
\printfield{note}%
\newunit\newblock%
\iftoggle{bbx:isbn}
{\printfield{issn}}
{}%
\newunit\newblock%
\usebibmacro{doi+eprint+url}%
\newunit\newblock%
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock%
\usebibmacro{related}%
\usebibmacro{finentry}}
% Driver for @book.  Order of output: authors/editors/translators, year,
% main title and title, language, byauthor/byeditor statements, edition,
% series and number, volume and part (only when there is no main title),
% number of volumes, note, publisher/location/date, chapter and pages,
% total pages, ISBN (when the bbx:isbn toggle is set), DOI/eprint/URL,
% addendum and publication state, back references, related entries (when
% the bbx:related toggle is set).
\DeclareBibliographyDriver{book}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/editor+others/translator+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit%
\usebibmacro{maintitle+title}%
\newunit%
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{byeditor+others}%
\newunit\newblock
\printfield{edition}%
\newunit
\usebibmacro{series+number}%
\iffieldundef{maintitle}
{\printfield{volume}%
\printfield{part}}
{}%
\newunit
\newunit\newblock
\printfield{volumes}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
\usebibmacro{publisher+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit
\printfield{pagetotal}%
\newunit\newblock
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
% Driver for @inbook.  The entry is headed by the author when there is one
% and by the editor statement otherwise; in the first case the editor
% statement is printed later, after the book title.  Then follow edition,
% volume and part (only when there is no main title), number of volumes,
% series and number, note, publisher/location/date, chapter and pages, ISBN
% (when the bbx:isbn toggle is set), DOI/eprint/URL, addendum and
% publication state, back references and related entries.
%
% author is a name list, so its presence is tested with \ifnameundef;
% \iffieldundef only inspects fields and reports every name list as
% undefined, which made the author branch unreachable.
\DeclareBibliographyDriver{inbook}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\ifnameundef{author}%
{\usebibmacro{byeditor+others}}%
{\usebibmacro{author/translator+others}}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
% \usebibmacro{in:}%
\usebibmacro{bybookauthor}%
\newunit\newblock
\usebibmacro{maintitle+booktitle}%
\newunit\newblock
\ifnameundef{author}{}%if undef then we already printed editor
{\usebibmacro{byeditor+others}}%
\newunit\newblock
\printfield{edition}%
\newunit
\iffieldundef{maintitle}
{\printfield{volume}%
\printfield{part}}
{}%
\newunit
\printfield{volumes}%
\newunit\newblock
\usebibmacro{series+number}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
\usebibmacro{publisher+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit\newblock
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
% Driver for @incollection.  Order of output: authors/translators, year,
% title, language, byauthor statement, `In:' with main title and book
% title, series and number, edition, volume and part (only when there is no
% main title), number of volumes, editors, note, publisher/location/date,
% chapter and pages, ISBN (when the bbx:isbn toggle is set),
% DOI/eprint/URL, addendum and publication state, back references, related
% entries.  The line after the year is protected with `%', as in the
% article and book drivers, so that no stray space token follows it.
\DeclareBibliographyDriver{incollection}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/translator+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{in:}%
\usebibmacro{maintitle+booktitle}%
\newunit\newblock
\usebibmacro{series+number}%
\newunit\newblock
\printfield{edition}%
\newunit
\iffieldundef{maintitle}
{\printfield{volume}%
\printfield{part}}
{}%
\newunit
\printfield{volumes}%
\newunit\newblock
\usebibmacro{byeditor+others}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
\usebibmacro{publisher+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit\newblock
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
% Driver for @inproceedings.  Order of output: authors/translators, year,
% title, language, byauthor statement, `In:' with the proceedings title
% block (maintitle+booktitle+series+number), event/venue/date, editors,
% volume and part (only when there is no main title), number of volumes,
% note, organization, publisher/location/date, chapter and pages, ISBN
% (when the bbx:isbn toggle is set), DOI/eprint/URL, addendum and
% publication state, back references, related entries.  The line after the
% year is protected with `%', as in the article and book drivers, so that
% no stray space token follows it.
\DeclareBibliographyDriver{inproceedings}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/translator+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{in:}%
\usebibmacro{maintitle+booktitle+series+number}%
\newunit\newblock
\usebibmacro{event+venue+date}%
\newunit\newblock
\usebibmacro{byeditor+others}%
\newunit\newblock
\iffieldundef{maintitle}
{\printfield{volume}%
\printfield{part}}
{}%
\newunit
\printfield{volumes}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
\printlist{organization}%
\newunit
\usebibmacro{publisher+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit\newblock
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
% Driver for @manual.  Order of output: authors/editors, year, title,
% language, byauthor/byeditor statements, edition, series and number, type,
% version, note, organization, publisher/location/date, chapter and pages,
% total pages, ISBN (when the bbx:isbn toggle is set), DOI/eprint/URL,
% addendum and publication state, back references, related entries.  The
% line after the year is protected with `%', as in the article and book
% drivers, so that no stray space token follows it.
\DeclareBibliographyDriver{manual}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/editor+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{byeditor}%
\newunit\newblock
\printfield{edition}%
\newunit\newblock
\usebibmacro{series+number}%
\newunit\newblock
\printfield{type}%
\newunit
\printfield{version}%
\newunit
\printfield{note}%
\newunit\newblock
\printlist{organization}%
\newunit
\usebibmacro{publisher+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit
\printfield{pagetotal}%
\newunit\newblock
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
% Driver for @misc.  Order of output: authors/editors/translators, year,
% title, language, byauthor/byeditor statements, howpublished, type,
% version, note, organization/location/date, DOI/eprint/URL, addendum and
% publication state, back references, related entries.  The line after the
% year is protected with `%', as in the article and book drivers, so that
% no stray space token follows it.
\DeclareBibliographyDriver{misc}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/editor+others/translator+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{byeditor+others}%
\newunit\newblock
\printfield{howpublished}%
\newunit\newblock
\printfield{type}%
\newunit
\printfield{version}%
\newunit
\printfield{note}%
\newunit\newblock
\usebibmacro{organization+location+date}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
%% Bibliography driver for @online entries.
%% Prints names and year first, then title, language, by-statements,
%% version, note, organization and date-ifmonth.  Unlike the other
%% drivers in this file it calls url+urldate directly instead of the
%% doi+eprint+url macro, so the URL is printed unconditionally here.
\DeclareBibliographyDriver{online}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author/editor+others/translator+others}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
%% The year is followed by \labelnamepunct here rather than \newunit.
\setunit{\labelnamepunct}\newblock
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{byeditor+others}%
\newunit\newblock
\printfield{version}%
\newunit
\printfield{note}%
\newunit\newblock
\printlist{organization}%
\newunit\newblock
\usebibmacro{date-ifmonth}%
\newunit\newblock
%% eprint is optional output, controlled by the bbx:eprint toggle.
\iftoggle{bbx:eprint}
{\usebibmacro{eprint}}
{}%
\newunit\newblock
\usebibmacro{url+urldate}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
%% Related entries are optional output, controlled by bbx:related.
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
%% Format of the number field for @patent entries only: the value is
%% prefixed with "Patent No." and tied to it with a non-breaking space.
\DeclareFieldFormat[patent]{number}{Patent No.~#1}
%% Bibliography driver for @patent entries.
%% Prints author and year first, then title, language, byauthor, date,
%% type and number, the location list in parentheses (when defined),
%% the holder, note and the doi/eprint/url block.
\DeclareBibliographyDriver{patent}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author}%
\setunit{\labelnamepunct}\newblock
\usebibmacro{year}%
\newunit
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\usebibmacro{date}%
\newunit\newblock
\printfield{type}%
%% type and number are separated by a plain space, not unit punctuation.
\setunit*{\addspace}%
\printfield{number}%
%% When a location list exists, print its items (up to listtotal)
%% wrapped in parentheses, separated from the number by a space.
\iflistundef{location}
{}
{\setunit*{\addspace}%
\printtext[parens]{%
\printlist[][-\value{listtotal}]{location}}}%
\newunit\newblock
\usebibmacro{byholder}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
%% Related entries are optional output, controlled by bbx:related.
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
%% Bibliography driver for @periodical entries.
%% Prints editor and year first, then title+issuetitle, language,
%% byeditor, note, the ISSN (only when the bbx:isbn toggle is on) and
%% the doi/eprint/url block.
\DeclareBibliographyDriver{periodical}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{editor}%
\setunit{\labelnamepunct}\newblock
%% The trailing % keeps the end of line from being read as a space
%% token inside the driver body (as in the online and patent drivers).
\usebibmacro{year}%
\newunit
\usebibmacro{title+issuetitle}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byeditor}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
%% The issn field is gated by the same bbx:isbn toggle as ISBNs.
\iftoggle{bbx:isbn}
{\printfield{issn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
%% Related entries are optional output, controlled by bbx:related.
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
%% Bibliography driver for @report entries.
%% Prints author and year first, then title, language, byauthor, type
%% and number, version, note, institution+location+date, chapter+pages,
%% pagetotal, the ISRN (only when the bbx:isbn toggle is on) and the
%% doi/eprint/url block.
\DeclareBibliographyDriver{report}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author}%
\setunit{\labelnamepunct}\newblock
%% The trailing % keeps the end of line from being read as a space
%% token inside the driver body (as in the online and patent drivers).
\usebibmacro{year}%
\newunit
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\printfield{type}%
%% type and number are separated by a plain space, not unit punctuation.
\setunit*{\addspace}%
\printfield{number}%
\newunit\newblock
\printfield{version}%
\newunit
\printfield{note}%
\newunit\newblock
\usebibmacro{institution+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit
\printfield{pagetotal}%
\newunit\newblock
%% The isrn field is gated by the same bbx:isbn toggle as ISBNs.
\iftoggle{bbx:isbn}
{\printfield{isrn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
%% Related entries are optional output, controlled by bbx:related.
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}
%% Bibliography driver for @thesis entries.
%% Prints author and year first, then title, language, byauthor, type,
%% institution+location+date, chapter+pages, pagetotal, the ISBN (only
%% when the bbx:isbn toggle is on) and the doi/eprint/url block.
%% Note that this driver prints the note field late, after pageref.
\DeclareBibliographyDriver{thesis}{%
\usebibmacro{bibindex}%
\usebibmacro{begentry}%
\usebibmacro{author}%
\setunit{\labelnamepunct}\newblock
%% The trailing % keeps the end of line from being read as a space
%% token inside the driver body (as in the online and patent drivers).
\usebibmacro{year}%
\newunit
\usebibmacro{title}%
\newunit
\printlist{language}%
\newunit\newblock
\usebibmacro{byauthor}%
\newunit\newblock
\printfield{type}%
\newunit
\usebibmacro{institution+location+date}%
\newunit\newblock
\usebibmacro{chapter+pages}%
\newunit
\printfield{pagetotal}%
\newunit\newblock
%% ISBN is optional output, controlled by the bbx:isbn toggle.
\iftoggle{bbx:isbn}
{\printfield{isbn}}
{}%
\newunit\newblock
\usebibmacro{doi+eprint+url}%
\newunit\newblock
\usebibmacro{addendum+pubstate}%
\setunit{\bibpagerefpunct}\newblock
\usebibmacro{pageref}%
\newunit\newblock
\printfield{note}%
\newunit\newblock
%% Related entries are optional output, controlled by bbx:related.
\iftoggle{bbx:related}
{\usebibmacro{related:init}%
\usebibmacro{related}}
{}%
\usebibmacro{finentry}}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,112 @@
#
# Makefile for acmart package
#
# This file is in public domain
#
# $Id: Makefile,v 1.10 2016/04/14 21:55:57 boris Exp $
#
# Name of the package; the class, the .dtx/.ins sources and the
# distribution archives are all derived from it.
PACKAGE=acmart

# Typeset documentation built by the default target.
PDF = $(PACKAGE).pdf acmguide.pdf

# Default target: documentation plus every sample document.
all: ${PDF} ALLSAMPLES

# Generic rule: typeset a .dtx into a PDF.  bibtex and makeindex are
# allowed to fail (leading "-"); pdflatex is rerun while the log still
# reports changed labels.
%.pdf: %.dtx $(PACKAGE).cls
	pdflatex $<
	- bibtex $*
	pdflatex $<
	- makeindex -s gind.ist -o $*.ind $*.idx
	- makeindex -s gglo.ist -o $*.gls $*.glo
	pdflatex $<
	while ( grep -q '^LaTeX Warning: Label(s) may have changed' $*.log) \
	do pdflatex $<; done

# The user guide is produced from the same .dtx under another jobname.
acmguide.pdf: $(PACKAGE).dtx $(PACKAGE).cls
	pdflatex -jobname acmguide $(PACKAGE).dtx
	- bibtex acmguide
	pdflatex -jobname acmguide $(PACKAGE).dtx
	while ( grep -q '^LaTeX Warning: Label(s) may have changed' acmguide.log) \
	do pdflatex -jobname acmguide $(PACKAGE).dtx; done

# Generate the class file by running the installer script.
%.cls: %.ins %.dtx
	pdflatex $<

# Generate the sample .tex files, then build one PDF per sample through
# a recursive make so the pattern rules below are used.
ALLSAMPLES:
	cd samples; pdflatex samples.ins; cd ..
	for texfile in samples/*.tex; do \
	pdffile=$${texfile%.tex}.pdf; \
	${MAKE} $$pdffile; \
	done

# Copy a top-level file into samples/.
samples/%: %
	cp $^ samples

samples/$(PACKAGE).cls: $(PACKAGE).cls

samples/ACM-Reference-Format.bst: ACM-Reference-Format.bst

# Default sample build: pdflatex-dev, run inside the samples directory.
samples/%.pdf: samples/%.tex samples/$(PACKAGE).cls samples/ACM-Reference-Format.bst
	cd $(dir $@) && pdflatex-dev $(notdir $<)
	- cd $(dir $@) && bibtex $(notdir $(basename $<))
	cd $(dir $@) && pdflatex-dev $(notdir $<)
	cd $(dir $@) && pdflatex-dev $(notdir $<)
	while ( grep -q '^LaTeX Warning: Label(s) may have changed' $(basename $<).log) \
	do cd $(dir $@) && pdflatex-dev $(notdir $<); done

# The XeLaTeX sample is built with xelatex-dev instead.
samples/sample-xelatex.pdf: samples/sample-xelatex.tex samples/$(PACKAGE).cls samples/ACM-Reference-Format.bst
	cd $(dir $@) && xelatex-dev $(notdir $<)
	- cd $(dir $@) && bibtex $(notdir $(basename $<))
	cd $(dir $@) && xelatex-dev $(notdir $<)
	cd $(dir $@) && xelatex-dev $(notdir $<)
	while ( grep -q '^LaTeX Warning: Label(s) may have changed' $(basename $<).log) \
	do cd $(dir $@) && xelatex-dev $(notdir $<); done

# The LuaLaTeX sample is built with lualatex-dev instead.
samples/sample-lualatex.pdf: samples/sample-lualatex.tex samples/$(PACKAGE).cls samples/ACM-Reference-Format.bst
	cd $(dir $@) && lualatex-dev $(notdir $<)
	- cd $(dir $@) && bibtex $(notdir $(basename $<))
	cd $(dir $@) && lualatex-dev $(notdir $<)
	cd $(dir $@) && lualatex-dev $(notdir $<)
	while ( grep -q '^LaTeX Warning: Label(s) may have changed' $(basename $<).log) \
	do cd $(dir $@) && lualatex-dev $(notdir $<); done

# Keep the generated class and config file even when they are
# produced as intermediate files.
.PRECIOUS: $(PACKAGE).cfg $(PACKAGE).cls

# Remove auxiliary files but keep the PDFs, the class and the sample sources.
docclean:
	$(RM) *.log *.aux \
	*.cfg *.glo *.idx *.toc \
	*.ilg *.ind *.out *.lof \
	*.lot *.bbl *.blg *.gls *.cut *.hd \
	*.dvi *.ps *.thm *.tgz *.zip *.rpi \
	samples/$(PACKAGE).cls samples/ACM-Reference-Format.bst \
	samples/*.log samples/*.aux samples/*.out \
	samples/*.bbl samples/*.blg samples/*.cut

# Additionally remove the generated class and the generated sample sources.
clean: docclean
	$(RM) $(PACKAGE).cls \
	samples/*.tex

# Additionally remove the typeset PDFs.
distclean: clean
	$(RM) *.pdf samples/sample-*.pdf

#
# Archive for the distribution. Includes typeset documentation
#
archive: all clean
	COPYFILE_DISABLE=1 tar -C .. -czvf ../$(PACKAGE).tgz --exclude '*~' --exclude '*.tgz' --exclude '*.zip' --exclude CVS --exclude '.git*' $(PACKAGE); mv ../$(PACKAGE).tgz .

zip: all clean
	zip -r $(PACKAGE).zip * -x '*~' -x '*.tgz' -x '*.zip' -x CVS -x 'CVS/*'

documents.zip: all docclean
	zip -r $@ acmart.pdf acmguide.pdf samples *.cls ACM-Reference-Format.*

.PHONY: all ALLSAMPLES docclean clean distclean archive zip

View File

@ -0,0 +1,311 @@
This package provides a class for typesetting publications of the
Association for Computing Machinery.
Your TeX distribution probably includes the latest released version of
this package. If you decide to install it yourself, please see the
Installation section of the User's Guide.
Please note that the version on Github is a development (or
experimental) version: please download it for testing new features.
The production version is the one on CTAN and ACM sites.
Changes
Version 1.83 Support for multilanguage papers
ISSN changes for some journals
Version 1.82 Bug fixes.
New command \anon for anonymization of short strings.
Documentation update.
Version 1.81 Bug fixes
New bib field distinctURL to print URL even if doi is present.
Reworded samples
Version 1.80 New journals: DLT, FAC
Version 1.79 Fixed pages with index
(https://github.com/borisveytsman/acmart/issues/440)
Updated information for TAP, TCPS, TEAC
Version 1.78 Documentation update.
Magic texcount comments for samples.
Title page now is split if there are too many authors
Bug fixes.
Version 1.77 Changed the way to typeset multiple affiliations (Christoph Sommer)
Version 1.76 Added many journal abbreviations to the bst.
New experimental option: pbalance
ORCID linking code
Version 1.75 \country is now obligatory for addresses.
Added \AtBeginMaketitle
Version 1.74 Bug fixes. A regression introduced in the font changes
is reverted.
Version 1.73 Bug fixes
The elements institution, city and country are now obligatory
for affiliations
Version 1.72 Bug fixes. Better handling of metadata.
Version 1.71 Bug fixes
Formats sigchi and sigchi-a are retired
Bibliography formatting changes for @inproceedings entries
having both series and volume
LuaLaTeX now uses the same OTF fonts as XeLaTeX
Version 1.70 Title change for ACM/IMS Transactions on Data Science
Bug fixes for bibliography
Version 1.69 Bug fixes
Compatibility with LaTeX 2020-02-02 release
Version 1.68 Bug fixes
BST now recognizes words `Paper' or 'Article' in
eid or articleno
Version 1.67 Urgent bug fixes:
BibTeX style bug fixed (Michael D. Adams)
Sigplan special section bugfix
Version 1.66 Bug fixes
BibTeX change: location is now a synonym for city (Feras Saad)
ACM reference format is now mandatory for papers over one page.
CCS concepts and keywords are now mandatory for
papers over two pages.
Authors' addresses are mandatory for journal articles.
Version 1.65 Bug fixes
New journal: DGOV
DTRAP and HEALTH are now using acmlarge format
Version 1.64 Produce error if abstract is entered after maketitle
(previously abstract was silently dropped)
Bug fixes for line numbering
Version 1.63a Moved TQUANT to TQC
Version 1.63 New journals: TQUANT, FACMP
Version 1.62 Documentation update
New journal: TELO
Bug fixes
Version 1.61 Bug fixes
New bibtex types for artifacts
Version 1.60 New option: urlbreakonhyphens (thanks to Peter Kemp)
Smaller header size for acmsmall
Version 1.59 Now a journal format can be used for conference proceedings
All samples are now generated from the same .dtx file
Bug fixes
Version 1.58 Suppressed spurious warnings.
New journal: HEALTH.
TDSCI is renamed to TDS.
Version 1.57 Change of \baselinestretch now produces an error
Booktabs is now always loaded
Added option `balance' to balance last page in two-column mode
E-mail is no longer split in addresses
New samples (Stephen Spencer)
Version 1.56 Bug fixes
Added \flushbottom to two column formats (Philip Quinn)
The final punctuation for the list of concepts
is now a period instead of a semicolon (Philip Quinn)
New command \Description to describe images for visually
impaired users.
Version 1.55 Bug fixes
Font changes for SIGCHI table captions
Version 1.54 New option: 'nonacm' (Gabriel Scherer)
Deleted indent for subsubsection (suggested by Ross Moore)
Suppressed some obscure warnings in BibTeX processing
Suppressed hyperref warnings (Paolo G. Giarrusso)
New code for sections to help with accessibility patches
(Ross Moore)
Submission id, if present, is printed in anon mode
Bug fixes
Version 1.53 New journals: PACMCGIT, TIOT, TDSCI
Version 1.52 Another rewording of licenses
Version 1.51 Journal footers now use abbreviated journal titles.
Corrected the bug with acmPrice.
Do not show price when copyright is set to iw3c2w3 and iw3c2w3g.
The package now is compatible with polyglossia (Joachim Breitner).
Slightly reworded copyright statements.
Version 1.50 Changes in iw3c2w3 and iw3c2w3g
Version 1.49 New journal: DTRAP
Version 1.48 Bug fixes
Review mode now switches on folios
Code prettying (Michael D. Adams)
Bibliography changes: @MISC entries no longer have a
separate date
Sigchi-a sample bibliography renamed
Bib code cleanup (Zack Weinberg)
Acmart and version info are added to pdfcreator tag
\citeyear no longer produces parenthetical year
Added initial support for Biblatex (Daniel Thomas)
Added support for IW3C2 conferences
Version 1.47 New journal: THRI
Version 1.46 Bug fixes for bibliography: label width is now calculated
correctly.
All PACM now use screen option. This requires etoolbox.
Added subtitle to ACM reference format.
Now acmart is compatible with fontspec.
\thanks is now obsolete. The addresses are automatically
added to the journal version; this can be overridden with
\authorsaddresses command.
Deleted the rule at the end of frontmatter for all formats.
Deleted new line before doi in the reference format.
Reintegrated theorem code into acmart.dtx (Matthew Fluet)
Version 1.45 Workaround for a Libertine bug. Thanks to LianTze Lim
from Overleaf
Version 1.44 Bug fixes.
Empty DOI and ISBN suppress printing DOI or ISBN lines
Separated theorem code into acmthm.sty, loaded by default.
Article number can be set for proceedings.
New commands: \acmBooktitle, \editor.
Reference citation format updated.
Version 1.43 Bug fixes
Version 1.42 Deleted ACM badges
Bug fixes
Version 1.41 Rearranged bib files
Added new badges
Version 1.40 Bibliography changes
Added processing of one-component ccsdesc nodes
Bug fixes.
Made the height a multiple of \baselineskip + \topskip
Added cleveref
We no longer print street address in SIGs
Version 1.39 Added \authornotemark command
Version 1.38 Increase default font size for SIGPLAN
Version 1.37 Reduce list indentation (Matthew Fluet)
Version 1.36 Bug fixes
Moved PACMPL to acmlarge format
New journal: PACMHCI
Added the possibility to adjust number of author
boxes per row in conference formats
Version 1.35 Author-year bib style now uses square brackets.
Changed defaults for TOG sample
Price is suppressed for usgov and rightsretained modes.
Bugs fixed
Version 1.34 Deleted DOI from doi numbers
Changed bibstrip formatting
The command \terms is now obsolete
The rulers in review mode now have continuous numbering
Version 1.33 New option `timestamp' (Michael D. Adams)
New option `authordraft'
Documentation updates
Bug fixes
We now use Type 1 versions of Libertine fonts even with XeTeX.
New hook acmart-preload-hook.tex (wizards only!)
Added new options `obeypunctuation' for \affiliation command
Added SubmissionID
Added right line count ruler for two-column formats
Added workaround for Adobe Acrobat bugs in selection
Added eid field to the bibliography
Version 1.32 New DOI formatting.
Format siggraph is now obsolete, and sigconf
is used instead.
New proceedings title: POMACS.
Version 1.31 Changed default year and month to the current ones
(thanks to Matteo Riondato)
Table of contents now works
Marginalia now work in all formats
New command \additionalaffiliation
Documentation changes
Version 1.30 Bibtex style now recognizes https:// in doi.
Added \frenchspacing.
\department now has an optional hierarchy level.
Switched to T1 encoding
Updated IMWUT and PACMPL
Version 1.29 Documentation changes. Head height increased from 12pt to 13pt.
Removed spurious indent at start of abstract.
Improved kerning in CCS description list.
Version 1.28 Bug fixes: natbib=false now behaves correctly.
Version 1.27 Bug fixes
Version 1.26 Bug fixes
Version 1.25 Updated PACMPL journal option.
Version 1.24 Added IMWUT journal option.
Version 1.23 Added PACM PL journal option.
Version 1.22 Bibliography changes for Aptara backend; should be
invisible for the users.
Version 1.21 Bibliography changes: added arXiv, some cleanup
Version 1.20 Bug fixes, documentation updates
Version 1.19 Include 'Abstract', 'Acknowledgements', and 'References'
in PDF bookmarks.
Version 1.18 Natbib is now the default for all versions. A unified bib
file is used for all styles. Better treatment
of multiple affiliations.
Version 1.17 Formatting changes for margins and lists. Bug fixes.
Version 1.16 Formatting changes for headers and footers.
Version 1.15 New structured affiliation command.
New commands for acknowledgements.
Version 1.14 Warn about undefined citation styles; move definitions
of acmauthoryear and acmnumeric citation styles before
use.
Version 1.13 Formatting changes: headers, folios etc.
Bibliography changes.
Version 1.12 Bug fixes and documentation updates.
Footnotes rearranged.
Option natbib is now mostly superfluous: the class
makes a guess based on the format chosen.
Version 1.11 Customization of ACM theorem styles and proof
environment (Matthew Fluet).
Version 1.10 Bug fixes
Version 1.09 SIGPLAN: revert caption rules (Matthew Fluet)
Version 1.08 SIGPLAN reformatting (Matthew Fluet); bug fixes

View File

@ -0,0 +1,95 @@
@Misc{TeXFAQ,
title = {{UK} List of {\TeX} Frequently Asked Questions},
author = {{UK \TeX{} Users Group}},
year = 2019,
howpublished = {\url{https://texfaq.org}}
}
@Manual{Downes04:amsart,
title = {The \textsf{amsart}, \textsf{amsproc}, and
\textsf{amsbook} document~classes},
author = {Michael Downes and Barbara Beeton},
organization = {American Mathematical Society},
year = 2004,
month = aug,
note = {\url{http://www.ctan.org/pkg/amslatex}}
}
@Manual{Fiorio15,
title = {{a}lgorithm2e.sty---package for algorithms},
author = {Christophe Fiorio},
year = 2015,
month = oct,
note = {\url{http://www.ctan.org/pkg/algorithm2e}}
}
@Manual{Brito09,
title = {The algorithms bundle},
author = {Rog\'erio Brito},
year = 2009,
month = aug,
note = {\url{http://www.ctan.org/pkg/algorithms}}
}
@Manual{Heinz15,
title = {The Listings Package},
author = {Carsten Heinz and Brooks Moses and Jobst Hoffmann},
year = 2015,
month = jun,
note = {\url{http://www.ctan.org/pkg/listings}}
}
@Manual{Fear05,
title = {Publication quality tables in {\LaTeX}},
author = {Simon Fear},
year = 2005,
month = apr,
note = {\url{http://www.ctan.org/pkg/booktabs}}
}
@Manual{ACMIdentityStandards,
title = {{ACM} Visual Identity Standards},
organization = {Association for Computing Machinery},
year = 2007,
note = {\url{http://identitystandards.acm.org}}
}
@Manual{Sommerfeldt13:Subcaption,
title = {The subcaption package},
author = {Axel Sommerfeldt},
year = 2013,
month = apr,
note = {\url{http://www.ctan.org/pkg/subcaption}}
}
@Manual{Nomencl,
title = {A package to create a nomenclature},
author = {Boris Veytsman and Bernd Schandl and Lee Netherton
and C. V. Radhakrishnan},
year = 2005,
month = sep,
note = {\url{http://www.ctan.org/pkg/nomencl}}
}
@Manual{Talbot16:Glossaries,
title = {User Manual for glossaries.sty v4.44},
author = {Nicola L. C. Talbot},
year = 2019,
month = dec,
note = {\url{http://www.ctan.org/pkg/glossaries}}
}
@Manual{Carlisle04:Textcase,
title = {The \textsl{textcase} package},
author = {David Carlisle},
month = oct,
year = 2004,
note = {\url{http://www.ctan.org/pkg/textcase}}
}
@Manual{Braams22:Babel,
title = {Babel},
author = {Johannes L. Braams and Javier Bezos},
year = 2022,
note = {\url{http://www.ctan.org/pkg/babel}}}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 93 KiB

View File

@ -0,0 +1,882 @@
%<*manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%%
%%
%% Commands for TeXCount
%<<TCMACROS
%TC:macro \cite [option:text,text]
%TC:macro \citep [option:text,text]
%TC:macro \citet [option:text,text]
%TC:envir table 0 1
%TC:envir table* 0 1
%TC:envir tabular [ignore] word
%TC:envir displaymath 0 word
%TC:envir math 0 word
%TC:envir comment 0 0
%TCMACROS
%%
%%
%% The first command in your LaTeX source must be the \documentclass command.
%<manuscript>\documentclass[manuscript,screen,review]{acmart}
%<acmsmall|acmsmall-conf>\documentclass[acmsmall]{acmart}
%<acmsmall-submission>\documentclass[acmsmall,screen,review]{acmart}
%<acmlarge>\documentclass[acmlarge]{acmart}
%<acmtog>\documentclass[acmtog]{acmart}
%<sigconf>\documentclass[sigconf]{acmart}
%<authordraft>\documentclass[sigconf,authordraft]{acmart}
%<sigplan>\documentclass[sigplan,screen]{acmart}
%<sigchi>\documentclass[sigchi]{acmart}
%<sigchi-a>\documentclass[sigchi-a, nonacm]{acmart}
%<sigconf-i13n>\documentclass[sigconf, language=french,
%<sigconf-i13n>language=german, language=spanish, language=english]{acmart}
%%
%% \BibTeX command to typeset BibTeX logo in the docs
\AtBeginDocument{%
\providecommand\BibTeX{{%
\normalfont B\kern-0.5em{\scshape i\kern-0.25em b}\kern-0.8em\TeX}}}
%% Rights management information. This information is sent to you
%% when you complete the rights form. These commands have SAMPLE
%% values in them; it is your responsibility as an author to replace
%% the commands and values with those provided to you when you
%% complete the rights form.
%
\setcopyright{acmcopyright}
\copyrightyear{2018}
\acmYear{2018}
\acmDOI{XXXXXXX.XXXXXXX}
%</manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%<*manuscript|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%% These commands are for a PROCEEDINGS abstract or paper.
\acmConference[Conference acronym 'XX]{Make sure to enter the correct
conference title from your rights confirmation email}{June 03--05,
2018}{Woodstock, NY}
%
% Uncomment \acmBooktitle if the title of the proceedings is different
% from ``Proceedings of ...''!
%
%\acmBooktitle{Woodstock '18: ACM Symposium on Neural Gaze Detection,
% June 03--05, 2018, Woodstock, NY}
\acmPrice{15.00}
\acmISBN{978-1-4503-XXXX-X/18/06}
%</manuscript|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%<*acmsmall|acmsmall-submission|acmlarge|acmtog>
%%
%% These commands are for a JOURNAL article.
%<acmsmall|acmsmall-submission>\acmJournal{JACM}
%<acmlarge>\acmJournal{POMACS}
%<acmtog>\acmJournal{TOG}
\acmVolume{37}
\acmNumber{4}
\acmArticle{111}
\acmMonth{8}
%</acmsmall|acmsmall-submission|acmlarge|acmtog>
%<*manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%%
%% Submission ID.
%% Use this when submitting an article to a sponsored event. You'll
%% receive a unique submission ID from the organizers
%% of the event, and this ID should be used as the parameter to this command.
%%\acmSubmissionID{123-A56-BU3}
%%
%% The majority of ACM publications use numbered citations and
%% references. The command \citestyle{authoryear} switches to the
%% "author year" style.
%%
%% If you are preparing content for an event
%% sponsored by ACM SIGGRAPH, you must use the "author year" style of
%% citations and references.
%<!acmtog>%% Uncommenting
%<!acmtog>%% the next command will enable that style.
%<!acmtog>%%\citestyle{acmauthoryear}
%<acmtog>\citestyle{acmauthoryear}
%%
%% end of the preamble, start of the body of the document source.
\begin{document}
%%
%% The "title" command has an optional parameter,
%% allowing the author to define a "short title" to be used in page headers.
\title{The Name of the Title is Hope}
%<sigconf-i13n>\translatedtitle{french}{Le nom du titre est l'espoir}
%<sigconf-i13n>\translatedtitle{german}{Der Name des Titels ist Hoffnung}
%<sigconf-i13n>\translatedtitle{spanish}{El nombre del título es esperanza}
%%
%% The "author" command and its associated commands are used to define
%% the authors and their affiliations.
%% Of note is the shared affiliation of the first two authors, and the
%% "authornote" and "authornotemark" commands
%% used to denote shared contribution to the research.
\author{Ben Trovato}
\authornote{Both authors contributed equally to this research.}
\email{trovato@corporation.com}
\orcid{1234-5678-9012}
\author{G.K.M. Tobin}
\authornotemark[1]
\email{webmaster@marysville-ohio.com}
\affiliation{%
\institution{Institute for Clarity in Documentation}
\streetaddress{P.O. Box 1212}
\city{Dublin}
\state{Ohio}
\country{USA}
\postcode{43017-6221}
}
\author{Lars Th{\o}rv{\"a}ld}
\affiliation{%
\institution{The Th{\o}rv{\"a}ld Group}
\streetaddress{1 Th{\o}rv{\"a}ld Circle}
\city{Hekla}
\country{Iceland}}
\email{larst@affiliation.org}
\author{Valerie B\'eranger}
\affiliation{%
\institution{Inria Paris-Rocquencourt}
\city{Rocquencourt}
\country{France}
}
\author{Aparna Patel}
\affiliation{%
\institution{Rajiv Gandhi University}
\streetaddress{Rono-Hills}
\city{Doimukh}
\state{Arunachal Pradesh}
\country{India}}
\author{Huifen Chan}
\affiliation{%
\institution{Tsinghua University}
\streetaddress{30 Shuangqing Rd}
\city{Haidian Qu}
\state{Beijing Shi}
\country{China}}
\author{Charles Palmer}
\affiliation{%
\institution{Palmer Research Laboratories}
\streetaddress{8600 Datapoint Drive}
\city{San Antonio}
\state{Texas}
\country{USA}
\postcode{78229}}
\email{cpalmer@prl.com}
\author{John Smith}
\affiliation{%
\institution{The Th{\o}rv{\"a}ld Group}
\streetaddress{1 Th{\o}rv{\"a}ld Circle}
\city{Hekla}
\country{Iceland}}
\email{jsmith@affiliation.org}
\author{Julius P. Kumquat}
\affiliation{%
\institution{The Kumquat Consortium}
\city{New York}
\country{USA}}
\email{jpkumquat@consortium.net}
%%
%% By default, the full list of authors will be used in the page
%% headers. Often, this list is too long, and will overlap
%% other information printed in the page headers. This command allows
%% the author to define a more concise list
%% of authors' names for this purpose.
\renewcommand{\shortauthors}{Trovato and Tobin, et al.}
%%
%% The abstract is a short summary of the work to be presented in the
%% article.
\begin{abstract}
A clear and well-documented \LaTeX\ document is presented as an
article formatted for publication by ACM in a conference proceedings
or journal publication. Based on the ``acmart'' document class, this
article presents and explains many of the common variations, as well
as many of the formatting elements an author may use in the
preparation of the documentation of their work.
\end{abstract}
%</manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%<*sigconf-i13n>
\begin{translatedabstract}{french}
Un document \LaTeX\ clair et bien documenté est présenté comme un
article formaté pour publication par ACM dans les actes d'une
conférence ou parution dans une revue. Basé sur la classe de
document ``acmart'', cet article présente et explique de nombreuses
variations courantes, ainsi que autant d'éléments de mise en forme
qu'un auteur peut utiliser dans la préparation de la documentation
de leur travail.
\end{translatedabstract}
\begin{translatedabstract}{german}
Ein übersichtliches und gut dokumentiertes \LaTeX-Dokument wird als
Artikel, der für die Veröffentlichung durch ACM in einem Tagungsband
formatiert wurde oder Zeitschriftenveröffentlichung. Basierend auf
der Dokumentenklasse ``acmart'', dieser Artikel präsentiert und
erklärt auch viele der gängigen Variationen so viele der
Formatierungselemente, die ein Autor in der verwenden darf
Vorbereitung der Dokumentation ihrer Arbeit.
\end{translatedabstract}
\begin{translatedabstract}{spanish}
Un documento \LaTeX\ claro y bien documentado se presenta como un
artículo formateado para su publicación por ACM en las actas de una
conferencia o publicación de una revista. Basado en la clase de
documento ``acmart'', este artículo presenta y explica muchas de las
variaciones comunes, así como tantos de los elementos de formato que
un autor puede usar en la preparación de la documentación de su
trabajo.
\end{translatedabstract}
%</sigconf-i13n>
%<*manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%%
%% The code below is generated by the tool at http://dl.acm.org/ccs.cfm.
%% Please copy and paste the code instead of the example below.
%%
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10010520.10010553.10010562</concept_id>
<concept_desc>Computer systems organization~Embedded systems</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10010520.10010575.10010755</concept_id>
<concept_desc>Computer systems organization~Redundancy</concept_desc>
<concept_significance>300</concept_significance>
</concept>
<concept>
<concept_id>10010520.10010553.10010554</concept_id>
<concept_desc>Computer systems organization~Robotics</concept_desc>
<concept_significance>100</concept_significance>
</concept>
<concept>
<concept_id>10003033.10003083.10003095</concept_id>
<concept_desc>Networks~Network reliability</concept_desc>
<concept_significance>100</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Computer systems organization~Embedded systems}
\ccsdesc[300]{Computer systems organization~Redundancy}
\ccsdesc{Computer systems organization~Robotics}
\ccsdesc[100]{Networks~Network reliability}
%%
%% Keywords. The author(s) should pick words that accurately describe
%% the work being presented. Separate the keywords with commas.
\keywords{datasets, neural networks, gaze detection, text tagging}
%<sigconf-i13n>\translatedkeywords{french}{ensembles de données,
%<sigconf-i13n> réseaux de neurones,
%<sigconf-i13n> détection du regard, marquage de texte}
%<sigconf-i13n> \translatedkeywords{german}{Datensätze,
%<sigconf-i13n> neuronale Netze, Blickerkennung, Text-Tagging}
%<sigconf-i13n> \translatedkeywords{spanish}{conjuntos de datos,
%<sigconf-i13n> redes neuronales, detección de mirada, etiquetado de texto}
%</manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%<*sigconf|authordraft|sigplan|acmsmall-conf|sigconf-i13n>
%% A "teaser" image appears between the author and affiliation
%% information and the body of the document, and typically spans the
%% page.
\begin{teaserfigure}
\includegraphics[width=\textwidth]{sampleteaser}
\caption{Seattle Mariners at Spring Training, 2010.}
\Description{Enjoying the baseball game from the third-base
seats. Ichiro Suzuki preparing to bat.}
\label{fig:teaser}
\end{teaserfigure}
%</sigconf|authordraft|sigplan|acmsmall-conf|sigconf-i13n>
%<*manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>
%%
%% This command processes the author and affiliation and title
%% information and builds the first part of the formatted document.
\maketitle
\section{Introduction}
ACM's consolidated article template, introduced in 2017, provides a
consistent \LaTeX\ style for use across ACM publications, and
incorporates accessibility and metadata-extraction functionality
necessary for future Digital Library endeavors. Numerous ACM and
SIG-specific \LaTeX\ templates have been examined, and their unique
features incorporated into this single new template.
If you are new to publishing with ACM, this document is a valuable
guide to the process of preparing your work for publication. If you
have published with ACM before, this document provides insight and
instruction into more recent changes to the article template.
The ``\verb|acmart|'' document class can be used to prepare articles
for any ACM publication --- conference or journal, and for any stage
of publication, from review to final ``camera-ready'' copy, to the
author's own version, with {\itshape very} few changes to the source.
\section{Template Overview}
As noted in the introduction, the ``\verb|acmart|'' document class can
be used to prepare many different kinds of documentation --- a
double-blind initial submission of a full-length technical paper, a
two-page SIGGRAPH Emerging Technologies abstract, a ``camera-ready''
journal article, a SIGCHI Extended Abstract, and more --- all by
selecting the appropriate {\itshape template style} and {\itshape
template parameters}.
This document will explain the major features of the document
class. For further information, the {\itshape \LaTeX\ User's Guide} is
available from
\url{https://www.acm.org/publications/proceedings-template}.
\subsection{Template Styles}
The primary parameter given to the ``\verb|acmart|'' document class is
the {\itshape template style} which corresponds to the kind of publication
or SIG publishing the work. This parameter is enclosed in square
brackets and is a part of the {\verb|documentclass|} command:
\begin{verbatim}
\documentclass[STYLE]{acmart}
\end{verbatim}
Journals use one of three template styles. All but three ACM journals
use the {\verb|acmsmall|} template style:
\begin{itemize}
\item {\texttt{acmsmall}}: The default journal template style.
\item {\texttt{acmlarge}}: Used by JOCCH and TAP.
\item {\texttt{acmtog}}: Used by TOG.
\end{itemize}
The majority of conference proceedings documentation will use the {\verb|sigconf|} template style.
\begin{itemize}
\item {\texttt{sigconf}}: The default proceedings template style.
\item{\texttt{sigchi}}: Used for SIGCHI conference articles.
\item{\texttt{sigchi-a}}: Used for SIGCHI ``Extended Abstract'' articles.
\item{\texttt{sigplan}}: Used for SIGPLAN conference articles.
\end{itemize}
\subsection{Template Parameters}
In addition to specifying the {\itshape template style} to be used in
formatting your work, there are a number of {\itshape template parameters}
which modify some part of the applied template style. A complete list
of these parameters can be found in the {\itshape \LaTeX\ User's Guide.}
Frequently-used parameters, or combinations of parameters, include:
\begin{itemize}
\item {\texttt{anonymous,review}}: Suitable for a ``double-blind''
conference submission. Anonymizes the work and includes line
numbers. Use with the \verb|\acmSubmissionID| command to print the
submission's unique ID on each page of the work.
\item{\texttt{authorversion}}: Produces a version of the work suitable
for posting by the author.
\item{\texttt{screen}}: Produces colored hyperlinks.
\end{itemize}
This document uses the following string as the first command in the
source file:
\begin{verbatim}
%<manuscript>\documentclass[manuscript,screen,review]{acmart}
%<acmsmall|acmsmall-conf>\documentclass[acmsmall]{acmart}
%<acmsmall-submission>\documentclass[acmsmall,screen,review]{acmart}
%<acmlarge>\documentclass[acmlarge]{acmart}
%<acmtog>\documentclass[acmtog]{acmart}
%<sigconf>\documentclass[sigconf]{acmart}
%<authordraft>\documentclass[sigconf,authordraft]{acmart}
%<sigplan>\documentclass[sigplan,screen]{acmart}
%<sigchi>\documentclass[sigchi]{acmart}
%<sigchi-a>\documentclass[sigchi-a]{acmart}
%<sigconf-i13n>\documentclass[sigconf, language=french,
%<sigconf-i13n>language=german, language=spanish, language=english]{acmart}
\end{verbatim}
\section{Modifications}
Modifying the template --- including but not limited to: adjusting
margins, typeface sizes, line spacing, paragraph and list definitions,
and the use of the \verb|\vspace| command to manually adjust the
vertical spacing between elements of your work --- is not allowed.
{\bfseries Your document will be returned to you for revision if
modifications are discovered.}
\section{Typefaces}
The ``\verb|acmart|'' document class requires the use of the
``Libertine'' typeface family. Your \TeX\ installation should include
this set of packages. Please do not substitute other typefaces. The
``\verb|lmodern|'' and ``\verb|ltimes|'' packages should not be used,
as they will override the built-in typeface families.
\section{Title Information}
The title of your work should use capital letters appropriately ---
\url{https://capitalizemytitle.com/} has useful rules for
capitalization. Use the {\verb|title|} command to define the title of
your work. If your work has a subtitle, define it with the
{\verb|subtitle|} command. Do not insert line breaks in your title.
If your title is lengthy, you must define a short version to be used
in the page headers, to prevent overlapping text. The \verb|title|
command has a ``short title'' parameter:
\begin{verbatim}
\title[short title]{full title}
\end{verbatim}
\section{Authors and Affiliations}
Each author must be defined separately for accurate metadata
identification. As an exception, multiple authors may share one
affiliation. Authors' names should not be abbreviated; use full first
names wherever possible. Include authors' e-mail addresses whenever
possible.
Grouping authors' names or e-mail addresses, or providing an ``e-mail
alias,'' as shown below, is not acceptable:
\begin{verbatim}
\author{Brooke Aster, David Mehldau}
\email{dave,judy,steve@university.edu}
\email{firstname.lastname@phillips.org}
\end{verbatim}
The \verb|authornote| and \verb|authornotemark| commands allow a note
to apply to multiple authors --- for example, if the first two authors
of an article contributed equally to the work.
If your author list is lengthy, you must define a shortened version of
the list of authors to be used in the page headers, to prevent
overlapping text. The following command should be placed just after
the last \verb|\author{}| definition:
\begin{verbatim}
\renewcommand{\shortauthors}{McCartney, et al.}
\end{verbatim}
Omitting this command will force the use of a concatenated list of all
of the authors' names, which may result in overlapping text in the
page headers.
The article template's documentation, available at
\url{https://www.acm.org/publications/proceedings-template}, has a
complete explanation of these commands and tips for their effective
use.
Note that authors' addresses are mandatory for journal articles.
\section{Rights Information}
Authors of any work published by ACM will need to complete a rights
form. Depending on the kind of work, and the rights management choice
made by the author, this may be copyright transfer, permission,
license, or an OA (open access) agreement.
Regardless of the rights management choice, the author will receive a
copy of the completed rights form once it has been submitted. This
form contains \LaTeX\ commands that must be copied into the source
document. When the document source is compiled, these commands and
their parameters add formatted text to several areas of the final
document:
\begin{itemize}
\item the ``ACM Reference Format'' text on the first page.
\item the ``rights management'' text on the first page.
\item the conference information in the page header(s).
\end{itemize}
Rights information is unique to the work; if you are preparing several
works for an event, make sure to use the correct set of commands with
each of the works.
The ACM Reference Format text is required for all articles over one
page in length, and is optional for one-page articles (abstracts).
\section{CCS Concepts and User-Defined Keywords}
Two elements of the ``acmart'' document class provide powerful
taxonomic tools for you to help readers find your work in an online
search.
The ACM Computing Classification System ---
\url{https://www.acm.org/publications/class-2012} --- is a set of
classifiers and concepts that describe the computing
discipline. Authors can select entries from this classification
system, via \url{https://dl.acm.org/ccs/ccs.cfm}, and generate the
commands to be included in the \LaTeX\ source.
User-defined keywords are a comma-separated list of words and phrases
of the authors' choosing, providing a more flexible way of describing
the research being presented.
CCS concepts and user-defined keywords are required for all
articles over two pages in length, and are optional for one- and
two-page articles (or abstracts).
\section{Sectioning Commands}
Your work should use standard \LaTeX\ sectioning commands:
\verb|section|, \verb|subsection|, \verb|subsubsection|, and
\verb|paragraph|. They should be numbered; do not remove the numbering
from the commands.
Simulating a sectioning command by setting the first word or words of
a paragraph in boldface or italicized text is {\bfseries not allowed.}
\section{Tables}
The ``\verb|acmart|'' document class includes the ``\verb|booktabs|''
package --- \url{https://ctan.org/pkg/booktabs} --- for preparing
high-quality tables.
Table captions are placed {\itshape above} the table.
Because tables cannot be split across pages, the best placement for
them is typically the top of the page nearest their initial cite. To
ensure this proper ``floating'' placement of tables, use the
environment \textbf{table} to enclose the table's contents and the
table caption. The contents of the table itself must go in the
\textbf{tabular} environment, to be aligned properly in rows and
columns, with the desired horizontal and vertical rules. Again,
detailed instructions on \textbf{tabular} material are found in the
\textit{\LaTeX\ User's Guide}.
Immediately following this sentence is the point at which
Table~\ref{tab:freq} is included in the input file; compare the
placement of the table here with the table in the printed output of
this document.
%<!sigchi-a>\begin{table}
%<sigchi-a>\begin{margintable}
\caption{Frequency of Special Characters}
\label{tab:freq}
\begin{tabular}{ccl}
\toprule
Non-English or Math&Frequency&Comments\\
\midrule
\O & 1 in 1,000& For Swedish names\\
$\pi$ & 1 in 5& Common in math\\
\$ & 4 in 5 & Used in business\\
$\Psi^2_1$ & 1 in 40,000& Unexplained usage\\
\bottomrule
\end{tabular}
%<!sigchi-a>\end{table}
%<sigchi-a>\end{margintable}
To set a wider table, which takes up the whole width of the page's
live area, use the environment \textbf{table*} to enclose the table's
contents and the table caption. As with a single-column table, this
wide table will ``float'' to a location deemed more
desirable. Immediately following this sentence is the point at which
Table~\ref{tab:commands} is included in the input file; again, it is
instructive to compare the placement of the table here with the table
in the printed output of this document.
\begin{table*}
\caption{Some Typical Commands}
\label{tab:commands}
\begin{tabular}{ccl}
\toprule
Command &A Number & Comments\\
\midrule
\texttt{{\char'134}author} & 100& Author \\
\texttt{{\char'134}table}& 300 & For tables\\
\texttt{{\char'134}table*}& 400& For wider tables\\
\bottomrule
\end{tabular}
\end{table*}
Always use midrule to separate table header rows from data rows, and
use it only for this purpose. This enables assistive technologies to
recognise table headers and support their users in navigating tables
more easily.
\section{Math Equations}
You may want to display math equations in three distinct styles:
inline, numbered or non-numbered display. Each of the three is
discussed in the next sections.
\subsection{Inline (In-text) Equations}
A formula that appears in the running text is called an inline or
in-text formula. It is produced by the \textbf{math} environment,
which can be invoked with the usual
\texttt{{\char'134}begin\,\ldots{\char'134}end} construction or with
the short form \texttt{\$\,\ldots\$}. You can use any of the symbols
and structures, from $\alpha$ to $\omega$, available in
\LaTeX~\cite{Lamport:LaTeX}; this section will simply show a few
examples of in-text equations in context. Notice how this equation:
\begin{math}
\lim_{n\rightarrow \infty}x=0
\end{math},
set here in in-line math style, looks slightly different when
set in display style. (See next section).
\subsection{Display Equations}
A numbered display equation---one set off by vertical space from the
text and centered horizontally---is produced by the \textbf{equation}
environment. An unnumbered display equation is produced by the
\textbf{displaymath} environment.
Again, in either environment, you can use any of the symbols and
structures available in \LaTeX\@; this section will just give a couple
of examples of display equations in context. First, consider the
equation, shown as an inline equation above:
\begin{equation}
\lim_{n\rightarrow \infty}x=0
\end{equation}
Notice how it is formatted somewhat differently in
the \textbf{equation}
environment. Now, we'll enter an unnumbered equation:
\begin{displaymath}
\sum_{i=0}^{\infty} x + 1
\end{displaymath}
and follow it with another numbered equation:
\begin{equation}
\sum_{i=0}^{\infty}x_i=\int_{0}^{\pi+2} f
\end{equation}
just to demonstrate \LaTeX's able handling of numbering.
\section{Figures}
The ``\verb|figure|'' environment should be used for figures. One or
more images can be placed within a figure. If your figure contains
third-party material, you must clearly identify it as such, as shown
in the example below.
%<!sigchi-a>\begin{figure}[h]
%<sigchi-a>\begin{marginfigure}
\centering
\includegraphics[width=\linewidth]{sample-franklin}
\caption{1907 Franklin Model D roadster. Photograph by Harris \&
Ewing, Inc. [Public domain], via Wikimedia
Commons. (\url{https://goo.gl/VLCRBB}).}
\Description{A woman and a girl in white dresses sit in an open car.}
%<!sigchi-a>\end{figure}
%<sigchi-a>\end{marginfigure}
Your figures should contain a caption which describes the figure to
the reader.
Figure captions are placed {\itshape below} the figure.
Every figure should also have a figure description unless it is purely
decorative. These descriptions convey what's in the image to someone
who cannot see it. They are also used by search engine crawlers for
indexing images, and when images cannot be loaded.
A figure description must be unformatted plain text less than 2000
characters long (including spaces). {\bfseries Figure descriptions
should not repeat the figure caption --- their purpose is to capture
important information that is not already provided in the caption or
the main text of the paper.} For figures that convey important and
complex new information, a short text description may not be
adequate. More complex alternative descriptions can be placed in an
appendix and referenced in a short figure description. For example,
provide a data table capturing the information in a bar chart, or a
structured list representing a graph. For additional information
regarding how best to write figure descriptions and why doing this is
so important, please see
\url{https://www.acm.org/publications/taps/describing-figures/}.
\subsection{The ``Teaser Figure''}
A ``teaser figure'' is an image, or set of images in one figure, that
are placed after all author and affiliation information, and before
the body of the article, spanning the page. If you wish to have such a
figure in your article, place the command immediately before the
\verb|\maketitle| command:
\begin{verbatim}
\begin{teaserfigure}
\includegraphics[width=\textwidth]{sampleteaser}
\caption{figure caption}
\Description{figure description}
\end{teaserfigure}
\end{verbatim}
\section{Citations and Bibliographies}
The use of \BibTeX\ for the preparation and formatting of one's
references is strongly recommended. Authors' names should be complete
--- use full first names (``Donald E. Knuth'') not initials
(``D. E. Knuth'') --- and the salient identifying features of a
reference should be included: title, year, volume, number, pages,
article DOI, etc.
The bibliography is included in your source document with these two
commands, placed just before the \verb|\end{document}| command:
\begin{verbatim}
\bibliographystyle{ACM-Reference-Format}
\bibliography{bibfile}
\end{verbatim}
where ``\verb|bibfile|'' is the name, without the ``\verb|.bib|''
suffix, of the \BibTeX\ file.
Citations and references are numbered by default. A small number of
ACM publications have citations and references formatted in the
``author year'' style; for these exceptions, please include this
command in the {\bfseries preamble} (before the command
``\verb|\begin{document}|'') of your \LaTeX\ source:
\begin{verbatim}
\citestyle{acmauthoryear}
\end{verbatim}
Some examples. A paginated journal article \cite{Abril07}, an
enumerated journal article \cite{Cohen07}, a reference to an entire
issue \cite{JCohen96}, a monograph (whole book) \cite{Kosiur01}, a
monograph/whole book in a series (see 2a in spec. document)
\cite{Harel79}, a divisible-book such as an anthology or compilation
\cite{Editor00} followed by the same example, however we only output
the series if the volume number is given \cite{Editor00a} (so
Editor00a's series should NOT be present since it has no vol. no.),
a chapter in a divisible book \cite{Spector90}, a chapter in a
divisible book in a series \cite{Douglass98}, a multi-volume work as
book \cite{Knuth97}, a couple of articles in a proceedings (of a
conference, symposium, workshop for example) (paginated proceedings
article) \cite{Andler79, Hagerup1993}, a proceedings article with
all possible elements \cite{Smith10}, an example of an enumerated
proceedings article \cite{VanGundy07}, an informally published work
\cite{Harel78}, a couple of preprints \cite{Bornmann2019,
AnzarootPBM14}, a doctoral dissertation \cite{Clarkson85}, a
master's thesis: \cite{anisi03}, an online document / world wide web
resource \cite{Thornburg01, Ablamowicz07, Poker06}, a video game
(Case 1) \cite{Obama08} and (Case 2) \cite{Novak03} and \cite{Lee05}
and (Case 3) a patent \cite{JoeScientist001}, work accepted for
publication \cite{rous08}, `YYYYb'-test for prolific author
\cite{SaeediMEJ10} and \cite{SaeediJETC10}. Other cites might
contain `duplicate' DOI and URLs (some SIAM articles)
\cite{Kirschmer:2010:AEI:1958016.1958018}. Boris / Barbara Beeton:
multi-volume works as books \cite{MR781536} and \cite{MR781537}. A
couple of citations with DOIs:
\cite{2004:ITE:1009386.1010128,Kirschmer:2010:AEI:1958016.1958018}. Online
citations: \cite{TUGInstmem, Thornburg01, CTANacmart}. Artifacts:
\cite{R} and \cite{UMassCitations}.
\section{Acknowledgments}
Identification of funding sources and other support, and thanks to
individuals and groups that assisted in the research and the
preparation of the work should be included in an acknowledgment
section, which is placed just before the reference section in your
document.
This section has a special environment:
\begin{verbatim}
\begin{acks}
...
\end{acks}
\end{verbatim}
so that the information contained therein can be more easily collected
during the article metadata extraction phase, and to ensure
consistency in the spelling of the section heading.
Authors should not prepare this section as a numbered or unnumbered {\verb|\section|}; please use the ``{\verb|acks|}'' environment.
\section{Appendices}
If your work needs an appendix, add it before the
``\verb|\end{document}|'' command at the conclusion of your source
document.
Start the appendix with the ``\verb|appendix|'' command:
\begin{verbatim}
\appendix
\end{verbatim}
and note that in the appendix, sections are lettered, not
numbered. This document has two appendices, demonstrating the section
and subsection identification method.
\section{Multi-language papers}
Papers may be written in languages other than English or include
titles, subtitles, keywords and abstracts in different languages (as a
rule, a paper in a language other than English should include an
English title and an English abstract). Use \verb|language=...| for
every language used in the paper. The last language indicated is the
main language of the paper. For example, a French paper with
additional titles and abstracts in English and German may start with
the following command
\begin{verbatim}
\documentclass[sigconf, language=english, language=german,
language=french]{acmart}
\end{verbatim}
The title, subtitle, keywords and abstract will be typeset in the main
language of the paper. The commands \verb|\translatedXXX|, \verb|XXX|
being title, subtitle and keywords, can be used to set these elements
in the other languages. The environment \verb|translatedabstract| is
used to set the translation of the abstract. These commands and
environment have a mandatory first argument: the language of the
second argument. See \verb|sample-sigconf-i13n.tex| file for examples
of their usage.
\section{SIGCHI Extended Abstracts}
The ``\verb|sigchi-a|'' template style (available only in \LaTeX\ and
not in Word) produces a landscape-orientation formatted article, with
a wide left margin. Three environments are available for use with the
``\verb|sigchi-a|'' template style, and produce formatted output in
the margin:
\begin{description}
\item[\texttt{sidebar}:] Place formatted text in the margin.
\item[\texttt{marginfigure}:] Place a figure in the margin.
\item[\texttt{margintable}:] Place a table in the margin.
\end{description}
%%
%% The acknowledgments section is defined using the "acks" environment
%% (and NOT an unnumbered section). This ensures the proper
%% identification of the section in the article metadata, and the
%% consistent spelling of the heading.
\begin{acks}
To Robert, for the bagels and explaining CMYK and color spaces.
\end{acks}
%%
%% The next two lines define the bibliography style to be used, and
%% the bibliography file.
\bibliographystyle{ACM-Reference-Format}
\bibliography{sample-base}
%%
%% If your work has an appendix, this is the place to put it.
\appendix
\section{Research Methods}
\subsection{Part One}
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi
malesuada, quam in pulvinar varius, metus nunc fermentum urna, id
sollicitudin purus odio sit amet enim. Aliquam ullamcorper eu ipsum
vel mollis. Curabitur quis dictum nisl. Phasellus vel semper risus, et
lacinia dolor. Integer ultricies commodo sem nec semper.
\subsection{Part Two}
Etiam commodo feugiat nisl pulvinar pellentesque. Etiam auctor sodales
ligula, non varius nibh pulvinar semper. Suspendisse nec lectus non
ipsum convallis congue hendrerit vitae sapien. Donec at laoreet
eros. Vivamus non purus placerat, scelerisque diam eu, cursus
ante. Etiam aliquam tortor auctor efficitur mattis.
\section{Online Resources}
Nam id fermentum dui. Suspendisse sagittis tortor a nulla mollis, in
pulvinar ex pretium. Sed interdum orci quis metus euismod, et sagittis
enim maximus. Vestibulum gravida massa ut felis suscipit
congue. Quisque mattis elit a risus ultrices commodo venenatis eget
dui. Etiam sagittis eleifend elementum.
Nam interdum magna at lectus dignissim, ac dignissim lorem
rhoncus. Maecenas eu arcu ac neque placerat aliquam. Nunc pulvinar
massa et mattis lacinia.
\end{document}
%</manuscript|acmsmall|acmsmall-submission|acmlarge|acmtog|sigconf|authordraft|sigplan|sigchi|sigchi-a|acmsmall-conf|sigconf-i13n>

View File

@ -0,0 +1,23 @@
%% Docstrip batch file: extracts the individual sample documents from
%% samples.dtx, one output file per \file line below.
\def\batchfile{samples.ins}
\input docstrip
%% Docstrip message/verbosity switches.
\keepsilent
\showprogress
%% Overwrite existing output files without prompting.
\askforoverwritefalse
%% Each \file{<output>}{\from{samples.dtx}{<option>}} entry writes <output>
%% using the lines of samples.dtx selected by the guard <option>.
\generate{%
\file{sample-manuscript.tex}{\from{samples.dtx}{manuscript}}
\file{sample-acmsmall.tex}{\from{samples.dtx}{acmsmall}}
\file{sample-acmsmall-submission.tex}{\from{samples.dtx}{acmsmall-submission}}
\file{sample-acmlarge.tex}{\from{samples.dtx}{acmlarge}}
\file{sample-acmtog.tex}{\from{samples.dtx}{acmtog}}
\file{sample-sigconf.tex}{\from{samples.dtx}{sigconf}}
\file{sample-authordraft.tex}{\from{samples.dtx}{authordraft}}
%% The XeLaTeX and LuaLaTeX samples are extracted with the same `sigconf'
%% guard as sample-sigconf.tex.
\file{sample-xelatex.tex}{\from{samples.dtx}{sigconf}}
\file{sample-lualatex.tex}{\from{samples.dtx}{sigconf}}
\file{sample-sigplan.tex}{\from{samples.dtx}{sigplan}}
\file{sample-acmsmall-conf.tex}{\from{samples.dtx}{acmsmall-conf}}
\file{sample-sigconf-i13n.tex}{\from{samples.dtx}{sigconf-i13n}}
}

Binary file not shown.

View File

0
app_hardness-results.log Normal file
View File

0
approx_alg.log Normal file
View File

15
arXiv/abstract.tex Normal file
View File

@ -0,0 +1,15 @@
%root: main.tex
%!TEX root=./main.tex
\begin{abstract}
In this work, we study the problem of computing a tuple's expected multiplicity over probabilistic databases with bag semantics (where each tuple is associated with a multiplicity) exactly and approximately.
We consider bag-\abbrTIDB\xplural where we have a bound $\bound$ on the maximum multiplicity of each tuple and tuples are independent probabilistic events (we refer to such databases as \abbrCTIDB\xplural).
We are specifically interested in the fine-grained complexity of computing expected multiplicities and how it compares to the complexity of deterministic query evaluation algorithms --- if these complexities are comparable, it opens the door to practical deployment of probabilistic databases.
Unfortunately, our results imply that computing expected multiplicities for \abbrCTIDB\xplural based on the results produced by such query evaluation algorithms introduces super-linear overhead (under parameterized complexity hardness assumptions/conjectures).
We proceed to study approximation of expected result tuple multiplicities for positive relational algebra queries ($\raPlus$) over \abbrCTIDB\xplural and for a non-trivial subclass of block-independent databases (\abbrBIDB\xplural).
We develop a sampling algorithm that computes a $(1 \pm \epsilon)$-approximation of the expected multiplicity of an output tuple in time linear in the runtime of the corresponding deterministic query for any $\raPlus$ query.
\end{abstract}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

View File

@ -0,0 +1,3 @@
%root: main.tex
\section{Acknowledgements}
We thank Virginia Williams for showing us \Cref{eq:3p-3tri}, which greatly simplified our earlier proof of Lemma 3.8, and for graciously allowing us to use it.

View File

@ -0,0 +1,195 @@
%root: main.tex
The following results assume input circuit \circuit computed from an arbitrary $\raPlus$ query $\query$ and arbitrary \abbrBIDB $\pdb$. We refer to \circuit as a \abbrBIDB circuit.
\begin{Theorem}\label{lem:approx-alg}
Let \circuit be an arbitrary \abbrBIDB circuit
and define $\poly(\vct{X})=\polyf(\circuit)$ and let $k=\degree(\circuit)$.
Then an estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
{\small
\[O\left(\left(\size(\circuit) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\circuit}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot \depth(\circuit)}{\inparen{\error}^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)\]
}
such that
\begin{equation}
\label{eq:approx-algo-bound}
\probOf\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.
\end{equation}
\end{Theorem}
The slight abuse of notation seen in $\abs{\circuit}\inparen{1,\ldots,1}$ is explained after \Cref{def:positive-circuit} and an example is given in \Cref{ex:def-pos-circ}. The only difference in the use of this notation in \Cref{lem:approx-alg} is that we include an additional exponent to square the quantity.
\subsection{Proof of Theorem \ref{lem:approx-alg}}\label{sec:proof-lem-approx-alg}
\input{app_approx_alg-pseudo-code}
We prove \Cref{lem:approx-alg} constructively by presenting an algorithm \approxq (\Cref{alg:mon-sam}) which has the desired runtime and computes an approximation with the desired approximation guarantee. Algorithm \approxq uses Algorithm \onepass to compute weights on the edges of a circuit. These weights are then used to sample a set of monomials of $\poly(\circuit)$ from the circuit $\circuit$ by traversing the circuit using the weights to ensure that monomials are sampled with an appropriate probability. The correctness of \approxq relies on the correctness (and runtime behavior) of auxiliary algorithms \onepass and \sampmon that we state in the following lemmas (and prove later in this part of the appendix).
\begin{Lemma}\label{lem:one-pass}
The $\onepass$ function completes in time:
$$O\left(\size(\circuit) \cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$$
$\onepass$ guarantees two post-conditions: First, for each subcircuit $\vari{S}$ of $\circuit$, we have that $\vari{S}.\vari{partial}$ is set to $\abs{\vari{S}}(1,\ldots, 1)$. Second, when $\vari{S}.\type = \circplus$, \subcircuit.\lwght $= \frac{\abs{\subcircuit_\linput}(1,\ldots, 1)}{\abs{\subcircuit}(1,\ldots, 1)}$ and likewise for \subcircuit.\rwght.
\end{Lemma}
To prove correctness of \Cref{alg:mon-sam}, we only use the following fact that follows from the above lemma: for the modified circuit ($\circuit_{\vari{mod}}$) output by \onepass, $\circuit_{\vari{mod}}.\vari{partial}=\abs{\circuit}(1,\dots,1)$.
\begin{Lemma}\label{lem:sample}
The function $\sampmon$ completes in time
$$O(\log{k} \cdot k \cdot \depth(\circuit)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}})$$
where $k = \degree(\circuit)$. The function returns every $\left(\monom, sign(\coef)\right)$ for $(\monom, \coef)\in \expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
\end{Lemma}
With the above two lemmas, we are ready to argue the following result:
\begin{Theorem}\label{lem:mon-samp}
For any $\circuit$ with
$\degree(\polyf(\abs{\circuit})) = k$, \Cref{alg:mon-sam} outputs an estimate $\vari{acc}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ such that
\[\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf,\]
in $O\left(\left(\size(\circuit)+\frac{\log{\frac{1}{\conf}}}{\error^2} \cdot k \cdot\log{k} \cdot \depth(\circuit)\right)\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$ time.
\end{Theorem}
Before proving \Cref{lem:mon-samp}, we use it to argue the claimed runtime of our main result, \Cref{lem:approx-alg}.
\begin{proof}[Proof of \Cref{lem:approx-alg}]
Set $\mathcal{E}=\approxq({\circuit}, (\prob_1,\dots,\prob_\numvar),$ $\conf, \error')$, where
\[\error' = \error \cdot \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)},\]
which achieves the claimed error bound on $\mathcal{E}$ (\vari{acc}) trivially due to the assignment to $\error'$ and \cref{lem:mon-samp}, since $\error' \cdot \abs{\circuit}(1,\ldots, 1) = \error\cdot\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\circuit}(1,\ldots, 1)} \cdot \abs{\circuit}(1,\ldots, 1) = \error\cdot\rpoly(\prob_1,\ldots, \prob_\numvar)$.
The claim on the runtime follows from \Cref{lem:mon-samp} since
\begin{align*}
\frac 1{\inparen{\error'}^2}\cdot \log\inparen{\frac 1\conf}=&\frac{\log{\frac{1}{\conf}}}{\error^2 \left(\frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)}\right)^2}\\
= &\frac{\log{\frac{1}{\conf}}\cdot \abs{{\circuit}}^2(1,\ldots, 1)}{\error^2 \cdot \rpoly^2(\prob_1,\ldots, \prob_\numvar)}.
\end{align*}
\qed
\end{proof}
Let us now prove \Cref{lem:mon-samp}:
\subsection{Proof of Theorem \ref{lem:mon-samp}}\label{app:subsec-th-mon-samp}
\begin{proof}
Consider now the random variables $\randvar_1,\dots,\randvar_\numsamp$, where each $\randvar_\vari{i}$ is the value of $\vari{Y}_{\vari{i}}$ in \cref{alg:mon-sam} after \cref{alg:mon-sam-product} is executed. Overloading $\isInd{\cdot}$ to receive monomial input (recall $\encMon$ is the monomial composed of the variables in the set $\monom$), we have
\[\randvar_\vari{i}= \indicator{\inparen{\isInd{\encMon}}}\cdot \prod_{X_i\in \var\inparen{v}} p_i,\]
where the indicator variable handles the check in \Cref{alg:check-duplicate-block}.
Then for random variable $\randvar_i$, it is the case that
\begin{align*}
\expct\pbox{\randvar_\vari{i}} &= \sum\limits_{(\monom, \coef) \in \expansion{{\circuit}} }\frac{\indicator{\inparen{\isInd{\encMon}}}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{{\circuit}}(1,\dots,1)} \\
&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)},
\end{align*}
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from \Cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_\vari{i}$. It is also true that
\[\expct\pbox{\empmean}
= \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\expct\pbox{\randvar_\vari{i}}
= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{{\circuit}}(1,\ldots, 1)}.\]
Hoeffding's inequality states that if each $\randvar_i$ (all of which are independent) always lies in the interval $[a_i, b_i]$, then it is true that
\begin{equation*}
\probOf\left(\left|\empmean - \expct\pbox{\empmean}\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{\sum_{i = 1}^{\samplesize}(b_i -a_i)^2}\right)}.
\end{equation*}
Line~\ref{alg:mon-sam-sample} shows that $\vari{sgn}_\vari{i}$ has a value in $\{-1, 1\}$ that is multiplied with $O(k)$ $\prob_i\in [0, 1]$, which implies the range for each $\randvar_i$ is $[-1, 1]$.
Using Hoeffding's inequality, we then get:
\begin{equation*}
\probOf\left(~\left| \empmean - \expct\pbox{\empmean} ~\right| \geq \error\right) \leq 2\exp{\left(-\frac{2\samplesize^2\error^2}{2^2 \samplesize}\right)} = 2\exp{\left(-\frac{\samplesize\error^2}{2 }\right)}\leq \conf,
\end{equation*}
where the last inequality dictates our choice of $\samplesize$ in \Cref{alg:mon-sam-global2}.
For the claimed probability bound of $\probOf\left(\left|\vari{acc} - \rpoly(\prob_1,\ldots, \prob_\numvar)\right|> \error \cdot \abs{\circuit}(1,\ldots, 1)\right) \leq \conf$, note that in the algorithm, \vari{acc} is exactly $\empmean \cdot \abs{\circuit}(1,\ldots, 1)$. Multiplying the rest of the terms by the additional factor $\abs{\circuit}(1,\ldots, 1)$ yields the said bound.
This concludes the proof for the first claim of theorem~\ref{lem:mon-samp}. Next, we prove the claim on the runtime.
\paragraph*{Run-time Analysis}
The runtime of the algorithm is dominated first by \Cref{alg:mon-sam-onepass} (which by \Cref{lem:one-pass} takes time $O\left({\size(\circuit)}\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$) and then by $\samplesize$ iterations of the loop in \Cref{alg:sampling-loop}. Each iteration's run time is dominated by the call to \sampmon in \Cref{alg:mon-sam-sample} (which by \Cref{lem:sample} takes $O\left(\log{k} \cdot k \cdot {\depth(\circuit)}\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$
) and the check in \Cref{alg:check-duplicate-block}, which by the subsequent argument takes $O(k\log{k})$ time. We sort the $O(k)$ variables by their block IDs and then check if there is a duplicate block ID or not. Combining all the times discussed here gives us the desired overall runtime.
\qed
\end{proof}
\subsection{Proof of \Cref{cor:approx-algo-const-p}}
\begin{proof}
The result follows by first noting that by definition of $\gamma$, we have
\[\rpoly(1,\dots,1)= (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1).\]
Further, since each $\prob_i\ge \prob_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot \rpoly(1,\dots,1).\]
The above equality and inequality together imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge \prob_0^k\cdot (1-\gamma)\cdot \abs{{\circuit}}(1,\dots,1)$.
Applying this bound in the runtime bound in \Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$ follows by noting that $\depth({\circuit})\le \size({\circuit})$ and absorbing all factors that just depend on $k$.
\qed
\end{proof}
\subsection{Proof of~\Cref{lem:ctidb-gamma}}
\begin{proof}
The circuit \circuit' is built from \circuit in the following manner. For each input gate $\gate_i$ with $\gate_i.\val = X_\tup$, replace $\gate_i$ with the circuit \subcircuit encoding the sum $\sum_{j = 1}^\bound j\cdot X_{\tup, j}$. We argue that \circuit' is a valid circuit by the following facts. Let $\pdb = \inparen{\worlds, \bpd}$ be the original \abbrCTIDB \circuit was generated from. Then, by~\Cref{prop:ctidb-reduct} there exists a \abbrOneBIDB $\pdb' = \inparen{\onebidbworlds{\tupset'}, \bpd'}$, with $\tupset' = \inset{\intup{\tup, j}~|~\tup\in\tupset, j\in\pbox{\bound}}$, from which the conversion from \circuit to \circuit' follows. Both $\polyf\inparen{\circuit}$ and $\polyf\inparen{\circuit'}$ have the same expected multiplicity since (by~\Cref{prop:ctidb-reduct}) the distributions $\bpd$ and $\bpd'$ are equivalent and each $j\cdot\worldvec'_{\tup, j} = \worldvec_\tup$ for $\worldvec'\in\inset{0, 1}^{\bound\numvar}$ and $\worldvec\in\worlds$. Finally, note that because there exists a (sub) circuit encoding $\sum_{j = 1}^\bound j\cdot X_{\tup, j}$ that is a \emph{balanced} binary tree, the above conversion implies the claimed size and depth bounds of the lemma.
Next we argue the claim on $\gamma\inparen{\circuit'}$. Consider the list of expanded monomials $\expansion{\circuit}$ for \abbrCTIDB circuit \circuit. Let \monom be an arbitrary monomial such that the set of variables in \monom is $\encMon = X_{\tup_1}^{d_1},\ldots,X_{\tup_\ell}^{d_\ell}$ with $\ell$ variables. Then \monom yields the set of monomials $\vari{E}_\monom\inparen{\circuit'}=\inset{j_1^{d_1}\cdot X_{\tup, j_1}^{d_1}\times\cdots\times j_\ell^{d_\ell}\cdot X_{\tup, j_\ell}^{d_\ell}}_{j_1,\ldots, j_\ell \in \pbox{0, \bound}}$ in $\expansion{\circuit'}$. Recall that a cancellation occurs when we have a monomial \monom' such that there exists $\tup\neq\tup'$ in the same block $\block$ where variables $X_\tup, X_{\tup'}$ are in the set of variables $\encMon'$ of \monom'. Observe that cancellations can only occur for each $X_{\tup}^{d_\tup}\in \encMon$, where the expansion $\inparen{\sum_{j = 1}^\bound j\cdot X_{\tup, j}}^{d_\tup}$ represents the monomial $X_\tup^{d_\tup}$ in $\tupset'$. Consider the number of cancellations for $\inparen{\sum_{j = 1}^\bound j\cdot X_{\tup, j}}^{d_\tup}$. Then $\gamma \leq 1 - \bound^{-\inparen{d_\tup - 1}}$, since for each element in the set of cross products $\inset{\bigtimes_{i\in\pbox{d_\tup}, j_i\in\pbox{\bound}}X_{\tup, j_i}}$ there are \emph{exactly} $\bound$ surviving elements with $j_1=\cdots=j_{d_\tup}$, i.e., $X_j^{d_\tup}$ for each $j\in\pbox{\bound}$. The remaining $\bound^{d_\tup} - \bound$ cross terms cancel. Regarding the whole monomial \monom', it is the case that the proportion of non-cancellations across each $X_\tup^{d_\tup}\in\encMon'$ multiply because non-cancelling terms for $X_\tup$ can only be joined with non-cancelling terms of $X_{\tup'}^{d_{\tup'}}\in\encMon'$ for $\tup\neq\tup'$.
This then yields the fraction of cancelled monomials $1 - \prod_{i = 1}^{\ell}\bound^{-\inparen{d_i - 1}}\leq \gamma \leq 1 - \bound^{-\inparen{k - 1}}$ where the inequalities take into account the fact that $\sum_{i = 1}^\ell d_i \leq k$.
Since this is true for arbitrary \monom, the bound follows for $\polyf\inparen{\circuit'}$.
\qed
\end{proof}
\subsection{Proof of \Cref{lem:val-ub}}\label{susec:proof-val-up}
\label{app:proof-lem-val-ub}
We will prove \Cref{lem:val-ub} by considering the two cases separately. We start by considering the case when $\circuit$ is a tree:
\begin{Lemma}
\label{lem:C-ub-tree}
Let $\circuit$ be a tree (i.e. the sub-circuits corresponding to two children of a node in $\circuit$ are completely disjoint). Then we have
\[\abs{\circuit}(1,\dots,1)\le \left(\size(\circuit)\right)^{\degree(\circuit)+1}.\]
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:C-ub-tree}]
For notational simplicity define $N=\size(\circuit)$ and $k=\degree(\circuit)$.
We use induction on $\depth(\circuit)$ to show that $\abs{\circuit}(1,\ldots, 1) \leq N^{k+1 }$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient or constant. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and by \Cref{def:degree} it is the case that $0 \leq k = \degree\inparen{\circuit} \leq 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{k+1} = 1^{k + 1} = 1$ for $k \in \inset{0, 1}$.
Assume for $\ell > 0$ an arbitrary circuit \circuit of $\depth(\circuit) \leq \ell$ that it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{k+1 }$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) = \ell + 1$. The sink can only be either a $\circmult$ or $\circplus$ gate. Let $k_\linput, k_\rinput$ denote \degree($\circuit_\linput$) and \degree($\circuit_\rinput$) respectively. Consider when sink node is $\circmult$.
Then note that
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\cdot \abs{\circuit_\rinput}(1,\ldots, 1) \nonumber\\
&\leq (N-1)^{k_\linput+1} \cdot (N - 1)^{k_\rinput+1}\nonumber\\
&= (N-1)^{k+1}\label{eq:sumcoeff-times-upper}\\
&\leq N^{k + 1}.\nonumber
\end{align}
In the above the first inequality follows from the inductive hypothesis (and the fact that the size of either subtree is at most $N-1$) and \Cref{eq:sumcoeff-times-upper} follows by \cref{def:degree} which states that for $k = \degree(\circuit)$ we have $k=k_\linput+k_\rinput+1$.
For the case when the sink gate is a $\circplus$ gate, then for $N_\linput = \size(\circuit_\linput)$ and $N_\rinput = \size(\circuit_\rinput)$ we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \nonumber\\
&\leq
N_\linput^{k+1} + N_\rinput^{k+1}\nonumber\\
&\leq (N-1)^{k+1 } \label{eq:sumcoeff-plus-upper}\\
&\leq N^{k+1}.\nonumber
\end{align}
In the above, the first inequality follows from the inductive hypothesis and \cref{def:degree} (which implies the fact that $k_\linput,k_\rinput\le k$). Note that the RHS of this inequality is maximized when the base and exponent of one of the terms is maximized. The second inequality follows from this fact as well as the fact that since $\circuit$ is a tree we have $N_\linput+N_\rinput=N-1$ and, lastly, the fact that $k\ge 0$. This completes the proof.
\end{proof}
The upper bound in \Cref{lem:val-ub} for the general case is a simple variant of the above proof (but we present a proof sketch of the bound below for completeness):
\begin{Lemma}
\label{lem:C-ub-gen}
Let $\circuit$ be a (general) circuit.
Then we have
\[\abs{\circuit}(1,\dots,1)\le 2^{2^{\degree(\circuit)}\cdot \depth(\circuit)}.\]
\end{Lemma}
\begin{proof}[Proof Sketch of \Cref{lem:C-ub-gen}]
We use the same notation as in the proof of \Cref{lem:C-ub-tree} and further define $d=\depth(\circuit)$. We will prove by induction on $\depth(\circuit)$ that $\abs{\circuit}(1,\ldots, 1) \leq 2^{2^k\cdot d }$. The base case argument is similar to that in the proof of \Cref{lem:C-ub-tree}. In the inductive case we have that $d_\linput,d_\rinput\le d-1$.
For the case when the sink node is $\times$, we get that
\begin{align*}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \\
&\leq {2^{2^{k_\linput}\cdot d_\linput}} \circmult {2^{2^{k_\rinput}\cdot d_\rinput}}\\
&\leq 2^{2\cdot 2^{k-1}\cdot (d-1)}\\
&\leq 2^{2^k d}.
\end{align*}
In the above the first inequality follows from inductive hypothesis while the second inequality follows from the fact that $k_\linput,k_\rinput\le k-1$ and $d_\linput, d_\rinput\le d-1$, where we substitute the upperbound into every respective term.
Now consider the case when the sink node is $+$, we get that
\begin{align*}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \\
&\leq 2^{2^{k_\linput}\cdot d_\linput} + 2^{2^{k_\rinput}\cdot d_\rinput}\\
&\leq 2\cdot {2^{2^k(d-1)} } \\
&\leq 2^{2^kd}.
\end{align*}
In the above the first inequality follows from the inductive hypothesis while the second inequality follows from the facts that $k_\linput,k_\rinput\le k$ and $d_\linput,d_\rinput\le d-1$. The final inequality follows from the fact that $k\ge 0$.
\qed
\end{proof}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

View File

@ -0,0 +1,27 @@
%root: main.tex
In the following definitions and examples, we use the following polynomial as an example:
\begin{equation}
\label{eq:poly-eg}
\poly(X, Y) = 2X^2 + 3XY - 2Y^2.
\end{equation}
\begin{Definition}[Pure Expansion]
The pure expansion of a polynomial $\poly$ is formed by computing all products of sums occurring in $\poly$, without combining like monomials. The pure expansion of $\poly$ generalizes \Cref{def:smb} by allowing monomials $m_i = m_j$ for $i \neq j$.
\end{Definition}
Note that, similar in spirit to \Cref{def:reduced-bi-poly}, $\expansion{\circuit}$ (\Cref{def:expand-circuit}) reduces all variable exponents $e > 1$ to $e = 1$. Further, it is true that $\expansion{\circuit}$ is the pure expansion of $\circuit$.
\begin{Example}[Example of Pure Expansion]\label{example:expr-tree-T}
Consider the factorized representation $(X+ 2Y)(2X - Y)$ of the polynomial in \Cref{eq:poly-eg}.
Its circuit $\circuit$ is illustrated in \Cref{fig:circuit}.
The pure expansion of the product is $2X^2 - XY + 4XY - 2Y^2$. As an additional example of \Cref{def:expand-circuit}, $\expansion{\circuit}=[(X, 2), (XY, -1), (XY, 4), (Y, -2)]$.
\end{Example}
$\expansion{\circuit}$ effectively\footnote{The minor difference here is that $\expansion{\circuit}$ encodes the \emph{reduced} form over the SOP pure expansion of the compressed representation, as opposed to the \abbrSMB representation} encodes the \emph{reduced} form of $\polyf\inparen{\circuit}$, decoupling each monomial into a set of variables $\monom$ and a real coefficient $\coef$.
However, unlike the constraint on the input $\poly$ to compute $\rpoly$, the input circuit $\circuit$ does not need to be in \abbrSMB/SOP form.
\begin{Example}[Example for \Cref{def:positive-circuit}]\label{ex:def-pos-circ}
Using the same factorization from \Cref{example:expr-tree-T}, $\polyf(\abs{\circuit}) = (X + 2Y)(2X + Y) = 2X^2 +XY +4XY + 2Y^2 = 2X^2 + 5XY + 2Y^2$. Note that this \textit{is not} the same as the polynomial from \Cref{eq:poly-eg}. As an example of the slight abuse of notation we alluded to, $\polyf\inparen{\abs{\circuit}\inparen{1,\ldots, 1}} =2\inparen{1}^2 + 5\inparen{1}\inparen{1} + 2\inparen{1}^2 = 9$.
\end{Example}
\begin{Definition}[Subcircuit]
A subcircuit of a circuit $\circuit$ is a circuit \subcircuit such that \subcircuit is a DAG \textit{subgraph} of the DAG representing \circuit. The sink of \subcircuit has exactly one gate \gate.
\end{Definition}

View File

@ -0,0 +1,28 @@
%root:main.tex
\begin{algorithm}[t]
\caption{$\approxq(\circuit, \vct{p}, \conf, \error)$}
\label{alg:mon-sam}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Require $\vct{p} = (\prob_1,\ldots, \prob_\numvar)$ $\in [0, 1]^\numvar$
\Require $\conf$ $\in [0, 1]$
\Require $\error$ $\in [0, 1]$
\Ensure \vari{acc} $\in \mathbb{R}$
\State $\accum \gets 0$\label{alg:mon-sam-global1}
\State $\numsamp \gets \ceil{\frac{2 \log{\frac{2}{\conf}}}{\error^2}}$\label{alg:mon-sam-global2}
\State $(\circuit_\vari{mod}, \vari{size}) \gets $ \onepass($\circuit$)\label{alg:mon-sam-onepass}\Comment{$\onepass$ is \Cref{alg:one-pass-iter}}
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\circuit_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon is \Cref{alg:sample}. Note that $\vari{sgn}_\vari{i}$ is the \emph{sign} of the monomial's coefficient and \emph{not} the coefficient itself}
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\vari{M}}p_j$\label{alg:mon-sam-assign1}\Comment{\vari{M} is the sampled monomial's set of variables (cf.\ \cref{subsec:sampmon-remarks})}
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times\; \vari{sgn}_\vari{i}$\label{alg:mon-sam-product}
\State $\accum \gets \accum + \vari{Y}_\vari{i}$\Comment{Store the sum over all samples}\label{alg:mon-sam-add}
\EndIf
\EndFor
\State $\vari{acc} \gets \vari{acc} \times \frac{\vari{size}}{\numsamp}$\label{alg:mon-sam-global3}
\State \Return \vari{acc}
\end{algorithmic}
\end{algorithm}

View File

@ -0,0 +1,22 @@
%root: main.tex
\begin{proof}
We first argue that $\rpoly_{G}^\kElem(\prob,\ldots, \prob) = \sum\limits_{i = 0}^{2\kElem} c_i \cdot \prob^i$. First, since $\poly_G(\vct{X})$ has degree $2$, it follows that $\poly_G^\kElem(\vct{X})$ has degree $2\kElem$. By definition, $\rpoly_{G}^{\kElem}(\vct{X})$ sets every exponent $e > 1$ to $e = 1$, which means that $\degree(\rpoly_{G}^\kElem)\le \degree(\poly_G^\kElem)= 2k$. Thus, if we think of $\prob$ as a variable, then $\rpoly_{G}^{\kElem}(\prob,\dots,\prob)$ is a univariate polynomial of degree at most $\degree(\rpoly_{G}^\kElem)\le 2k$. Thus, we can write
\begin{equation*}
\rpoly_{G}^{\kElem}(\prob,\ldots, \prob) = \sum_{i = 0}^{2\kElem} c_i \prob^i
\end{equation*}
We note that $c_i$ is {\em exactly} the number of monomials in the SMB expansion of $\poly_{G}^{\kElem}(\vct{X})$ composed of $i$ distinct variables.\footnote{Since $\rpoly_G^\kElem(\vct{X})$ does not have any monomial with degree $< 2$, it is the case that $c_0 = c_1 = 0$ but for the sake of simplicity we will ignore this observation.}
Given that we then have $2\kElem + 1$ distinct values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq2\kElem$, it follows that we have a linear system of the form $\vct{M} \cdot \vct{c} = \vct{b}$ where the $i$th row of $\vct{M}$ is $\inparen{\prob_i^0\ldots\prob_i^{2\kElem}}$, $\vct{c}$ is the coefficient vector $\inparen{c_0,\ldots, c_{2\kElem}}$, and $\vct{b}$ is the vector such that $\vct{b}[i] = \rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$. In other words, matrix $\vct{M}$ is the Vandermonde matrix, from which it follows that we have a matrix with full rank (the $\prob_i$'s are distinct), and we can solve the linear system in $O(k^3)$ time (e.g., using Gaussian Elimination) to determine $\vct{c}$ exactly.
Thus, after $O(k^3)$ work, we know $\vct{c}$ and in particular, $c_{2k}$ exactly.
Next, we show why we can compute $\numocc{G}{\kmatch}$ from $c_{2k}$ in $O(1)$ additional time.
We claim that $c_{2\kElem}$ is $\kElem! \cdot \numocc{G}{\kmatch}$. This can be seen intuitively by looking at the expansion of the original factorized representation
\[\poly_{G}^\kElem(\vct{X}) = \sum_{\substack{(i_1, j_1),\cdots,(i_\kElem, j_\kElem) \in E}}X_{i_1}X_{j_1}\cdots X_{i_\kElem}X_{j_\kElem},\]
where a unique $\kElem$-matching in the multi-set of product terms can be selected $\prod_{i = 1}^\kElem i = \kElem!$ times.
Indeed, note that each $\kElem$-matching $(i_1, j_1)\ldots$ $(i_k, j_k)$ in $G$ corresponds to the monomial $\prod_{\ell = 1}^\kElem X_{i_\ell}X_{j_\ell}$ in $\poly_{G}^\kElem(\vct{X})$, with distinct indexes, and this implies that each distinct $\kElem$-matching appears the exact number of permutations that exist for its particular set of $\kElem$ edges, or $k!$.
Since, as noted earlier, $c_{2\kElem}$ represents the number of monomials with $2\kElem$ distinct variables, then it must be that $c_{2\kElem}$ is the overall number of $\kElem$-matchings. And since we have $\kElem!$ copies of each distinct $\kElem$-matching, it follows that
$c_{2\kElem}= \kElem! \cdot \numocc{G}{\kmatch}$.
Thus, simply dividing $c_{2\kElem}$ by $\kElem!$ gives us $\numocc{G}{\kmatch}$, as needed. \qed
\end{proof}

78
arXiv/app_hard_linsys.tex Normal file
View File

@ -0,0 +1,78 @@
%root: main.tex
\begin{proof}
The proof consists of two parts. First we need to show that a vector $\vct{b}$ satisfying the linear system exists and further can be computed in $O(m)$ time. Second we need to show that $\numocc{G}{\tri}, \numocc{G}{\threedis}$ can indeed be computed in time $O(1)$.
The lemma claims that for $\vct{M} =
\begin{pmatrix}
1 - 3\prob & -(3\prob^2 - \prob^3)\\
10(3\prob^2 - \prob^3) & 10(3\prob^2 - \prob^3)
\end{pmatrix}$, $\vct{x} =
\begin{pmatrix}
\numocc{G}{\tri}\\
\numocc{G}{\threedis}
\end{pmatrix}$
satisfies the linear system $\vct{M} \cdot \vct{x} = \vct{b}$.
To prove the first step, we use \Cref{lem:qE3-exp} to derive the following equality (dropping the superscript and referring to $G^{(1)}$ as $G$):
\begin{align}
\numocc{G}{\ed}\prob^2 &+ 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3 + 6\numocc{G}{\oneint}\prob^4 \nonumber\\
&+ 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6 = \rpoly_{G}^3(\prob,\ldots, \prob)\label{eq:lem-qE3-exp}\\
\numocc{G}{\tri}&+\numocc{G}{\threepath}\prob+\numocc{G}{\twopathdis}\prob^2+\numocc{G}{\threedis}\prob^3\nonumber\\
&= \frac{\rpoly_{G}^3(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath}-\numocc{G}{\twodis}\prob-\numocc{G}{\oneint}\prob\label{eq:b1-alg-1}\\
\numocc{G}{\tri}(1-3\prob) &- \numocc{G}{\threedis}(3\prob^2 -\prob^3) = \nonumber\\
\frac{\rpoly_{G}^3(\prob,\ldots, \prob)}{6\prob^3} &- \frac{\numocc{G}{\ed}}{6\prob} - \numocc{G}{\twopath}-\numocc{G}{\twodis}\prob-\numocc{G}{\oneint}\prob\nonumber\\
&-\left[\numocc{G}{\threepath}\prob+3\numocc{G}{\tri}\prob\right]-\left[\numocc{G}{\twopathdis}\prob^2+3\numocc{G}{\threedis}\prob^2\right]\label{eq:b1-alg-2}
\end{align}
\Cref{eq:lem-qE3-exp} is the result of \Cref{lem:qE3-exp}. We obtain the remaining equations through standard algebraic manipulations.
Note that the LHS of \Cref{eq:b1-alg-2} is obtained using \cref{eq:2pd-3d} and \cref{eq:3p-3tri} and is indeed the product $\vct{M}[1] \cdot \vct{x}[1]$. Further note that this product is equal to the RHS of \Cref{eq:b1-alg-2}, where every term is computable in $O(m)$ time (by equations (\ref{eq:1e})-(\ref{eq:3p-3tri})). We set $\vct{b}[1]$ to the RHS of \Cref{eq:b1-alg-2}.
We follow the same process in deriving an equality for $G^{(2)}$. Replacing occurrences of $G$ with $G^{(2)}$, we obtain an equation (below) of the form of \cref{eq:b1-alg-2} for $G^{(2)}$. Substituting identities from \cref{lem:3m-G2} and \Cref{lem:tri} we obtain
\begin{align}
0-\left(8\numocc{G}{\threedis}\right.&\left.+6\numocc{G}{\twopathdis}+4\numocc{G}{\oneint}+4\numocc{G}{\threepath}+2\numocc{G}{\tri}\right)(3\prob^2 -\prob^3)=\nonumber\\
&\frac{\rpoly_{\graph{2}}^3(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{\graph{2}}{\ed}}{6\prob} - \numocc{\graph{2}}{\twopath}-\numocc{\graph{2}}{\twodis}\prob-\numocc{\graph{2}}{\oneint}\prob\nonumber\\
&-\left[\numocc{\graph{2}}{\twopathdis}\prob^2+3\numocc{\graph{2}}{\threedis}\prob^2\right]-\left[\numocc{\graph{2}}{\threepath}\prob + 3\numocc{\graph{2}}{\tri}\prob\right]\label{eq:b2-sub-lem}\\
(10\numocc{G}{\tri} &+ 10\numocc{G}{\threedis})(3\prob^2 -\prob^3) = \nonumber\\
&\frac{\rpoly_{\graph{2}}^3(\prob,\ldots, \prob)}{6\prob^3} - \frac{\numocc{\graph{2}}{\ed}}{6\prob} - \numocc{\graph{2}}{\twopath}-\numocc{\graph{2}}{\twodis}\prob-\numocc{\graph{2}}{\oneint}\prob\nonumber\\
&-\left[\numocc{\graph{2}}{\threepath}\prob+3\numocc{\graph{2}}{\tri}\prob\right]-\left[\numocc{\graph{2}}{\twopathdis}\prob^2+3\numocc{\graph{2}}{\threedis}\prob^2\right]\nonumber\\
&+\left(4\numocc{G}{\oneint}+\left[6\numocc{G}{\twopathdis}+18\numocc{G}{\threedis}\right]+\left[4\numocc{G}{\threepath}+12\numocc{G}{\tri}\right]\right)(3\prob^2 - \prob^3)\label{eq:b2-final}
\end{align}
The steps to obtaining \cref{eq:b2-final} are analogous to the derivation immediately preceding. As in the previous derivation, note that the LHS of \Cref{eq:b2-final} is the same as $\vct{M}[2]\cdot \vct{x}[2]$. The RHS of \Cref{eq:b2-final} has terms all computable (by equations (\ref{eq:1e})-(\ref{eq:3p-3tri})) in $O(m)$ time. Setting $\vct{b}[2]$ to the RHS then completes the proof of step 1.
Note that if $\vct{M}$ has full rank then one can compute $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ in $O(1)$ using Gaussian elimination.
To show that $\vct{M}$ indeed has full rank, we show in what follows that $\dtrm{\vct{M}}\ne 0$ for every $\prob\in (0,1)$.
$\dtrm{\vct{M}} = $
\begin{align}
&\begin{vmatrix}
1-3\prob &-(3\prob^2 - \prob^3)\\
10(3\prob^2 - \prob^3) &10(3\prob^2 - \prob^3)
\end{vmatrix}
= (1-3\prob)\cdot 10(3\prob^2-\prob^3) +10(3\prob^2-\prob^3)\cdot(3\prob^2 - \prob^3)\nonumber\\
&=10(3\prob^2-\prob^3)\cdot(1-3\prob+3\prob^2-\prob^3) = 10(3\prob^2-\prob^3)\cdot(-\prob^3+3\prob^2-3\prob + 1)\nonumber\\
&=10\prob^2(3 - \prob)\cdot(1-\prob)^3\label{eq:det-final}
\end{align}
From \Cref{eq:det-final} it can easily be seen that the roots of $\dtrm{\vct{M}}$ are $0, 1,$ and $3$. Hence there are no roots in $(0, 1)$ and \Cref{lem:lin-sys} follows.
\qed
\end{proof}
\subsection{Proof of \Cref{th:single-p}}
\begin{proof}
We can compute $\graph{2}$ from $\graph{1}$ in $O(m)$ time. Additionally, if in time $O(T(m))$, we have $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, then the theorem follows by \Cref{lem:lin-sys}.
\qed
\end{proof}
In other words, if \Cref{th:single-p} holds, then so must \Cref{th:single-p-hard}.
\subsection{Proof of \Cref{th:single-p-hard}}
\begin{proof}
For the sake of contradiction, assume that for any $G$, we can compute $\rpoly_{G}^3(\prob,\dots,\prob)$ in $o\inparen{m^{1+\eps_0}}$ time.
Let $G$ be the input graph.
Then by \Cref{th:single-p} we can compute $\numocc{G}{\tri}$ in further time $o\inparen{m^{1+\eps_0}}+O(m)$. Thus, the overall reduction takes $o\inparen{m^{1+\eps_0}}+O(m)= o\inparen{m^{1+\eps_0}}$ time, which violates \Cref{conj:graph}.
\qed
\end{proof}

View File

@ -0,0 +1,21 @@
%root: main.tex
We need all the possible edge patterns in an arbitrary $G$ with at most three distinct edges. We have already seen $\tri,\threepath$ and $\threedis$, so we define the remaining patterns:
\begin{itemize}
\item Single Edge $\left(\ed\right)$
\item 2-path ($\twopath$)
\item 2-matching ($\twodis$)
\item 3-star ($\oneint$)---this is the graph that results when all three edges share exactly one common endpoint. The remaining endpoint for each edge is disconnected from any endpoint of the remaining two edges.
\item Disjoint Two-Path ($\twopathdis$)---this subgraph consists of a two-path and a remaining disjoint edge.
\end{itemize}
For any graph $G$, the following formulas for $\numocc{G}{H}$ compute their respective patterns exactly in $O(\numedge)$ time, with $d_i$ representing the degree of vertex $i$ (proofs are in \Cref{app:easy-counts}):
\begin{align}
&\numocc{G}{\ed} = \numedge, \label{eq:1e}\\
&\numocc{G}{\twopath} = \sum_{i \in V} \binom{d_i}{2} \label{eq:2p}\\
&\numocc{G}{\twodis} = \sum_{(i, j) \in E} \frac{\numedge - d_i - d_j + 1}{2}\label{eq:2m}\\
&\numocc{G}{\oneint} = \sum_{i \in V} \binom{d_i}{3}\label{eq:3s}\\
&\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis} = \sum_{(i, j) \in E} \binom{\numedge - d_i - d_j + 1}{2}\label{eq:2pd-3d}\\
&\numocc{G}{\threepath} + 3\numocc{G}{\tri} = \sum_{(i, j) \in E} (d_i - 1) \cdot (d_j - 1)\label{eq:3p-3tri}
\end{align}

View File

@ -0,0 +1,79 @@
%root: main.tex
\subsection{Tools to prove \Cref{th:single-p-hard}}
Note that $\rpoly_{G}^3(\prob,\ldots, \prob)$ as a polynomial in $\prob$ has degree at most six. Next, we figure out the exact coefficients since this would be useful in our arguments:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:qE3-exp}
For any $\prob$, we have:
{\small
\begin{align}
\rpoly_{G}^3(\prob,\ldots, \prob) &= \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3\nonumber\\
&+ 6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6.\label{claim:four-one}
\end{align}}
\end{Lemma}
\subsubsection{Proof for \Cref{lem:qE3-exp}}
\begin{proof}
By definition we have that
\[\poly_{G}^3(\vct{X}) = \sum_{\substack{(i_1, j_1), (i_2, j_2), (i_3, j_3) \in E}}~\; \prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}.\]
Hence $\rpoly_{G}^3(\vct{X})$ has degree six. Note that the monomial $\prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}$ will contribute to the coefficient of $\prob^\nu$ in $\rpoly_{G}^3(\vct{X})$, where $\nu$ is the number of distinct variables in the monomial.
Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2),$ and $e_3 = (i_3, j_3)$.
We compute $\rpoly_{G}^3(\vct{X})$ by considering each of the three forms that the triple $(e_1, e_2, e_3)$ can take.
\textsc{case 1:} $e_1 = e_2 = e_3$ (all edges are the same). When we have that $e_1 = e_2 = e_3$, then the monomial corresponds to $\numocc{G}{\ed}$. There are exactly $\numedge$ such triples, each with a $\prob^2$ factor in $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\textsc{case 2:} This case occurs when there are two distinct edges of the three, call them $e$ and $e'$. When there are two distinct edges, there is then the occurrence when $2$ variables in the triple $(e_1, e_2, e_3)$ are bound to $e$. There are three combinations for this occurrence in $\poly_{G}^3(\vct{X})$. Analogously, there are three such occurrences in $\poly_{G}^3(\vct{X})$ when there is only one occurrence of $e$, i.e. $2$ of the variables in $(e_1, e_2, e_3)$ are $e'$.
This implies that all $3 + 3 = 6$ combinations of two distinct edges $e$ and $e'$ contribute to the same monomial in $\rpoly_{G}^3$.
Since $e\ne e'$, this case produces the following edge patterns: $\twopath, \twodis$, which contribute $6\prob^3$ and $6\prob^4$ respectively to $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\textsc{case 3:} All $e_1,e_2$ and $e_3$ are distinct. For this case, we have $3! = 6$ permutations of $(e_1, e_2, e_3)$, each of which contribute to the same monomial in the \textsc{SMB} representation of $\poly_{G}^3(\vct{X})$. This case consists of the following edge patterns: $\tri, \oneint, \threepath, \twopathdis, \threedis$, which contribute $6\prob^3, 6\prob^4, 6\prob^4, 6\prob^5$ and $6\prob^6$ respectively to $\rpoly_{G}^3\left(\prob,\ldots, \prob\right)$.
\qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Since $\prob$ is fixed, \Cref{lem:qE3-exp} gives us one linear equation in $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ (we can handle the other counts due to equations (\ref{eq:1e})-(\ref{eq:3p-3tri})). However, we need to generate one more independent linear equation in these two variables. Towards this end we generate another graph related to $G$:
\begin{Definition}\label{def:Gk}
For $\ell \geq 1$, let graph $\graph{\ell}$ be a graph generated from an arbitrary graph $G$, by replacing every edge $e$ of $G$ with an $\ell$-path, such that all inner vertices of an $\ell$-path replacement edge are disjoint from all other vertices.\footnote{Note that $G\equiv \graph{1}$.}
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We will prove \Cref{th:single-p-hard} by the following reduction:
\begin{Theorem}\label{th:single-p}
Fix $\prob\in (0,1)$. Let $G$ be a graph on $\numedge$ edges.
If we can compute $\rpoly_{G}^3(\prob,\dots,\prob)$ exactly in $T(\numedge)$ time, then we can exactly compute $\numocc{G}{\tri}$
in $O\inparen{T(\numedge) + \numedge}$ time.
\end{Theorem}
For clarity, we repeat the notion of $\numocc{G}{H}$ to mean the count of subgraphs in $G$ isomorphic to $H$.
The following lemmas relate these counts in $\graph{2}$ to $\graph{1}$ ($G$). The lemmas are used to prove \Cref{lem:lin-sys}.
\begin{Lemma}\label{lem:3m-G2}
The $3$-matchings in graph $\graph{2}$ satisfy the identity:
\begin{align*}
\numocc{\graph{2}}{\threedis} &= 8 \cdot \numocc{\graph{1}}{\threedis} + 6 \cdot \numocc{\graph{1}}{\twopathdis}\\
&+ 4 \cdot \numocc{\graph{1}}{\oneint} + 4 \cdot \numocc{\graph{1}}{\threepath} + 2 \cdot \numocc{\graph{1}}{\tri}.
\end{align*}
\end{Lemma}
\begin{Lemma}\label{lem:tri}
For $\ell > 1$ and any graph $\graph{\ell}$, $\numocc{\graph{\ell}}{\tri} = 0$.
\end{Lemma}
Finally, the following result immediately implies \Cref{th:single-p}:
\begin{Lemma}\label{lem:lin-sys}
Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^2$ such that
\[ \begin{pmatrix}
1 - 3\prob & -(3\prob^2 - \prob^3)\\
10(3\prob^2 - \prob^3) & 10(3\prob^2 - \prob^3)
\end{pmatrix}
\cdot
\begin{pmatrix}
\numocc{G}{\tri}\\
\numocc{G}{\threedis}
\end{pmatrix}
=\vct{b},
\]
allowing us to compute $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ in $O(1)$ time.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -0,0 +1,157 @@
%root: main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{\Cref{lem:pdb-for-def-qk}}
\begin{Lemma}\label{lem:pdb-for-def-qk}
Assuming that each $v \in \vset$ has degree $\geq 1$,\footnote{This is WLOG, since any vertex with degree $0$ can be dropped without affecting the result of our hard query.} the \abbrPDB relations encoding the edges for $\poly_G^\kElem$ of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
\end{Lemma}
\begin{proof}[Proof of \Cref{lem:pdb-for-def-qk}]
Only two relations need be constructed, one for the set $\vset$ and one for the set $\edgeSet$. By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$. Given that the degree of each $v \in \vset$ is at least $1$, we have that $m\ge \Omega(n)$,
and this yields the claimed runtime.
\qed
\end{proof}
\subsection{Proof of \Cref{lem:tdet-om}}
\begin{proof}
By the recursive definition of $\qruntimenoopt{\cdot, \cdot}$ (see \Cref{sec:gen}), we have the following equation for our hard query $\query$ when $k = 1$ (we denote this as $\query^1$):
\begin{equation*}
\qruntimenoopt{\query^1, \tupset} = \abs{\tupset.\vset} + \abs{\tupset.\edgeSet} + \abs{\tupset.\vset} + \jointime{\tupset.\vset , \tupset.\edgeSet , \tupset.\vset}.
\end{equation*}
We argue that $\jointime{\tupset.\vset , \tupset.\edgeSet , \tupset.\vset}$ is at most $O(\numedge)$ by noting that there exists an algorithm that computes $\tupset.\vset\join\tupset.\edgeSet\join\tupset.\vset$ in the same runtime.\footnote{Indeed the trivial algorithm that computes the obvious pair-wise joins has the claimed runtime. That is, we first compute $\tupset.\vset\join\tupset.\edgeSet$, which takes $O(m)$ (assuming $\tupset.\vset$ is stored in a hash map) since tuples in $\tupset.\vset$ can only filter tuples in $\tupset.\edgeSet$. The resulting subset of tuples in $\tupset.\edgeSet$ is then again joined (on the right) with $\tupset.\vset$, which by the same argument as before also takes $O(m)$ time, as desired.} Then by the assumption of \Cref{lem:pdb-for-def-qk} (each $v \in \vset$ has degree $\geq 1$), the sum of the first three terms is $\bigO{\numedge}$. We then obtain that $\qruntimenoopt{\query^1, \tupset} = \bigO{\numedge} + \bigO{\numedge} = \bigO{\numedge}$. For $\query^k = \query_1^1 \times\cdots\times\query_k^1$, we have the recurrence $\qruntimenoopt{\query^k, \tupset} = \qruntimenoopt{\query_1^1, \tupset} + \cdots +\qruntimenoopt{\query_k^1, \tupset} + \jointime{\query_1^1,\cdots,\query_k^1}$. Since $\query^1$ outputs a count, computing the join $\query_1^1\join\cdots\join\query_k^1$ is just multiplying $k$ numbers, which takes $O(k)$ time. Thus, we have
\[\qruntimenoopt{\query^k, \tupset} \le k\cdot O(m)+O(k)\le O(km),\]
as desired.
\qed
\end{proof}
\subsection{\Cref{lem:qEk-multi-p}}
\noindent The following lemma reduces the problem of counting $\kElem$-matchings in a graph to our problem (and proves \Cref{thm:mult-p-hard-result}):
\begin{Lemma}\label{lem:qEk-multi-p}
Let $\prob_0,\ldots, \prob_{2\kElem}$ be distinct values in $(0, 1]$. Then given the values $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for $0\leq i\leq 2\kElem$, the number of $\kElem$-matchings in $G$ can be computed in $\bigO{\kElem^3}$ time.
\end{Lemma}
\subsection{Proof of Lemma~\ref{lem:qEk-multi-p}}\label{subsec:c2k-proportional}
\input{app_hard_lem-mult-p}
\subsection{Proof of Theorem~\ref{thm:mult-p-hard-result}}
\begin{proof}
For the sake of contradiction, assume we can solve our problem in $\littleo{\kmatchtime}$ time. Given a graph $G$ by \Cref{lem:pdb-for-def-qk} we can compute the \abbrPDB encoding in $\bigO{\numedge}$ time. Then after we run our algorithm on $\rpoly_G^\kElem$, we get $\rpoly_{G}^\kElem(\prob_i,\ldots, \prob_i)$ for every $0\leq i\leq 2\kElem$ in additional $\bigO{k}\cdot \littleo{\kmatchtime}$ time. \Cref{lem:qEk-multi-p} then computes the number of $k$-matchings in $G$ in $O(\kElem^3)$ time. Adding the runtime of all of these steps, we have an algorithm for computing the number of $k$-matchings that runs in time
\begin{align}
&\bigO{\numedge} + \bigO{k}\cdot \littleo{\kmatchtime} + O(\kElem^3)\label{eq:proof-omega-kmatch2}\\
&\le \littleo{\kmatchtime}\label{eq:proof-omega-kmatch4}.
\end{align}
We obtain \Cref{eq:proof-omega-kmatch4} from the facts that $k$ is fixed (relative to $m$) and the assumption that $\kmatchtime\ge\omega(m)$.
Thus we obtain the contradiction that we can achieve a runtime $\littleo{\kmatchtime}$ that is better than the optimal time $\kmatchtime$ required to compute $k$-matchings.
\qed
\end{proof}
\subsection{Subgraph Notation and $O(1)$ Closed Formulas}
\input{app_hard_notation-easy-counts}
\subsection{Proofs of \Cref{eq:1e}-\Cref{eq:3p-3tri}}
\label{app:easy-counts}
The proofs for \Cref{eq:1e}, \Cref{eq:2p} and \Cref{eq:3s} are immediate.
\begin{proof}[Proof of \Cref{eq:2m}]
For edge $(i, j)$ connecting arbitrary vertices $i$ and $j$, finding all other edges in $G$ disjoint to $(i, j)$ is equivalent to finding all edges that are not connected to either vertex $i$ or $j$. The number of such edges is $m - d_i - d_j + 1$, where we add $1$ since edge $(i, j)$ is removed twice when subtracting both $d_i$ and $d_j$. Since the summation is iterating over all edges such that a pair $\left((i, j), (k, \ell)\right)$ will also be counted as $\left((k, \ell), (i, j)\right)$, division by $2$ then eliminates this double counting. Note that $m$ and $d_i$ for all $i \in V$ can be computed in one pass over the set of edges by simply maintaining counts for each quantity. Finally, the summation is also one traversal through the set of edges where each operation is either a lookup ($O(1)$ time) or an addition operation (also $O(1)$ time).
\qed
\end{proof}
\begin{proof}[Proof of \Cref{eq:2pd-3d}]
\Cref{eq:2pd-3d} is true for similar reasons. For edge $(i, j)$, it is necessary to find two additional edges, disjoint or connected. As in our argument for \Cref{eq:2m}, once the number of edges disjoint to $(i, j)$ has been computed, then we only need to consider all possible combinations of two edges from the set of disjoint edges, since it doesn't matter if the two edges are connected or not. Note, the factor $3$ of $\threedis$ is necessary to account for the triple counting of $3$-matchings, since it is indistinguishable to the closed form expression which of the remaining edges are either disjoint or connected to each of the edges in the {\emph{initial}} set of edges disjoint to the edge under consideration. Observe that the disjoint case will be counted $3$ times since each edge of a $3$-matching is visited once, and the same $3$-matching counted in each visitation. For the latter case however, it is true that since the two-path in $\twopathdis$ is connected, there will be no multiple counting by the fact that the summation automatically disconnects the current edge, meaning that a two matching at the current vertex under consideration will not be counted. Thus, $\twopathdis$ will only be counted once, precisely when the single disjoint edge is visited in the summation. The sum over all such edge combinations is precisely then $\numocc{G}{\twopathdis} + 3\numocc{G}{\threedis}$. Note that all factorials can be computed in $O(m)$ time, and then each combination $\binom{n}{2}$ can be performed with constant time operations, yielding the claimed $O(m)$ run time.
\qed
\end{proof}
\begin{proof}[Proof of \Cref{eq:3p-3tri}]
To compute $\numocc{G}{\threepath}$, note that for an arbitrary edge $(i, j)$, a 3-path exists for edge pair $(i, \ell)$ and $(j, k)$ where $i, j, k, \ell$ are distinct. Further, the quantity $(d_i - 1) \cdot (d_j - 1)$ represents the number of 3-edge subgraphs with middle edge $(i, j)$ and outer edges $(i, \ell), (j, k)$ such that $\ell \neq j$ and $k \neq i$. When $k = \ell$, the resulting subgraph is a triangle, and when $k \neq \ell$, the subgraph is a 3-path. Summing over all edges $(i, j)$ gives \Cref{eq:3p-3tri} by observing that each triangle is counted thrice, while each 3-path is counted just once. For reasons similar to \Cref{eq:2m}, all $d_i$ can be computed in $O(m)$ time and each summand can then be computed in $O(1)$ time, yielding an overall $O(m)$ run time.
\qed
\end{proof}
\input{app_hard_single-p-proof-defs}
\subsection{Proofs for \Cref{lem:3m-G2}, \Cref{lem:tri}, and \Cref{lem:lin-sys}}\label{subsec:proofs-struc-lemmas}
Before proceeding, let us introduce a few more helpful definitions.
\begin{Definition}[$\esetType{\ell}$]\label{def:ed-nota}
For $\ell > 1$, we use $\esetType{\ell}$ to denote the set of edges in $\graph{\ell}$. For any graph $\graph{\ell}$, its edges are denoted by a pair $(e, b)$, such that $b \in \{0,\ldots, \ell-1\}$ where $(e,0),\dots,(e,\ell-1)$ is the $\ell$-path that replaces the edge $e$ for $e\in \esetType{1}$.
\end{Definition}
\begin{Definition}[$\eset{\ell}$]
Given an arbitrary subgraph $\sg{1}$ of $\graph{1}$, let $\eset{1}$ denote the set of edges in $\sg{1}$. Define then $\eset{\ell}$ for $\ell > 1$ as the set of edges in the generated subgraph $\sg{\ell}$ (i.e. when we apply \Cref{def:Gk} to $S$ to generate $\sg{\ell}$).
\end{Definition}
For example, consider $\sg{1}$ with edges $\eset{1} = \{e_1\}$. Then the edge set of $\sg{2}$ is defined as $\eset{2} = \{(e_1, 0), (e_1, 1)\}$.
\begin{Definition}[$\binom{\edgeSet}{t}$ and $\binom{\edgeSet}{\leq t}$]\label{def:ed-sub}
Let $\binom{E}{t}$ denote the set of subsets in $E$ with exactly $t$ edges. In a similar manner, $\binom{E}{\leq t}$ is used to mean the subsets of $E$ with $t$ or fewer edges.
\end{Definition}
The following function $f_\ell$ is a mapping from every $3$-edge shape in $\graph{\ell}$ to its `projection' in $\graph{1}$.
\begin{Definition}\label{def:fk}
Let $f_\ell: \binom{\esetType{\ell}}{3} \rightarrow \binom{\esetType{1}}{\leq3}$ be defined as follows. For any element $s \in \binom{\esetType{\ell}}{3}$ such that $s = \pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}$, define:
\[ f_\ell\left(\pbrace{(e_1, b_1), (e_2, b_2), (e_3, b_3)}\right) = \pbrace{e_1, e_2, e_3}.\]
\end{Definition}
\begin{Definition}[$f_\ell^{-1}$]\label{def:fk-inv}
For an arbitrary subgraph $\sg{1}$ of $\graph{1}$ with at most $m \leq 3$ edges, the inverse function $f_\ell^{-1}: \binom{\esetType{1}}{\leq 3}\rightarrow 2^{\binom{\esetType{\ell}}{3}}$ takes $\eset{1}$ and outputs the set of all elements $s \in \binom{\eset{\ell}}{3}$ such that
$f_\ell(s) = \eset{1}$.
\end{Definition}
Note, importantly, that when we discuss $f_\ell^{-1}$, that each \textit{edge} present in $\eset{1}$ must have an edge in $s\in f_\ell^{-1}(\eset{1})$ that projects down to it. In particular, if $|\eset{1}| = 3$, then it must be the case that each $s\in f_\ell^{-1}(\eset{1})$ consists of the following set of edges: $\{ (e_i, b), (e_j, b'), (e_m, b'') \}$, where $i,j$ and $m$ are distinct.
We are now ready to prove the structural lemmas.
To prove the structural lemmas, we will
count the number of occurrences of $\tri$ and $\threedis$ in $\graph{\ell}$. To do so, we count, for each $\eset{1}\in\binom{E_1}{\le 3}$, how many $\threedis$ and $\tri$ subgraphs appear in $f_\ell^{-1}(\eset{1})$.
\subsubsection{Proof of Lemma \ref{lem:3m-G2}}
\begin{proof}
For each subset $\eset{1}\in \binom{E_1}{\le 3}$, we count the number of \emph{$3$-matchings} in the $3$-edge subgraphs of $\graph{2}$ in $f_2^{-1}(\eset{1})$. We first consider the case of $\eset{1} \in \binom{E_1}{3}$, where $\eset{1}$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(\eset{1})$ is the set of all $3$-edge subsets $s \subseteq \{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1),$ $(e_3, 0), (e_3, 1)\}$ such that $f_2(s) = \{e_1, e_2, e_3\}$. The size of the output is denoted $\abs{f_2^{-1}(\eset{1})}$. For the case where each set of edges of the form $\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}$ for $b_i \in \{0, 1\}, i \in [3]$ is present, we have $\abs{f_2^{-1}(\eset{1})} = 8$. We count the number of $3$-matchings from the set $f_2^{-1}(\eset{1})$.
We do a case analysis based on the subgraph $\sg{1}$ induced by $\eset{1}$.
\begin{itemize}
\item $3$-matching ($\threedis$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threedis$, it is the case that edges in $\eset{2}$ are {\em not} disjoint only for the pairs $(e_i, 0), (e_i, 1)$ for $i\in \{1,2,3\}$. By definition, each set of edges in $f_2^{-1}\inparen{\eset{1}}$ is a three matching and $\abs{f_2^{-1}\inparen{\eset{1}}} = 8$ possible 3-matchings.
\begin{itemize}
\item Disjoint Two-Path ($\twopathdis$)
\end{itemize}
For $\sg{1}$ isomorphic to $\twopathdis$ edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint. This means that in $\sg{2}$ edges $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path. We can pick either $(e_1, 0)$ or $(e_1, 1)$ for the first edge in the $3$-matching, while it is necessary to have a $2$-matching from path $(e_2, 0),\ldots(e_3, 1)$. Note that the $4$-path allows for three possible $2$-matchings, specifically,
\begin{equation*}
\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}.
\end{equation*}
Since these two selections can be made independently, $\abs{f_2^{-1}\inparen{\eset{1}}} = 2 \cdot 3 = 6$ \emph{distinct} $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item $3$-star ($\oneint$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\oneint$, the inner edges $(e_i, 1)$ of $\sg{2}$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint. Note that for a valid $3$-matching it must be the case that at most one inner edge can be part of the set of disjoint edges. For the case of when exactly one inner edge is chosen, there exist $3$ possibilities, based on which inner edge is chosen. Note that if $(e_i, 1)$ is chosen, the matching has to choose $(e_j, 0)$ for $j \neq i$ and $(e_{j'}, 0)$ for $j' \neq i, j' \neq j$. The remaining possible 3-matching occurs when all 3 outer edges are chosen, and $\abs{f_2^{-1}\inparen{\eset{1}}} = 4$.
\begin{itemize}
\item $3$-path ($\threepath$)
\end{itemize}
When $\sg{1}$ is isomorphic to $\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected. This means that the edges of $\eset{2}$ form a $6$-path. For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$ or both $(e_i, 1)$ and $(e_j, 0)$ where $j = i + 1$.
There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$ $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$ and $\abs{f^{-1}_2\inparen{\eset{1}}} = 4.$
\begin{itemize}
\item Triangle ($\tri$)
\end{itemize}
For $\sg{1}$ isomorphic to $\tri$, note that it is the case that the edges in $\eset{2}$ are connected in a successive manner, but this time in a cycle, such that $(e_1, 0)$ and $(e_3, 1)$ are also connected. While this is similar to the discussion of the three path above, the first and last edges are not disjoint.
This rules out both subsets of $(e_1, 0), (e_2, 0), (e_3, 1)$ and $(e_1, 0), (e_2, 1), (e_3, 1)$, so that $\abs{f_2^{-1}\inparen{\eset{1}}} = 2$.
\noindent Let us now consider when $\eset{1} \in \binom{E_1}{\leq 2}$, i.e. fixed subgraphs among
\begin{itemize}
\item $2$-matching ($\twodis$), $2$-path ($\twopath$), $1$ edge ($\ed$)
\end{itemize}
When $|\eset{1}| = 2$, we can only pick one from each of two pairs, $\pbrace{(e_1, 0), (e_1, 1)}$ and $\pbrace{(e_2, 0), (e_2, 1)}$. The third edge choice in $\eset{2}$ will break the disjoint property of a $3$-matching. Thus, a $3$-matching cannot exist in $f_2^{-1}(\eset{1})$. A similar argument holds for $|\eset{1}| = 1$, where the output of $f_2^{-1}$ is $\{\emptyset\}$ since there are not enough edges in the input to produce any other output.
Observe that all of the arguments above focused solely on the shape to which subgraph $\sg{1}$ is isomorphic. In other words, all $\eset{1}$ of a given ``shape'' yield the same number of $3$-matchings in $f_2^{-1}(\eset{1})$, and this is why we get the required identity using the above case analysis.
\qed
\end{proof}
\subsubsection{Proof of \Cref{lem:tri}}
\begin{proof}
The number of triangles in $\graph{\ell}$ for $\ell \geq 2$ will always be $0$ for the simple fact that all cycles in $\graph{\ell}$ will have at least six edges.
\qed
\end{proof}
\subsubsection{Proof of \Cref{lem:lin-sys}}
\input{app_hard_linsys}

16
arXiv/app_k-relations.tex Normal file
View File

@ -0,0 +1,16 @@
%!TEX root=./main.tex
We can use $\semK$-relations to model bags. A \emph{$\semK$-relation}~\cite{DBLP:conf/pods/GreenKT07} is a relation whose tuples are annotated with elements from a commutative semiring $\semK = \inset{\domK, \addK, \multK, \zeroK, \oneK}$. A commutative semiring is a structure with a domain $\domK$ and associative and commutative binary operations $\addK$ and $\multK$ such that $\multK$ distributes over $\addK$, $\zeroK$ is the identity of $\addK$, $\oneK$ is the identity of $\multK$, and $\zeroK$ annihilates all elements of $\domK$ when combined by $\multK$.
Let $\udom$ be a countable domain of values.
Formally, an $n$-ary $\semK$-relation $\rel$ over $\udom$ is a function $\rel: \udom^n \to \domK$ with finite support $\support{\rel} = \{ \tup \mid \rel(\tup) \neq \zeroK \}$. A $\semK$-database is defined similarly, where we view the $\semK$-database (relation) as a function mapping tuples to their respective annotations.
$\raPlus$ query semantics over $\semK$-relations are analogous to the lineage construction semantics of \Cref{fig:nxDBSemantics}, with the exception of replacing $+$ with $\addK$ and $\cdot$ with $\multK$.
Consider the semiring $\semN = \inset{\domN,+,\times,0,1}$ of natural numbers. $\semN$-databases model bag semantics by annotating each tuple with its multiplicity. A probabilistic $\semN$-database ($\semN$-PDB) is a PDB where each possible world is an $\semN$-database. We study the problem of computing statistical moments for query results over such databases. Given an $\semN$-\abbrPDB $\pdb = (\idb, \pd)$, ($\raPlus$) query $\query$, and possible result tuple $\tup$, we sum $\query(\db)(\tup)\cdot\pd\inparen{\db}$ for all $\db \in \idb$ to compute the expected multiplicity of $\tup$. Intuitively, the expectation of $\query(\db)(t)$ is the number of duplicates of $t$ we expect to find in the result of query $\query$.
Let $\semNX$ denote the set of polynomials over variables $\vct{X}=(X_1,\dots,X_n)$ with natural number coefficients and exponents.
Consider now the semiring (abusing notation) $\semNX = \inset{\semNX, +, \cdot, 0, 1}$ whose domain is $\semNX$, with the standard addition and multiplication of polynomials.
We define an \abbrNXPDB $\pxdb$ as the tuple $(\db_{\semNX}, \pd)$, where $\semNX$-database $\db_{\semNX}$ is paired with the probability distribution $\pd$ across the set of possible worlds \emph{represented} by $\db_{\semNX}$, i.e. the one induced from $\mathcal{P}_{\semNX}$, the probability distribution over $\vct{X}$. Note that the notation is slightly abused since the first element of the pair is an encoded set of possible worlds, i.e. $\db_{\semNX}$ is the \dbbaseName.
We denote by $\nxpolyqdt$ the annotation of tuple $t$ in the result of $\query(\db_{\semNX})(t)$, and as before, interpret it as a function $\nxpolyqdt: \{0,1\}^{|\vct X|} \rightarrow \semN$ from vectors of variable assignments to the corresponding value of the annotating polynomial.
\abbrNXPDB\xplural and a function $\rmod$ (which transforms an \abbrNXPDB to an equivalent $\semN$-PDB) are both formalized next.

View File

@ -0,0 +1,156 @@
%root: main.tex
%!TEX root=./main.tex
To justify the use of $\semNX$-databases, we need to show that we can encode any $\semN$-PDB in this way and that the query semantics over this representation coincides with query semantics over its respective $\semN$-PDB. For that it will be opportune to define representation systems for $\semN$-PDBs.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Representation System]\label{def:representation-syste}
A representation system for $\semN$-PDBs is a tuple $(\reprs, \rmod)$ where $\reprs$ is a set of representations and $\rmod$ associates with each $\repr \in \reprs$ an $\semN$-PDB $\pdb$. We say that a representation system is \emph{closed} under a class of queries $\qClass$ if for any query $\query \in \qClass$ and $\repr \in \reprs$ we have:
%
\[ \rmod(\query(\repr)) = \query(\rmod(\repr)) \]
A representation system is \emph{complete} if for every $\semN$-PDB $\pdb$ there exists $\repr \in \reprs$ such that:
%
\[ \rmod(\repr) = \pdb \]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As mentioned above we will use $\semNX$-databases paired with a probability distribution as a representation system, referring to such databases as \abbrNXPDB\xplural.
Given \abbrNXPDB $\pxdb$, one can think of $\pd$ as the probability distribution across all worlds $\inset{0, 1}^\numvar$. Denote a particular world to be $\vct{w}$. For convenience let $\assign_\vct{w}: \pxdb\rightarrow\pndb$ be a function that computes the corresponding $\semN$-\abbrPDB upon assigning all values $w_i \in \vct{w}$ to $X_i \in \vct{X}$ of $\db_{\semNX}$. Note the one-to-one correspondence between elements $\vct{w}\in\inset{0, 1}^\numvar$ and the worlds encoded by $\db_{\semNX}$ when $\vct{w}$ is assigned to $\vct{X}$ (assuming a domain of $\inset{0, 1}$ for each $X_i$).
We can think of $\assign_\vct{w}(\pxdb)\inparen{\tup}$ as the semiring homomorphism $\semNX \to \semN$ that applies the assignment $\vct{w}$ to all variables $\vct{X}$ of a polynomial and evaluates the resulting expression in $\semN$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[$\rmod\inparen{\pxdb}$]\label{def:semnx-pdbs}
Given an \abbrNXPDB$\pxdb$, we compute its equivalent $\semN$-\abbrPDB $\pndb = \rmod\inparen{\pxdb} = \inparen{\idb, \pd'}$ as:
\begin{align*}
\idb & = \{ \assign_{\vct{w}}(\pxdb) \mid \vct{w} \in \{0,1\}^n \} \\
\forall \db \in \idb: \probOf(\db) & = \sum_{\vct{w} \in \{0,1\}^n: \assign_{\vct{w}}(\pxdb) = \db} \probOf(\vct{w})
\end{align*}
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For instance, consider a $\pxdb$ consisting of a single tuple $\tup_1 = (1)$ annotated with $X_1 + X_2$ with probability distribution $\probOf([0,0]) = 0$, $\probOf([0,1]) = 0$, $\probOf([1,0]) = 0.3$ and $\probOf([1,1]) = 0.7$. This \abbrNXPDB encodes two possible worlds (with non-zero probability) that we denote using their world vectors.
%
\[
D_{[1,0]}(\tup_1) = 1 \hspace{0.3cm} \mathbf{and} \hspace{0.3cm} D_{[1,1]}(\tup_1) = 2
\]
%
Importantly, as the following proposition shows, any finite $\semN$-PDB can be encoded as an \abbrNXPDB and \abbrNXPDB\xplural are closed under $\raPlus$~\cite{DBLP:conf/pods/GreenKT07}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{prop:semnx-pdbs-are-a-}
\abbrNXPDB\xplural are a complete representation system for $\semN$-PDBs that is closed under $\raPlus$ queries.
\end{Proposition}
\begin{proof}
To prove that \abbrNXPDB\xplural are complete consider the following construction that for any $\semN$-PDB $\pdb = (\idb, \pd)$ produces an \abbrNXPDB $\pxdb = (\db_{\semNX}, \pd')$ such that $\rmod(\pxdb) = \pdb$. Let $\idb = \{D_1, \ldots, D_{\abs{\idb}}\}.$
For each world $D_i$ we create a corresponding variable $X_i$.
In $\db_{\semNX}$ we assign each tuple $\tup$ the polynomial:
%
\[
\db_{\semNX}(\tup) = \sum_{i=1}^{\abs{\idb}} D_i(\tup)\cdot X_{i}
\]
The probability distribution $\pd'$ assigns all world vectors zero probability except for $\abs{\idb}$ world vectors (representing the possible worlds) $\vct{w}_i$. All elements of $\vct{w}_i$ are zero except for the position corresponding to variable $X_{i}$, which is set to $1$. Unfolding definitions it is trivial to show that $\rmod(\pxdb) = \pdb$. Thus, \abbrNXPDB\xplural are a complete representation system.
Since $\semNX$ is the free object in the variety of semirings, Birkhoff's HSP theorem implies that any assignment $\vct{X} \to \semN$, which includes as a special case the assignments $\assign_{\vct{w}}$ used here, uniquely extends to the semiring homomorphism alluded to above, $\assign_\vct{w}\inparen{\pxdb}\inparen{\tup}: \semNX \to \semN$. Given a polynomial, $\assign_\vct{w}\inparen{\pxdb}\inparen{\tup}$ substitutes variables based on $\vct{w}$ and then evaluates the resulting expression in $\semN$. For instance, consider the polynomial $\pxdb\inparen{\tup} = \poly = X + Y$ and assignment $\vct{w} := X = 0, Y=1$. We get $\assign_\vct{w}\inparen{\pxdb}\inparen{\tup} = 0 + 1 = 1$.
Closure under $\raPlus$ queries follows from this and from \cite{DBLP:conf/pods/GreenKT07}'s Proposition 3.5, which states that semiring homomorphisms commute with queries over $\semK$-relations.
\qed
\end{proof}
\subsection{\tis and \bis in the \abbrNXPDB model}\label{subsec:supp-mat-ti-bi-def}
Two important subclasses of \abbrNXPDB\xplural that are of interest to us are the bag versions of tuple-independent databases (\tis) and block-independent databases (\bis). Under set semantics, a \ti is a deterministic database $\db$ where each tuple $\tup$ is assigned a probability $\prob_\tup$. The set of possible worlds represented by a \ti $\db$ is all subsets of $\db$. The probability of each world is the product of the probabilities of all tuples that exist with one minus the probability of all tuples of $\db$ that are not part of this world, i.e., tuples are treated as independent random events. In a \bi, we also assign each tuple a probability, but additionally partition $\db$ into blocks. The possible worlds of a \bi $\db$ are all subsets of $\db$ that contain at most one tuple from each block. Note then that the tuples sharing the same block are disjoint, and the sum of the probabilities of all the tuples in the same block $\block$ is at most $1$.
The probability of such a world is the product of the probabilities of all tuples present in the world.
For bag \tis and \bis, we define the probability of a tuple to be the probability that the tuple exists with multiplicity at least $1$.
In this work, we define \tis and \bis as subclasses of \abbrNXPDB\xplural defined over variables $\vct{X}$ (\Cref{def:semnx-pdbs}) where $\vct{X}$ can be partitioned into blocks that satisfy the conditions of a \ti or \bi (stated formally in \Cref{subsec:tidbs-and-bidbs}).
In this work, we consider one further deviation from the standard: We use bag semantics for queries.
Even though tuples cannot occur more than once in the input \ti or \bi, they can occur with a multiplicity larger than one in the result of a query.
Since in \tis and \bis, there is a one-to-one correspondence between tuples in the database and variables, we can interpret a vector $\vct{w} \in \{0,1\}^n$ as denoting which tuples exist in the possible world $\assign_{\vct{w}}(\pxdb)$ (the ones where $w_i = 1$).
For BIDBs specifically, note that at most one of the bits corresponding to tuples in each block will be set (i.e., for any pair of bits $w_j$, $w_{j'}$ that are part of the same block $b_i \supseteq \{t_{i,j}, t_{i,j'}\}$, at most one of them will be set).
Denote the vector $\vct{p}$ to be a vector whose elements are the individual probabilities $\prob_i$ of each tuple $\tup_i$. Given \abbrPDB $\pdb$, $\pd$ is the distribution induced by $\vct{p}$, which we will denote $\pd^{\inparen{\vct{\prob}}}$.
%
\begin{align}\label{eq:tidb-expectation}
\expct_{\vct{W} \sim \pd^{(\vct{p})}}\pbox{\poly(\vct{W})}
= \sum\limits_{\substack{\vct{w} \in \{0, 1\}^\numvar\\ s.t. w_j,w_{j'} = 1 \rightarrow \not \exists b_i \supseteq \{t_{i,j}, t_{i,j'}\}}} \poly(\vct{w})\prod_{\substack{j \in [\numvar]\\ s.t. \wElem_j = 1}}\prob_j \prod_{\substack{j \in [\numvar]\\s.t. w_j = 0}}\left(1 - \prob_j\right)
\end{align}
%
Recall that tuple blocks in a TIDB always have size 1, so the outer summation of \cref{eq:tidb-expectation} is over the full set of vectors.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof of~\Cref{prop:expection-of-polynom}}
\label{subsec:expectation-of-polynom-proof}
\begin{proof}
We need to prove for $\semN$-PDB $\pdb = (\idb,\pd)$ and \abbrNXPDB $\pxdb = (\db_{\semNX}',\pd')$ where $\rmod(\pxdb) = \pdb$ that $\expct_{\randDB\sim \pd}[\query(\db)(t)] = \expct_{\vct{W} \sim \pd'}\pbox{\nxpolyqdt(\vct{W})}$.
By expanding $\nxpolyqdt$ and the expectation we have:
\begin{align*}
\expct_{\vct{W} \sim \pd'}\pbox{\poly(\vct{W})}
& = \sum_{\vct{w} \in \{0,1\}^n}\probOf(\vct{w}) \cdot Q(\db_{\semNX})(t)(\vct{w})\\
\intertext{From $\rmod(\pxdb) = \pdb$, we have that the range of $\assign_{\vct{w}}(\pxdb)$ is $\idb$, so}
& = \sum_{\db \in \idb}\;\;\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf(\vct{w}) \cdot Q(\db_{\semNX})(t)(\vct{w})\\
\intertext{The inner sum is only over $\vct{w}$ where $\assign_{\vct{w}}(\pxdb) = \db$ (i.e., $Q(\db_{\semNX})(t)(\vct{w}) = \query(\db)(t))$}
& = \sum_{\db \in \idb}\;\;\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf(\vct{w}) \cdot \query(\db)(t)\\
\intertext{By distributivity of $\times$ over $+$}
& = \sum_{\db \in \idb}\query(\db)(t)\sum_{\vct{w} \in \{0,1\}^n : \assign_{\vct{w}}(\pxdb) = \db}\probOf(\vct{w})\\
\intertext{From the definition of $\pd$ in \cref{def:semnx-pdbs}, given $\rmod(\pxdb) = \pdb$, we get}
& = \sum_{\db \in \idb}\query(\db)(t) \cdot \probOf(\db) \quad = \expct_{\randDB \sim \pd}[\query(\db)(t)]
\end{align*}
\qed
\end{proof}
\subsection{Proposition~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
\noindent Note the following fact:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w}$ such that $\probOf[\vct{W} = \vct{w}] > 0$, it holds that
$% \[
\poly(\vct{w}) = \rpoly(\vct{w}).
$% \]
\end{Proposition}
\begin{proof}
Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
Finally, note that there are exactly three cases where the expectation of a monomial term $\expct\left[c_{\vct{d}}\prod_{i \in [\numvar]\; s.t.\; \vct{d}_i \geq 1}X_i\right]$ is zero:
(i) when $c_{\vct{d}} = 0$,
(ii) when $p_i = 0$ for some $i$ where $\vct{d}_i \geq 1$, and
(iii) when $X_i$ and $X_j$ are in the same block for some $i,j$ where $\vct{d}_i, \vct{d}_j \geq 1$.
\qed
\end{proof}
\subsection{Proof for Lemma~\ref{lem:tidb-reduce-poly}}\label{subsec:proof-exp-poly-rpoly}
\begin{proof}
Let $\poly$ be a polynomial of $\numvar$ variables with highest degree $= \hideg$, defined as follows:
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, \hideg\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Let the boolean function $\isInd{\cdot}$ take $\vct{d}$ as input and return true if there do not exist any dependent variables in $\vct{d}$, i.e., $\not\exists ~\block, i\neq j\suchthat d_{\block, i}, d_{\block, j} \geq 1$.\footnote{This \abbrBIDB notation is used and discussed in \cref{subsec:tidbs-and-bidbs}.}
Then in expectation we have
\begin{align}
\expct_{\vct{\randWorld}}\pbox{\poly(\vct{\randWorld})} &= \expct_{\vct{\randWorld}}\pbox{\sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i} + \sum_{\substack{\vct{d} \in \{0,\ldots, \hideg\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1a}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}} + \sum_{\substack{\vct{d} \in \{0,\ldots, \hideg\}^\numvar\\\wedge ~\neg\isInd{\vct{d}}}} c_{\vct{d}}\cdot\expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar\randWorld_i^{d_i}}\label{p1-s1b}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\~\wedge\isInd{\vct{d}}}}c_{\vct{d}}\cdot \expct_{\vct{\randWorld}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \randWorld_i^{d_i}}\label{p1-s1c}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i^{d_i}}\label{p1-s2}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{\randWorld}}\pbox{\randWorld_i}\label{p1-s3}\\
&= \sum_{\substack{\vct{d} \in \{0,\ldots,\hideg\}^\numvar\\\wedge~\isInd{\vct{d}}}}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar).\label{p1-s5}
\end{align}
\Cref{p1-s1a} is the result of substituting in the definition of $\poly$ given above. Then we arrive at \cref{p1-s1b} by linearity of expectation. Next, \cref{p1-s1c} is the result of the independence constraint of \abbrBIDB\xplural, specifically that any monomial composed of dependent variables, i.e., variables from the same block $\block$, has a probability of $0$. \Cref{p1-s2} is obtained by the fact that all variables in each monomial are independent, which allows for the expectation to be pushed through the product. In \cref{p1-s3}, since $\randWorld_i \in \{0, 1\}$ it is the case that for any exponent $e \geq 1$, $\randWorld_i^e = \randWorld_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.
Finally, it can be verified that \Cref{p1-s5} follows since \cref{p1-s4} satisfies the construction of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ in \Cref{def:reduced-poly}.
\qed
\end{proof}
\subsection{Proof For Corollary~\ref{cor:expct-sop}}
\begin{proof}
Note that~\Cref{lem:tidb-reduce-poly} shows that $\expct\pbox{\poly} =$ $\rpoly(\prob_1,\ldots, \prob_\numvar)$. Therefore, if $\poly$ is already in \abbrSMB form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numvar)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numvar)$), which indeed has $\bigO{\abs{\poly}}$ computations.
\qed
\end{proof}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

View File

@ -0,0 +1,126 @@
%root: main.tex
\subsection{$\onepass$ Remarks}
Please note that it is \textit{assumed} that the original call to \onepass consists of a call on an input circuit \circuit such that the values of members \prt, \lwght and \rwght have been initialized to Null across all gates.
\input{app_onepass_eval-notes}
\subsection{$\onepass$ Example}
\begin{Example}\label{example:one-pass}
Let $\etree$ encode the expression $(X + Y)(X - Y) + Y^2$. After one pass, \Cref{alg:one-pass-iter} would have computed the following weight distribution. For the two inputs of the sink gate $\circuit$, $\circuit.\lwght = \frac{4}{5}$ and $\circuit.\rwght = \frac{1}{5}$. Similarly, for $\stree$ denoting the left input of $\circuit_{\lchild}$, $\stree.\lwght = \stree.\rwght = \frac{1}{2}$. This is depicted in \Cref{fig:expr-tree-T-wght}.
\end{Example}
\begin{figure}[h!]
\centering
\begin{tikzpicture}[thick]
%First level
\node[tree_node] (a1) at (1, 0) {$\boldsymbol{Y}$};
\node[tree_node] (b1) at (3, 0) {$\boldsymbol{-1}$};
%Second level
\node[tree_node] (a2) at (-0.75, 0) {$\boldsymbol{X}$};
\node[tree_node] (b2) at (1.6,1.25) {$\boldsymbol{\circmult}$};
\node[tree_node] (c2) at (2.9, 1.25) {$\boldsymbol{\circmult}$};
%Third level
\node[tree_node] (a3) at (0.7, 2.5) {$\boldsymbol{\circplus}$};
\node[tree_node] (b3) at (1.6, 2.5) {$\boldsymbol{\circplus}$};
%Fourth level
\node[tree_node] (a4) at (1.5, 3.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (b4) at (2.8, 4) {$\boldsymbol{\circplus}$};
\node[above right=0.15cm of b4, inner sep=0pt, font=\bfseries](labelC) {$\circuit$};
\draw[->] (a1) edge[right] node{$\frac{1}{2}$} (a3);
\draw[->] (b1) -- (b2);
\draw[->] (a1) -- (b2);
\draw[->] (a1) edge[bend left=15] (c2);
\draw[->] (a1) edge[bend right=15] (c2);
\draw[->] (a2) edge[left] node{$\frac{1}{2}$} (a3);
\draw[->] (a2) edge[below] node{$\frac{1}{2}$} (b3);
\draw[->] (b2) edge[right] node{$\frac{1}{2}$} (b3);
\draw[->] (c2) edge[right] node{$\frac{1}{5}$} (b4);
\draw[->] (a3) -- (a4);
\draw[->] (b3) -- (a4);
\draw[->] (a4) edge[above] node{$\frac{4}{5}$} (b4);
\draw[black] (b4) -- (labelC);
\end{tikzpicture}
\caption{Weights computed by $\onepass$ in \Cref{example:one-pass}.}
\label{fig:expr-tree-T-wght}
\end{figure}
\begin{algorithm}[h!]
\caption{\onepass$(\circuit)$}
\label{alg:one-pass-iter}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \circuit: Annotated Circuit
\Ensure \vari{sum} $\in \domN$
\For{\gate in \topord(\circuit)}\label{alg:one-pass-loop}\Comment{\topord($\cdot$) is the topological order of \circuit}
\If{\gate.\type $=$ \var}
\State \gate.\prt $\gets 1$\label{alg:one-pass-var}
\ElsIf{\gate.\type $=$ \tnum}
\State \gate.\prt $\gets \abs{\gate.\val}$\label{alg:one-pass-num}
\ElsIf{\gate.\type $= \circmult$}
\State \gate.\prt $\gets \gate_\linput.\prt \times \gate_\rinput.\prt$\label{alg:one-pass-mult}
\Else
\State \gate.\prt $\gets \gate_\linput.\prt + \gate_\rinput.\prt$\label{alg:one-pass-plus}
\State \gate.\lwght $\gets \frac{\gate_\linput.\prt}{\gate.\prt}$\label{alg:one-pass-lwght}
\State \gate.\rwght $\gets \frac{\gate_\rinput.\prt}{\gate.\prt}$\label{alg:one-pass-rwght}
\EndIf
\State \vari{sum} $\gets \gate.\prt$
\EndFor
\State \Return (\vari{sum}, $\circuit$)
\end{algorithmic}
\end{algorithm}
\subsection{Proof of \onepass (\Cref{lem:one-pass})}\label{sec:proof-one-pass}
\begin{proof}
We prove the correct computation of \prt, \lwght, \rwght values on \circuit by induction over the number of iterations in the topological order \topord (line~\ref{alg:one-pass-loop}) of the input circuit \circuit. \topord follows the standard definition of a topological ordering over the DAG structure of \circuit.
For the base case, we have only one gate, which by definition is a source gate and must be either \var or \tnum. In this case, as per \cref{eq:T-all-ones}, lines~\ref{alg:one-pass-var} and~\ref{alg:one-pass-num} correctly compute \circuit.\prt as $1$ and $\abs{\circuit.\val}$, respectively.
For the inductive hypothesis, assume that \onepass correctly computes \gate.\prt, \gate.\lwght, and \gate.\rwght for all gates \gate in \circuit with $k \geq 0$ iterations over \topord.
We now prove for $k + 1$ iterations that \onepass correctly computes the \prt, \lwght, and \rwght values for each gate $\gate_\vari{i}$ in \circuit for $i \in [k + 1]$.
The gate $\gate_{k + 1}$ must be last in the ordering of all gates $\gate_{i}$. When \size(\circuit) $> 1$, if $\gate_{k+1}$ is a leaf node, we are back to the base case. Otherwise $\gate_{k + 1}$ is an internal node
which requires binary input.
When $\gate_{k+1}.\type = \circplus$, then by line~\ref{alg:one-pass-plus} $\gate_{k+1}$.\prt $= \gate_{{k+1}_\lchild}$.\prt $+ \gate_{{k+1}_\rchild}$.\prt, a correct computation, as per \cref{eq:T-all-ones}. Further, lines~\ref{alg:one-pass-lwght} and~\ref{alg:one-pass-rwght} compute $\gate_{{k+1}}.\lwght = \frac{\gate_{{k+1}_\lchild}.\prt}{\gate_{{k+1}}.\prt}$ and analogously for $\gate_{{k+1}}.\rwght$. All values needed for each computation have been correctly computed by the inductive hypothesis.
When $\gate_{k+1}.\type = \circmult$, then line~\ref{alg:one-pass-mult} computes $\gate_{k+1}.\prt = \gate_{{k+1}_\lchild}.\prt \circmult \gate_{{k+1}_\rchild}.\prt$, which indeed by \cref{eq:T-all-ones} is correct. This concludes the proof of correctness.
\paragraph*{Runtime Analysis}
It is known that $\topord(\circuit)$ is computable in linear time. There are $\size(\circuit)$ iterations, each of which takes $O\left( \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\inparen{\size(\circuit)}}\right)$ time. This can be seen since each number the algorithm computes is at most $\abs{\circuit}(1,\ldots, 1)$. Hence, by definition each such operation takes $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\inparen{\size(\circuit)}}$ time, which proves the claimed runtime.
\qed
\end{proof}
\iffalse
\paragraph*{Sufficient condition for $\abs{\circuit}(1,\ldots, 1)$ to be size $O(N)$}
For our runtime results to be relevant, it must be the case that the sum of the coefficients computed by \onepass is indeed size $O(N)$ since there are $O(\log{N})$ bits in the RAM model where $N$ is the size of the input. The size of the input here is \size(\circuit). We show that when \size$(\circuit_\linput) = N_\linput$, \size$(\circuit_\rinput) = N_\rinput$, where $N_\linput + N_\rinput \leq N$, this is indeed the case.
\begin{proof}
To prove this result, we start by proving that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$ for \degree(\circuit) $= k$.
For the base case, we have that \depth(\circuit) $= 0$, and there can only be one node which must contain a coefficient (or constant) of $1$. In this case, $\abs{\circuit}(1,\ldots, 1) = 1$, and \size(\circuit) $= 1$, and it is true that $\abs{\circuit}(1,\ldots, 1) = 1 \leq N^{2^k} = 1^{2^0} = 1$.
Assume for $\ell > 0$ an arbitrary circuit \circuit of $\depth(\circuit) \leq \ell$ that it is true that $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k }$.
For the inductive step we consider a circuit \circuit such that $\depth(\circuit) \leq \ell + 1$. The sink can only be either a $\circmult$ or $\circplus$ gate. Consider when sink node is $\circmult$. Let $k_\linput, k_\rinput$ denote \degree($\circuit_\linput$) and \degree($\circuit_\rinput$) respectively. Note that this case does not require the constraint on $N_\linput$ or $N_\rinput$.
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1)\circmult \abs{\circuit_\rinput}(1,\ldots, 1) \leq (N-1)^{2^{k_\linput}} \circmult (N - 1)^{2^{k_\rinput}}\nonumber\\
&\leq (N-1)^{2^{k}-1}\label{eq:sumcoeff-times-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
We derive the upperbound of \Cref{eq:sumcoeff-times-upper} by noting that the maximum value of the LHS occurs when both the base and exponent are maximized.
For the case when the sink node is a $\circplus$ node, then we have
\begin{align}
\abs{\circuit}(1,\ldots, 1) &= \abs{\circuit_\linput}(1,\ldots, 1) \circplus \abs{\circuit_\rinput}(1,\ldots, 1) \leq
N_\linput^{2^{k_\linput}} + N_\rinput^{2^{k_\rinput}}\nonumber\\
&\leq N_\linput^{2^k } + N_\rinput\label{eq:sumcoeff-plus-upper}\\
&\leq N^{2^k}.\nonumber
\end{align}
Similar to the $\circmult$ case, \Cref{eq:sumcoeff-plus-upper} upperbounds its LHS by the fact that the maximum base and exponent combination is always greater than or equal to the sum of lower base/exponent combinations. The final equality is true given the constraint over the inputs.
Since $\abs{\circuit}(1,\ldots, 1) \leq N^{2^k}$ for all circuits such that all $\circplus$ gates share at most one gate with their sibling (across their respective subcircuits), then $\log{N^{2^k}} = 2^k \cdot \log{N}$ which for fixed $k$ yields the desired $O(\log{N})$ bits for $O(1)$ arithmetic operations.
\end{proof}
\fi

View File

@ -0,0 +1,21 @@
%root: main.tex
The evaluation of $\abs{\circuit}(1,\ldots, 1)$ can be defined recursively, as follows (where $\circuit_\linput$ and $\circuit_\rinput$ are the `left' and `right' inputs of $\circuit$ if they exist):
{\small
\begin{align}
\label{eq:T-all-ones}
\abs{\circuit}(1,\ldots, 1) = \begin{cases}
\abs{\circuit_\linput}(1,\ldots, 1) \cdot \abs{\circuit_\rinput}(1,\ldots, 1) &\textbf{if }\circuit.\type = \circmult\\
\abs{\circuit_\linput}(1,\ldots, 1) + \abs{\circuit_\rinput}(1,\ldots, 1) &\textbf{if }\circuit.\type = \circplus \\
|\circuit.\val| &\textbf{if }\circuit.\type = \tnum\\
1 &\textbf{if }\circuit.\type = \var.
\end{cases}
\end{align}
}
It turns out that for the proof of \Cref{lem:sample}, we need to argue that when $\circuit.\type = \circplus$, we indeed have
\begin{align}
\label{eq:T-weights}
\circuit.\lwght &\gets \frac{\abs{\circuit_\linput}(1,\ldots, 1)}{\abs{\circuit_\linput}(1,\ldots, 1) + \abs{\circuit_\rinput}(1,\ldots, 1)};\\
\circuit.\rwght &\gets \frac{\abs{\circuit_\rinput}(1,\ldots, 1)}{\abs{\circuit_\linput}(1,\ldots, 1)+ \abs{\circuit_\rinput}(1,\ldots, 1)}
\end{align}

View File

@ -0,0 +1,153 @@
%root: main.tex
\subsection{\sampmon Remarks}\label{subsec:sampmon-remarks}
\input{app_samp-monom_pseudo-code}
We briefly describe the top-down traversal of \sampmon. When \circuit.\type $= +$, the input to be visited is sampled from the weighted distribution precomputed by \onepass.
When a \circuit.\type$= \times$ node is visited, both inputs are visited.
The algorithm computes two properties: the set of all variable leaf nodes visited, and the product of the signs of visited coefficient leaf nodes.
%
We will assume the TreeSet data structure to maintain sets with logarithmic time insertion and linear time traversal of its elements.
While we would like to take advantage of the space efficiency gained in using a circuit \circuit instead of an expression tree \etree, we do not know that such a method exists when computing a sample of the input polynomial representation.
The efficiency gains of circuits over trees are found in the capability of circuits to only require space for each \emph{distinct} term in the compressed representation. This saves space in such polynomials containing non-distinct terms multiplied or added to each other, e.g., $x^4$. However, to avoid biased sampling, it is imperative to sample from both inputs of a multiplication gate, independently, which is indeed the approach of \sampmon.
\subsection{Proof of \sampmon (\Cref{lem:sample})}\label{sec:proof-sample-monom}
\begin{proof}
We first need to show that $\sampmon$ samples a valid monomial $\encMon$ by sampling and returning a set of variables $\monom$, such that $(\monom, \coef)$ is in $\expansion{\circuit}$ and $\encMon$ is indeed a monomial of the $\rpoly\inparen{\vct{X}}$ encoded in \circuit. We show this via induction over the depth of \circuit.
For the base case, let the depth $d$ of $\circuit$ be $0$. We have that the single gate is either a constant $\coef$ for which by line~\ref{alg:sample-num-return} we return $\{~\}$, or we have that $\circuit.\type = \var$ and $\circuit.\val = x$, and by line~\ref{alg:sample-var-return} we return $\{x\}$. By \cref{def:expand-circuit}, both cases return a valid $\monom$ for some $(\monom, \coef)$ from $\expansion{\circuit}$, and the base case is proven.
For the inductive hypothesis, assume that for $d \leq k$ for some $k \geq 0$, that it is indeed the case that $\sampmon$ returns a valid monomial.
For the inductive step, let us take a circuit $\circuit$ with $d = k + 1$. Note that each input has depth $d - 1 \leq k$, and by inductive hypothesis both of them sample a valid monomial. Then the sink can be either a $\circplus$ or $\circmult$ gate. For the case when $\circuit.\type = \circplus$, line~\ref{alg:sample-plus-bsamp} of $\sampmon$ will choose one of the inputs of the sink. By inductive hypothesis it is the case that some valid monomial is being randomly sampled from each of the inputs. Then it follows when $\circuit.\type = \circplus$ that a valid monomial is sampled by $\sampmon$. When the $\circuit.\type = \circmult$, line~\ref{alg:sample-times-union} computes the set union of the monomials returned by the two inputs of the sink, and it is trivial to see by \cref{def:expand-circuit} that $\encMon$ is a valid monomial encoded by some $(\monom, \coef)$ of $\expansion{\circuit}$.
We will next prove by induction on the depth $d$ of $\circuit$ that for $(\monom,\coef) \in \expansion{\circuit}$, $\monom$ is sampled with a probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
For the base case $d = 0$, by definition~\ref{def:circuit} we know that the $\size\inparen{\circuit} = 1$ and \circuit.\type$=$ \tnum or \var. For either case, the probability of the value returned is $1$ since there is only one value to sample from. When \circuit.\val $= x$, the algorithm always returns the variable set $\{x\}$. When $\circuit.\type = \tnum$, \sampmon will always return $\emptyset$.
For the inductive hypothesis, assume that for $d \leq k$ and $k \geq 0$ $\sampmon$ indeed returns $\monom$ in $(\monom, \coef)$ of $\expansion{\circuit}$ with probability $\frac{|\coef|}{\abs{\circuit}\polyinput{1}{1}}$.
We prove now for $d = k + 1$ the inductive step holds. It is the case that the sink of $\circuit$ has two inputs $\circuit_\linput$ and $\circuit_\rinput$. Since $\circuit_\linput$ and $\circuit_\rinput$ are both depth $d - 1 \leq k$, by inductive hypothesis, $\sampmon$ will return $\monom_\linput$ in $(\monom_\lchild, \coef_\lchild)$ of $\expansion{\circuit_\linput}$ and $\monom_\rinput$ in $(\monom_\rchild, \coef_\rchild)$ of $\expansion{\circuit_\rinput}$, from $\circuit_\linput$ and $\circuit_\rinput$ with probability $\frac{|\coef_\lchild|}{\abs{\circuit_\linput}\polyinput{1}{1}}$ and $\frac{|\coef_\rchild|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$.
Consider the case when $\circuit.\type = \circmult$. For the term $(\monom, \coef)$ from $\expansion{\circuit}$ that is being sampled it is the case that $\monom = \monom_\lchild \cup \monom_\rchild$, where $\monom_\lchild$ is coming from $\circuit_\linput$ and $\monom_\rchild$ from $\circuit_\rinput$. The probability that \sampmon$(\circuit_{\lchild})$ returns $\monom_\lchild$ is $\frac{|\coef_{\monom_\lchild}|}{|\circuit_\linput|(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{\abs{\circuit_\rinput}\polyinput{1}{1}}$ for $\monom_\rchild$. Since both $\monom_\lchild$ and $\monom_\rchild$ are sampled with independent randomness, the final probability for sample $\monom$ is then $\frac{|\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|}{|\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)}$. For $(\monom, \coef)$ in $\expansion{\circuit}$, by \cref{def:expand-circuit} it is indeed the case that $|\coef| = |\coef_{\monom_\lchild}| \cdot |\coef_{\monom_\rchild}|$ and that (as shown in \cref{eq:T-all-ones}) $\abs{\circuit}(1,\ldots, 1) = |\circuit_\linput|(1,\ldots, 1) \cdot |\circuit_\rinput|(1,\ldots, 1)$, and therefore $\monom$ is sampled with correct probability $\frac{|\coef|}{\abs{\circuit}(1,\ldots, 1)}$.
For the case when $\circuit.\type = \circplus$, \sampmon ~will sample $\monom$ from one of its inputs. By inductive hypothesis we know that any $\monom_\lchild$ in $\expansion{\circuit_\linput}$ and any $\monom_\rchild$ in $\expansion{\circuit_\rinput}$ will both be sampled with correct probability $\frac{|\coef_{\monom_\lchild}|}{\abs{\circuit_{\lchild}}(1,\ldots, 1)}$ and $\frac{|\coef_{\monom_\rchild}|}{|\circuit_\rinput|(1,\ldots, 1)}$, where either $\monom_\lchild$ or $\monom_\rchild$ will equal $\monom$, depending on whether $\circuit_\linput$ or $\circuit_\rinput$ is sampled. Assume that $\monom$ is sampled from $\circuit_\linput$, and note that a symmetric argument holds for the case when $\monom$ is sampled from $\circuit_\rinput$. Notice also that the probability of choosing $\circuit_\linput$ from $\circuit$ is $\frac{\abs{\circuit_\linput}\polyinput{1}{1}}{\abs{\circuit_\linput}\polyinput{1}{1} + \abs{\circuit_\rinput}\polyinput{1}{1}}$ as computed by $\onepass$. Then, since $\sampmon$ goes top-down, and each sampling choice is independent (which follows from the randomness in the root of $\circuit$ being independent from the randomness used in its subtrees), the probability for $\monom$ to be sampled from $\circuit$ is equal to the product of the probability that $\circuit_\linput$ is sampled from $\circuit$ and $\monom$ is sampled in $\circuit_\linput$, and
\begin{align*}
&\probOf(\sampmon(\circuit) = \monom) = \\
&\probOf(\sampmon(\circuit_\linput) = \monom) \cdot \probOf(SampledChild(\circuit) = \circuit_\linput)\\
&= \frac{|\coef_\monom|}{|\circuit_\linput|(1,\ldots, 1)} \cdot \frac{\abs{\circuit_\linput}(1,\ldots, 1)}{|\circuit_\linput|(1,\ldots, 1) + |\circuit_\rinput|(1,\ldots, 1)}\\
&= \frac{|\coef_\monom|}{\abs{\circuit}(1,\ldots, 1)},
\end{align*}
and we obtain the desired result.
Lastly, we show by simple induction on the depth $d$ of \circuit that \sampmon indeed returns the correct sign value of $\coef$ in $(\monom, \coef)$.
In the base case, $\circuit.\type = \tnum$ or $\var$. For the former, \sampmon correctly returns the sign value of the gate. For the latter, \sampmon returns the correct sign of $1$, since a variable is a neutral element, and $1$ is the multiplicative identity, whose product with another sign element will not change that sign element.
For the inductive hypothesis, we assume for a circuit of depth $d \leq k$ and $k \geq 0$ that the algorithm correctly returns the sign value of $\coef$.
Similar to before, for a depth $d = k + 1$, it is true that $\circuit_\linput$ and $\circuit_\rinput$ both return the correct sign of $\coef$. For the case that $\circuit.\type = \circmult$, the sign values of both inputs are multiplied, which is the correct behavior by \cref{def:expand-circuit}. When $\circuit.\type = \circplus$, only one input of $\circuit$ is sampled, and the algorithm returns the correct sign value of $\coef$ by inductive hypothesis.
\paragraph*{Run-time Analysis}
It is easy to check that except for lines~\ref{alg:sample-plus-bsamp} and~\ref{alg:sample-times-union}, all lines take $O(1)$ time. Consider an execution of \cref{alg:sample-times-union}. We note that we will be adding a given set of variables to some set at most once: since the sum of the sizes of the sets at a given level is at most $\degree(\circuit)$, each gate visited takes $O(\log{\degree(\circuit)})$. For \Cref{alg:sample-plus-bsamp}, note that we pick $\circuit_\linput$ with probability $\frac a{a+b}$ where $a=\circuit.\vari{Lweight}$ and $b=\circuit.\vari{Rweight}$. We can implement this step by picking a random number $r\in[a+b]$ and then checking if $r\le a$. It is easy to check that $a+b\le \abs{\circuit}(1,\dots,1)$. This means we need to add and compare $\log{\abs{\circuit}(1,\ldots, 1)}$-bit numbers, which can certainly be done in time $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}$ (note that this is an over-estimate).
Denote \cost(\circuit) (\Cref{eq:cost-sampmon}) to be an upper bound of the number of gates visited by \sampmon. Then the runtime is $O\left(\cost(\circuit)\cdot \log{\degree(\circuit)}\cdot \multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log{\size(\circuit)}}\right)$.
We now bound the number of recursive calls in $\sampmon$ by $O\left((\degree(\circuit) + 1)\right.$$\left.\cdot\right.$ $\left.\depth(\circuit)\right)$, which by the above will prove the claimed runtime.
Let \cost$(\cdot)$ be a function that models an upper bound on the number of gates that can be visited in the run of \sampmon. We define \cost$(\cdot)$ recursively as follows.
\begin{equation}
\cost(\circuit) =
\begin{cases}
1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) & \textbf{if } \text{\circuit.\type = }\circmult\\
1 + \max\left(\cost(\circuit_\linput), \cost(\circuit_\rinput)\right) & \textbf{if } \text{\circuit.\type = \circplus}\\
1 & \textbf{otherwise}
\end{cases}\label{eq:cost-sampmon}
\end{equation}
First note that the number of gates visited in \sampmon is $\leq\cost(\circuit)$. To show that \cref{eq:cost-sampmon} upper bounds the number of nodes visited by \sampmon, note that when \sampmon visits a gate such that \circuit.\type $ =\circmult$, line~\ref{alg:sample-times-for-loop} visits each input of \circuit, as defined in (\ref{eq:cost-sampmon}). For the case when \circuit.\type $= \circplus$, line~\ref{alg:sample-plus-bsamp} visits exactly one of the input gates, which may or may not be the subcircuit with the maximum number of gates traversed, which makes \cost$(\cdot)$ an upperbound. Finally, it is trivial to see that when \circuit.\type $\in \{\var, \tnum\}$, i.e., a source gate, that only one gate is visited.
We prove the following inequality holds.
\begin{equation}
2\left(\degree(\circuit) + 1\right) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)\label{eq:strict-upper-bound}
\end{equation}
Note that \cref{eq:strict-upper-bound} implies the claimed runtime. We prove \cref{eq:strict-upper-bound} for the number of gates traversed in \sampmon using induction over $\depth(\circuit)$. Recall how degree is defined in \cref{def:degree}.
For the base case $\degree(\circuit) \in \inset{0, 1}, \depth(\circuit) = 0$, $\cost(\circuit) = 1$, and it is trivial to see that the inequality $2\left(\degree(\circuit) + 1\right) \cdot \depth(\circuit) + 1 \geq \cost(\circuit)$ holds.
For the inductive hypothesis, we assume the bound holds for any circuit where $\ell \geq \depth(\circuit) \geq 0$.
Now consider the case when \sampmon has an arbitrary circuit \circuit input with $\depth(\circuit) = \ell + 1$. By definition \circuit.\type $\in \{\circplus, \circmult\}$. Note that since $\depth(\circuit) \geq 1$, \circuit must have input(s). Further we know that by the inductive hypothesis the inputs $\circuit_i$ for $i \in \{\linput, \rinput\}$ of the sink gate \circuit uphold the bound
\begin{equation}
2\left(\degree(\circuit_i) + 1\right)\cdot \depth(\circuit_i) + 1 \geq \cost(\circuit_i).\label{eq:ih-bound-cost}
\end{equation}
In particular, since for any $i$, \cref{eq:ih-bound-cost} holds, then it immediately follows that an inequality whose operands consist of a sum of the aforementioned inequalities must also hold. This is readily seen in the inequality of \cref{eq:times-middle} and \cref{eq:times-rhs}, where $2\inparen{\degree(\circuit_\linput) + 1}\cdot \depth(\circuit_\linput) + 1 \geq \cost(\circuit_\linput)$, likewise for $\circuit_\rinput$, and $1\geq 1$.
It is also true that $\depth(\circuit_\linput) \leq \depth(\circuit) - 1$ and $\depth(\circuit_\rinput) \leq \depth(\circuit) - 1$.
If \circuit.\type $= \circplus$, then $\degree(\circuit) = \max\left(\degree(\circuit_\linput), \degree(\circuit_\rinput)\right)$. Otherwise \circuit.\type = $\circmult$ and $\degree(\circuit) = \degree(\circuit_\linput) + \degree(\circuit_\rinput) + 1$. In either case it is true that $\depth(\circuit) = \max\inparen{\depth(\circuit_\linput), \depth(\circuit_\rinput)} + 1$.
If \circuit.\type $= \circmult$, then, by \cref{eq:cost-sampmon}, substituting values, the following should hold,
\begin{align}
&2\left(\degree(\circuit_\linput) + \degree(\circuit_\rinput) + 2\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) + 1 \label{eq:times-lhs}\\
&\qquad\geq 2\left(\degree(\circuit_\linput) + 1\right) \cdot \depth(\circuit_\linput) + 2\left(\degree(\circuit_\rinput) + 1\right)\cdot \depth(\circuit_\rinput) + 3\label{eq:times-middle} \\
&\qquad\geq 1 + \cost(\circuit_\linput) + \cost(\circuit_\rinput) = \cost(\circuit) \label{eq:times-rhs}.
\end{align}
To prove (\ref{eq:times-middle}), first, \cref{eq:times-lhs} expands to,
\begin{equation}
2\degree(\circuit_\linput)\cdot\depth_{\max} + 2\degree(\circuit_\rinput)\cdot\depth_{\max} + 4\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 4 + 1\label{eq:times-lhs-expanded}
\end{equation}
where $\depth_{\max}$ is used to denote the maximum depth of the two input subcircuits. \Cref{eq:times-middle} expands to
\begin{equation}
2\degree(\circuit_\linput)\cdot\depth(\circuit_\linput) + 2\depth(\circuit_\linput) + 2\degree(\circuit_\rinput)\cdot\depth(\circuit_\rinput) + 2\depth(\circuit_\rinput) + 3\label{eq:times-middle-expanded}
\end{equation}
Putting \Cref{eq:times-lhs-expanded} and \Cref{eq:times-middle-expanded} together we get
\begin{align}
&2\degree(\circuit_\linput)\cdot\depth_{\max} + 2\degree(\circuit_\rinput)\cdot\depth_{\max} + 4\depth_{\max} + 2\degree(\circuit_\linput) + 2\degree(\circuit_\rinput) + 5\nonumber\\
&\qquad\geq 2\degree(\circuit_\linput)\cdot\depth(\circuit_\linput) + 2\degree(\circuit_\rinput)\cdot\depth(\circuit_\rinput) + 2\depth(\circuit_\linput) + 2\depth(\circuit_\rinput) + 3\label{eq:times-lhs-middle}
\end{align}
Since the following is always true,
\begin{align*}
&2\degree(\circuit_\linput)\cdot\depth_{\max} + 2\degree(\circuit_\rinput)\cdot\depth_{\max} + 4\depth_{\max} + 5\\
&\qquad \geq 2\degree(\circuit_\linput)\cdot\depth(\circuit_\linput) + 2\degree(\circuit_\rinput)\cdot\depth(\circuit_\rinput) + 2\depth(\circuit_\linput) + 2\depth(\circuit_\rinput) + 3,
\end{align*}
then it is the case that \Cref{eq:times-lhs-middle} is \emph{always} true.
We now justify (\ref{eq:times-rhs}). First, \cref{eq:times-rhs}
is the result of \Cref{eq:cost-sampmon} when $\circuit.\type = \circmult$. \Cref{eq:times-middle}
is then produced by substituting the upperbound of (\ref{eq:ih-bound-cost}) for each $\cost(\circuit_i)$, trivially establishing the upper bound of (\ref{eq:times-rhs}). This proves \cref{eq:strict-upper-bound} for the $\circmult$ case.
For the case when \circuit.\type $= \circplus$, substituting values yields
\begin{align}
&2\left(\max(\degree(\circuit_\linput), \degree(\circuit_\rinput)) + 1\right) \cdot \left(\max(\depth(\circuit_\linput), \depth(\circuit_\rinput)) + 1\right) +1\label{eq:plus-lhs-inequality}\\
&\qquad \geq \max\left(2\left(\degree(\circuit_\linput) + 1\right) \cdot \depth(\circuit_\linput) + 1, 2\left(\degree(\circuit_\rinput) + 1\right) \cdot \depth(\circuit_\rinput) +1\right) + 1\label{eq:plus-middle}\\
&\qquad \geq 1 + \max(\cost(\circuit_\linput), \cost(\circuit_\rinput)) = \cost(\circuit)\label{eq:plus-rhs}
\end{align}
To prove (\ref{eq:plus-middle}), \cref{eq:plus-lhs-inequality} expands to
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 2\depth_{\max} + 2 + 1.\label{eq:plus-lhs-expanded}
\end{equation}
Since $\degree_{\max} \cdot \depth_{\max} \geq \degree(\circuit_i)\cdot \depth(\circuit_i),$ the following upper bound holds for the expansion of \cref{eq:plus-middle}:
\begin{equation}
2\degree_{\max}\depth_{\max} + 2\depth_{\max} + 2
\label{eq:plus-middle-expanded}
\end{equation}
Putting it together we obtain the following for (\ref{eq:plus-middle}):
\begin{align}
&2\degree_{\max}\depth_{\max} + 2\degree_{\max} + 2\depth_{\max} + 3\nonumber\\
&\qquad \geq 2\degree_{\max}\depth_{\max} + 2\depth_{\max} + 2, \label{eq:plus-upper-bound-final}
\end{align}
where it can be readily seen that the inequality stands and (\ref{eq:plus-upper-bound-final}) follows. This proves (\ref{eq:plus-middle}).
Similar to the case of $\circuit.\type = \circmult$, (\ref{eq:plus-rhs}) follows by equations $(\ref{eq:cost-sampmon})$ and $(\ref{eq:ih-bound-cost})$.
This proves (\ref{eq:strict-upper-bound}) as desired.
\qed
\end{proof}

View File

@ -0,0 +1,29 @@
%root: main.tex
\begin{algorithm}[t]
\caption{\sampmon(\circuit)}
\label{alg:sample}
\begin{algorithmic}[1]
\Require \circuit: Circuit
\Ensure \vari{vars}: TreeSet
\Ensure \vari{sgn} $\in \{-1, 1\}$
\Comment{\Cref{alg:one-pass-iter} should have been run before this one}
\State $\vari{vars} \gets \emptyset$ \label{alg:sample-global1}
\If{$\circuit.\type = +$}\Comment{Sample at every $+$ node}
\State $\circuit_{\vari{samp}} \gets$ Sample from left input ($\circuit_{\linput}$) and right input ($\circuit_{\rinput}$) w.p. $\circuit.\vari{Lweight}$ and $\circuit.\vari{Rweight}$. \label{alg:sample-plus-bsamp} \Comment{Each call to \sampmon uses fresh randomness}
\State $(\vari{v}, \vari{s}) \gets \sampmon(\circuit_{\vari{samp}})$\label{alg:sample-plus-traversal}
\State $\Return ~(\vari{v}, \vari{s})$
\ElsIf{$\circuit.\type = \times$}\Comment{Multiply the sampled values of all inputs}
\State $\vari{sgn} \gets 1$\label{alg:sample-global2}
\For {$input$ in $\circuit.\vari{input}$}\label{alg:sample-times-for-loop}
\State $(\vari{v}, \vari{s}) \gets \sampmon(input)$
\State $\vari{vars} \gets \vari{vars} \cup \vari{v}$\label{alg:sample-times-union}
\State $\vari{sgn} \gets \vari{sgn} \times \vari{s}$\label{alg:sample-times-product}
\EndFor
\State $\Return ~(\vari{vars}, \vari{sgn})$
\ElsIf{$\circuit.\type = \tnum$}\Comment{The leaf is a coefficient}
\State $\Return ~\left(\{\}, \func{sgn}(\circuit.\val)\right)$\label{alg:sample-num-return}\Comment{$\func{sgn}(\cdot)$ outputs $1$ for \circuit.\val $\geq 1$ and $-1$ for \circuit.\val $\leq -1$}
\ElsIf{$\circuit.\type = \var$}
\State $\Return~\left(\{\circuit.\val\}, 1\right) $\label{alg:sample-var-return}
\EndIf
\end{algorithmic}
\end{algorithm}

View File

@ -0,0 +1,31 @@
\section{Generalizing Beyond Set Inputs}
\label{sec:gener-results-beyond}
\subsection{\abbrTIDB{}s}
\label{sec:abbrtidbs}
In our definition of \abbrTIDBs (\Cref{subsec:tidbs-and-bidbs}), we assumed a model of \abbrTIDBs where each input tuple is assigned a probability $p$ of having multiplicity $1$. That is, we assumed inputs to be sets, but interpret queries under bag semantics. Other sensible generalizations of \abbrTIDBs from set semantics to bag semantics also exist.
One very natural such generalization is to assign each input tuple $\tup$ a multiplicity $m_\tup$ and probability $p$: the tuple has probability $p$ to exist with multiplicity $m_\tup$, and otherwise has multiplicity $0$. If the maximal multiplicity of all input tuples in the \abbrTIDB is bounded by some constant, then a generalization of our hardness results and approximation algorithm can be achieved by changing the construction of lineage polynomials (in \Cref{fig:nxDBSemantics}) as follows (all other cases remain the same as in \cref{fig:nxDBSemantics}):
\begin{align*}
\polyqdt{\rel}{\dbbase}{\tup} =&\begin{cases}
m_\tup X_\tup & \text{if }\dbbase.\rel\inparen{\tup} = m_\tup \\
0 &\text{otherwise.}\end{cases}
\end{align*}
That is, the variable representing a tuple is multiplied by $m_\tup$ to encode the tuple's multiplicity $m_\tup$. We note that our lower bounds still hold for this model since we only need $m_\tup=1$ for all tuples $\tup$. Further, it can be argued that our proofs (as is) for approximation algorithms also work for this model. The only change is that since we now allow $m_\tup>1$ some of the constants in the runtime analysis of our algorithms change but the overall asymptotic runtime bound remains the same.
Yet another option would be to assign each tuple a probability distribution over multiplicities. It seems very unlikely that our results would extend to a model that allows arbitrary probability distributions over multiplicities (our current proof techniques definitely break down). However, we would like to note that the special case of a Poisson binomial distribution (sum of independent but not necessarily identical Bernoulli trials) over multiplicities can be handled as follows: we add an additional identifier attribute to each relation in the database. For a tuple $\tup$ with maximal multiplicity $m_\tup$, we create $m_\tup$ copies of $\tup$ with different identifiers. To answer a query over this encoding, we first project away the identifier attribute (note that as per \Cref{fig:nxDBSemantics}, in $\poly$ this would add up all the variables corresponding to the same tuple $\tup$).
\subsection{\abbrBIDB{}s}
\label{sec:abbrbidbs}
The approach described above works for \abbrBIDB\xplural as well if we define the bag version of \abbrBIDB{}s to associate each tuple $\tup$ with a multiplicity $m_\tup$. Recall that we associate each tuple in a block with a unique variable. Thus, the modified lineage polynomial construction shown above can be applied for \abbrBIDB{}s too (and our approximation results also hold).
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

311
arXiv/appendix.tex Normal file
View File

@ -0,0 +1,311 @@
%!TEX root=./main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\input{app_set-to-bag-pdb}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Missing details from Section~\ref{sec:background}}\label{sec:proofs-background}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{$\semK$-relations and \abbrNXPDB\xplural}\label{subsec:supp-mat-background}\label{subsec:supp-mat-krelations}
\input{app_k-relations}
\input{app_notation-background}
\section{Missing details from Section~\ref{sec:hard}}
\label{app:single-mult-p}
\input{app_hardness-results}
\section{Missing Details from Section~\ref{sec:algo}}\label{sec:proofs-approx-alg}
\input{app_approx-algo-defs-and-examples}
\input{app_approx-alg-analysis}
\input{app_onepass-analysis}
\input{app_samp-monom-analysis}
\subsection{Experimental Results}\label{app:subsec:experiment}
\input{experiments}
\section{Circuits}\label{app:sec-cicuits}
\subsection{Representing Polynomials with Circuits}\label{app:subsec-rep-poly-lin-circ}
\subsubsection{Circuits for query plans}
\label{sec:circuits-formal}
We now formalize circuits and the construction of circuits for $\raPlus$ queries.
As mentioned earlier, we represent lineage polynomials as arithmetic circuits over $\mathbb N$-valued variables with $+$, $\times$.
A circuit for query $Q$ and \abbrNXPDB $\pxdb$ is a directed acyclic graph $\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}$ with vertices $V_{Q,\pxdb}$ and directed edges $E_{Q,\pxdb} \subset {V_{Q,\pxdb}}^2$.
The sink function $\phi_{Q,\pxdb} : \udom^n \rightarrow V_{Q,\pxdb}$ is a partial function that maps the tuples of the $n$-ary relation $Q(\pxdb)$ to vertices.
We require that $\phi_{Q,\pxdb}$'s range be limited to sink vertices (i.e., vertices with out-degree 0).
A function $\ell_{Q,\pxdb} : V_{Q,\pxdb} \rightarrow \{\;+,\times\;\}\cup \mathbb N \cup \vct X$ assigns a label to each node: Source nodes (i.e., vertices with in-degree 0) are labeled with constants or variables (i.e., $\mathbb N \cup \vct X$), while the remaining nodes are labeled with the symbol $+$ or $\times$.
We require that vertices have an in-degree of at most two.
Note that we can construct circuits for \bis in time linear in the time required for deterministic query processing over a possible world of the \bi under the aforementioned assumption that $\abs{\pxdb} \leq c \cdot \abs{\db}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Modeling Circuit Construction}
\newcommand{\bagdbof}{\textsc{bag}(\pxdb)}
We now connect the size of a circuit (where the size of a circuit is the number of vertices in the corresponding DAG)
for a given $\raPlus$ query $Q$ and \abbrNXPDB $\pxdb$ to
the runtime $\qruntime{Q,\dbbase}$ of the PDB's \dbbaseName $\dbbase$.
We do this formally by showing that the size of the circuit is asymptotically no worse than the corresponding runtime of a large class of deterministic query processing algorithms.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\getpoly}[1]{\textbf{lin}\inparen{#1}}
Each vertex $v \in V_{Q,\pxdb}$ in the arithmetic circuit for
\[\tuple{V_{Q,\pxdb}, E_{Q,\pxdb}, \phi_{Q,\pxdb}, \ell_{Q,\pxdb}}\]
encodes a polynomial, realized as
\[\getpoly{v} = \begin{cases}
\sum_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = +\\
\prod_{v' : (v',v) \in E_{Q,\pxdb}} \getpoly{v'} & \textbf{if } \ell(v) = \times\\
\ell(v) & \textbf{otherwise}
\end{cases}\]
We define the circuit for a $\raPlus$ query $\query$ recursively by cases as follows. In each case, let $\tuple{V_{Q_i,\pxdb}, E_{Q_i,\pxdb}, \phi_{Q_{i},\pxdb}, \ell_{Q_i,\pxdb}}$ denote the circuit for subquery $Q_i$. We implicitly include in all circuits a global zero node $v_0$ s.t., $\ell_{Q, \pxdb}(v_0) = 0$ for any $Q,\pxdb$.
\begin{algorithm}
\caption{\abbrStepOne$(\query, \dbbase, V, E, \ell)$}
\label{alg:lc}
\begin{algorithmic}[1]
\Require $\query$: query
\Require $\dbbase$: a \dbbaseName
\Require $V, E, \ell$: accumulators for the vertex list, edge list, and vertex label list.
\Ensure $\circuit = \tuple{V, E, \phi, \ell}$: a circuit encoding the lineage of each tuple in $\query(\dbbase)$
\If{$\query$ is $R$} \Comment{\textbf{Case 1}: $\query$ is a relation atom}
\For{$t \in \dbbase.R$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, R(t))\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\EndFor
\ElsIf{$\query$ is $\sigma_\theta(\query')$} \Comment{\textbf{Case 2}: $\query$ is a Selection}
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
\For{$t \in \domain(\phi')$}
\State \textbf{if }$\theta(t)$
\textbf{ then } $\phi(t) \gets \phi'(t)$
\textbf{ else } $\phi(t) \gets v_0$
\EndFor
\ElsIf{$\query$ is $\pi_{\vec{A}}(\query')$} \Comment{\textbf{Case 3}: $\query$ is a Projection}
\State $\tuple{V, E, \phi', \ell} \gets \abbrStepOne(\query', \dbbase, V, E, \ell)$
\For{$t \in \pi_{\vec{A}}(\query'(\dbbase))$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$\Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \leftarrow v_t$
\EndFor
\For{$t \in \query'(\dbbase)$}
\State $E \leftarrow E \cup \{(\phi'(t), \phi(\pi_{\vec{A}}t))\}$
\EndFor
\State Correct nodes with in-degrees $>2$ by appending an equivalent fan-in two tree instead
\ElsIf{$\query$ is $\query_1 \cup \query_2$} \Comment{\textbf{Case 4}: $\query$ is a Bag Union}
\State $\tuple{V, E, \phi_1, \ell} \gets \abbrStepOne(\query_1, \dbbase, V, E, \ell)$
\State $\tuple{V, E, \phi_2, \ell} \gets \abbrStepOne(\query_2, \dbbase, V, E, \ell)$
\State $\phi \gets \phi_1 \cup \phi_2$
\For{$t \in \domain(\phi_1) \cap \domain(\phi_2)$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, +)\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\State $E \leftarrow E \cup \{(\phi_1(t), v_t), (\phi_2(t), v_t)\}$
\EndFor
\ElsIf{$\query$ is $\query_1 \bowtie \ldots \bowtie \query_m$} \Comment{\textbf{Case 5}: $\query$ is an $m$-ary Join}
\For{$i \in [m]$}
\State $\tuple{V, E, \phi_i, \ell} \gets \abbrStepOne(\query_i, \dbbase, V, E, \ell)$
\EndFor
\For{$t \in \domain(\phi_1) \bowtie \ldots \bowtie \domain(\phi_m)$}
\State $V \leftarrow V \cup \{v_t\}$; $\ell \leftarrow \ell \cup \{(v_t, \times)\}$ \Comment{Allocate a fresh node $v_t$}
\State $\phi(t) \gets v_t$
\State $E \leftarrow E \cup \comprehension{(\phi_i(\pi_{sch(\query_i(\dbbase))}(t)), v_t)}{i \in [m]}$
\EndFor
\State Correct nodes with in-degrees $>2$ by appending an equivalent fan-in two tree instead
\EndIf
\end{algorithmic}
\end{algorithm}
\Cref{alg:lc} defines how the circuit for a query result is constructed. We quickly review the number of vertices emitted in each case.
\caseheading{Base Relation}
This circuit has $|D_\Omega.R|$ vertices.
\caseheading{Selection}
If we assume dead sinks are iteratively garbage collected,
this circuit has at most $|V_{Q_1,\pxdb}|$ vertices.
\caseheading{Projection}
This formulation will produce vertices with an in-degree greater than two, a problem that we correct by replacing every vertex with an in-degree over two by an equivalent fan-in two tree. The resulting structure has at most $|{Q_1}|-1$ new vertices.
The corrected circuit thus has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\caseheading{Union}
This circuit has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\caseheading{$k$-ary Join}
As in projection, newly created vertices will have an in-degree of $k$, and a fan-in two tree is required.
There are $|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ such vertices, so the corrected circuit has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_k,\pxdb}|+(k-1)|{Q_1} \bowtie \ldots \bowtie {Q_k}|$ vertices.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Bounding circuit depth}
\label{sec:circuit-depth}
We first show that the depth of the circuit (\depth; \Cref{def:size-depth}) is bounded by the size of the query. Denote by $|\query|$ the number of relational operators in query $\query$, which recall we assume is a constant.
\begin{Proposition}[Circuit depth is bounded]
\label{prop:circuit-depth}
Let $\query$ be a relational query and $\dbbase$ be a \dbbaseName with $n$ tuples. There exists a (lineage) circuit $\circuit^*$ encoding the lineage of all tuples $\tup \in \query(\dbbase)$ for which
$\depth(\circuit^*) \leq O(k|\query|\log(n))$.
\end{Proposition}
\begin{proof}
We show that the bound of \Cref{prop:circuit-depth} holds for the circuit constructed by \Cref{alg:lc}.
First, observe that \Cref{alg:lc} is (recursively) invoked exactly once for every relational operator or base relation in $\query$; it thus suffices to show that a call to \Cref{alg:lc} adds at most $O_k(\log(n))$ to the depth of a circuit produced by any recursive invocation.
Second, observe that modulo the logarithmic fan-in of the projection and join cases, the depth of the output is at most one greater than the depth of any input (or at most 1 in the base case of relation atoms).
For the join case, the number of in-edges can be no greater than the join width, which itself is bounded by $k$. The depth thus increases by at most a constant factor of $\lceil \log(k) \rceil = O_k(1)$.
For the projection case, observe that the fan-in is bounded by $|\query'(\dbbase)|$, which is in turn bounded by $n^k$. The depth increase for any projection node is thus at most $\lceil \log(n^k)\rceil = O(k\log(n))$, as desired.
\qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Circuit size vs. runtime}
\label{sec:circuit-runtime}
\begin{Lemma}\label{lem:circ-model-runtime}
\label{lem:circuits-model-runtime}
Given a \abbrNXPDB $\pxdb$ with \dbbaseName $\dbbase$, and an $\raPlus$ query $Q$, the runtime of $Q$ over $\dbbase$ has the same or greater complexity as the size of the lineage of $Q(\pxdb)$. That is, we have $\abs{V_{Q,\pxdb}} \leq k\qruntime{Q, \dbbase}+1$, where $k\ge 1$ is the maximal degree of any polynomial in $Q(\pxdb)$.
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}
We prove by induction that $\abs{V_{Q,\pxdb} \setminus \{v_0\}} \leq k\qruntime{Q, \dbbase}$. For clarity, we implicitly exclude $v_0$ in the proof below.
The base case is a base relation: $Q = R$ and is trivially true since $|V_{R,\pxdb}| = |\dbbase.R|=\qruntime{R, \dbbase}$ (note that here the degree $k=1$).
For the inductive step, we assume that we have circuits for subqueries $Q_1, \ldots, Q_m$ such that $|V_{Q_i,\pxdb}| \leq k_i\qruntime{Q_i,\dbbase}$ where $k_i$ is the degree of $Q_i$.
\caseheading{Selection}
Assume that $Q = \sigma_\theta(Q_1)$.
The circuit for $Q$ has $|V_{Q,\pxdb}| = |V_{Q_1,\pxdb}|$ vertices, so from the inductive assumption and $\qruntime{Q,\dbbase} = \qruntime{Q_1,\dbbase}$ by definition, we have $|V_{Q,\pxdb}| \leq k \qruntime{Q,\dbbase} $.
\caseheading{Projection}
Assume that $Q = \pi_{\vct A}(Q_1)$.
The circuit for $Q$ has at most $|V_{Q_1,\pxdb}|+|{Q_1}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}| + |Q_1|\\
\intertext{(From the inductive assumption)}
& \leq k\qruntime{Q_1,\dbbase} + \abs{Q_1}\\
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
& \le k\qruntime{Q,\dbbase}.
\end{align*}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1} \cap {Q_2}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & \leq |V_{Q_1,\pxdb}|+|V_{Q_2,\pxdb}|+|{Q_1}|+|{Q_2}|\\
\intertext{(From the inductive assumption)}
& \leq k(\qruntime{Q_1,\dbbase} + \qruntime{Q_2,\dbbase}) + (|Q_1| + |Q_2|)\\
\intertext{(By definition of $\qruntime{Q,\dbbase}$)}
& \leq k(\qruntime{Q,\dbbase}).
\end{align*}
\caseheading{$m$-ary Join}
Assume that $Q = Q_1 \bowtie \ldots \bowtie Q_m$. Note that $k=\sum_{i=1}^m k_i\ge m$.
The circuit for $Q$ has $|V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|$ vertices.
\begin{align*}
|V_{Q,\pxdb}| & = |V_{Q_1,\pxdb}|+\ldots+|V_{Q_m,\pxdb}|+(m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
\intertext{(From the inductive assumption and noting $\forall i: k_i \leq k$ and $m\le k$)}
& \leq k\qruntime{Q_1,\dbbase}+\ldots+k\qruntime{Q_m,\dbbase}+\\
&\;\;\; (m-1)|{Q_1} \bowtie \ldots \bowtie {Q_m}|\\
& \leq k(\qruntime{Q_1,\dbbase}+\ldots+\qruntime{Q_m,\dbbase}+\\
&\;\;\;|{Q_1} \bowtie \ldots \bowtie {Q_m}|)\\
\intertext{(By definition of $\qruntime{Q,\dbbase}$ and assumption on $\jointime{\cdot}$)}
& \le k\qruntime{Q,\dbbase}.
\end{align*}
The property holds for all recursive queries, and the proof holds.
\qed
\end{proof}
\subsubsection{Runtime of \abbrStepOne}
\label{sec:lc-runtime}
We next need to show that we can construct the circuit in time linear in the deterministic runtime.
\begin{Lemma}\label{lem:tlc-is-the-same-as-det}
Given a query $\query$ over a \dbbaseName $\dbbase$ and the $\circuit^*$ output by \Cref{alg:lc}, the runtime $\timeOf{\abbrStepOne}(\query,\dbbase,\circuit^*) \le O(\qruntime{\query, \dbbase})$.
\end{Lemma}
\begin{proof}
By analysis of \Cref{alg:lc}, invoked as $\circuit^*\gets\abbrStepOne(\query, \dbbase, \{v_0\}, \emptyset, \{(v_0, 0)\})$.
We assume that the vertex list $V$, edge list $E$, and vertex label list $\ell$ are mutable accumulators with $O(1)$ amortized append.
We assume that the tuple to sink mapping $\phi$ is a linked hashmap, with $O(1)$ insertions and retrievals, and $O(n)$ iteration over the domain of keys.
We assume that the n-ary join $\domain(\phi_1) \bowtie \ldots \bowtie\domain(\phi_n)$ can be computed in time $\jointime{\domain(\phi_1), \ldots, \domain(\phi_n)}$ (\Cref{def:join-cost}) and that an intersection $\domain(\phi_1) \cap \domain(\phi_2)$ can be computed in time $O(|\domain(\phi_1)| + |\domain(\phi_2)|)$ (e.g., with a hash table).
Before proving our runtime bound, we first observe that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
This is true by construction for the relation, projection, and union cases, by \Cref{def:join-cost} for joins, and by the observation that $|\sigma(R)| \leq |R|$.
We show that $\qruntime{\query, \dbbase}$ is an upper-bound for the runtime of \Cref{alg:lc} by recursion.
The base case of a relation atom requires only an $O(|\dbbase.R|)$ iteration over the source tuples.
For the remaining cases, we make the recursive assumption that for every subquery $\query'$, it holds that $O(\qruntime{\query', \dbbase})$ bounds the runtime of \Cref{alg:lc}.
\caseheading{Selection}
Selection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \dbbase})$.
\Cref{alg:lc} requires a loop over every element of $\query'(\dbbase)$.
By the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$, this iteration is also bounded by $O(\qruntime{\query', \dbbase})$.
\caseheading{Projection}
Projection requires a recursive call to \Cref{alg:lc}, which by the recursive assumption is bounded by $O(\qruntime{\query', \dbbase})$, which in turn is a term in $\qruntime{\pi_{\vec{A}}\query', \dbbase}$.
What remains is an iteration over $\pi_{\vec A}(\query(\dbbase))$ (lines 13--16), an iteration over $\query'(\dbbase)$ (lines 17--19), and the construction of a fan-in tree (line 20).
The first iteration is $O(|\query(\dbbase)|) \leq O(\qruntime{\query, \dbbase})$.
The second iteration and the construction of the bounded fan-in tree are both $O(|\query'(\dbbase)|) \leq O(\qruntime{\query', \dbbase}) \leq O(\qruntime{\query, \dbbase}) $, by the observation above that $\qruntime{\query, \db} \geq \Omega(|\query(\db)|)$.
\caseheading{Bag Union}
As above, the recursive calls explicitly correspond to terms in the expansion of $\qruntime{\query_1 \cup \query_2, \dbbase}$.
Initializing $\phi$ (line 24) can be accomplished in $O(|\domain(\phi_1)| + |\domain(\phi_2)|) = O(|\query_1(\dbbase)| + |\query_2(\dbbase)|) \leq O(\qruntime{\query_1, \dbbase} + \qruntime{\query_2, \dbbase})$.
The remainder requires computing $\query_1 \cup \query_2$ (line 25) and iterating over it (lines 25--29), which is $O(|\query_1| + |\query_2|)$ as noted above --- this directly corresponds to terms in $\qruntime{\query_1 \cup \query_2, \dbbase}$.
\caseheading{$m$-ary Join}
As in the prior cases, recursive calls explicitly correspond to terms in our target runtime.
The remaining logic involves (i) computing $\domain(\phi_1) \bowtie \ldots \bowtie \domain(\phi_m)$, (ii) iterating over the results, and (iii) creating a fan-in tree.
Respectively, these are: \\
~(i)~$\jointime{\domain(\phi_1), \ldots, \domain(\phi_m)}$\\
~(ii)~$O(|\query_1(\dbbase) \bowtie \ldots \bowtie \query_m(\dbbase)|) \leq O(\jointime{\domain(\phi_1), \ldots, \domain(\phi_m)})$ (\Cref{def:join-cost})\\
~(iii)~$O(m|\query_1(\dbbase) \bowtie \ldots \bowtie \query_m(\dbbase)|)$ (as (ii), noting that $m \leq k = O(1)$)
\qed
\end{proof}
\section{Higher Moments}
\label{sec:momemts}
%
We make a simple observation to conclude the presentation of our results.
So far we have only focused on the expectation of $\poly$.
In addition, we could, e.g., prove bounds on the probability of a tuple's multiplicity being at least $1$.
Progress can be made on this as follows:
For any positive integer $m$ we can compute the $m$-th moment of the multiplicities, allowing us to, e.g., use the Chebyshev inequality or other high-moment-based probability bounds on the events we might be interested in.
We leave further investigations for future work.
\section{The Karp-Luby Estimator}
\label{sec:karp-luby}
%
Computing the marginal probability of a tuple in the output of a set-probabilistic database query has been studied extensively.
To the best of our knowledge, the current state of the art approximation algorithm for this problem is the Karp-Luby estimator~\cite{DBLP:journals/jal/KarpLM89}, which first appeared in MayBMS/Sprout~\cite{DBLP:conf/icde/OlteanuHK10}, and more recently as part of an online ``anytime'' approximation algorithm~\cite{FH13,heuvel-19-anappdsd}.
The estimator works by observing that for any $\ell$ random binary (but not necessarily independent) events $\vct{W}_1, \ldots, \vct{W}_\ell$, the probability of at least one event occurring (i.e., $\probOf\inparen{\vct{W}_1 \vee \ldots \vee\vct{W}_\ell}$) is bounded from above by the sum of the individual event probabilities (i.e., $\probOf\inparen{\vct{W}_1 \vee \ldots \vee \vct{W}_\ell} \leq \probOf\inparen{\vct{W}_1} + \ldots + \probOf\inparen{\vct{W}_\ell}$).
Starting from this (`easily' computable and large) value, the estimator proceeds to correct the estimate by estimating how much of an over-estimate it is.
Specifically, if $\mathcal P$ is the joint distribution over $\vct{W}$, the estimator computes an approximation of:
$$\mathcal O = \underset{\vct{W} \sim \mathcal P}{\expct}\Big[
\left|\comprehension{i}{\vct{W}_i = 1, i \in [\ell]}\right|
\Big].$$
The accuracy of this estimate is improved by conditioning $\mathcal P$ on a $W_i$ chosen uniformly at random (which ensures that the sampled count will be at least 1) and correcting the resulting estimate by $\probOf\inparen{W_i}$. With an estimate of $\mathcal O$, it can easily be verified that the probability of the disjunction can be computed as:
$$\probOf\inparen{\vct{W}_1 \vee \ldots \vee\vct{W}_\ell} = \probOf\inparen{\vct{W}_1} + \ldots + \probOf\inparen{\vct{W}_\ell} - \mathcal O$$
The Karp-Luby estimator is employed on the \abbrSMB representation\footnote{Note that since we are in the set semantics, in the lineage polynomial/formula, addition is logical OR and multiplication is logical AND.} of $\circuit$ (to solve the set-PDB version of \Cref{prob:intro-stmt}), where each $W_i$ represents the event that one monomial is true.
By simple inspection, if there are $\ell$ monomials, this estimator has runtime $\Omega(\ell)$. Further, a minimum of $\left\lceil\frac{3\cdot \ell\cdot \log(\frac{2}{\delta})}{\epsilon^2}\right\rceil$ invocations of the estimator are required to achieve $1\pm\epsilon$ approximation with probability at least $1-\delta$~\cite{DBLP:conf/icde/OlteanuHK10}, entailing a runtime at least quadratic in $\ell$.
As an arbitrary lineage circuit $\circuit$ may encode $\Omega\inparen{|\circuit|^k}$ monomials, the worst case runtime is at least $\Omega\inparen{|\circuit|^{2k}}$ (where $k$ is the `degree' of the lineage polynomial encoded by $\circuit$). By contrast, note that by the discussion after \Cref{lem:val-ub} we can solve \Cref{prob:intro-stmt} in time $O\inparen{|\circuit|^2}$ for all \abbrBIDB circuits \emph{independent} of the degree $k$.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

158
arXiv/approx_alg.tex Normal file
View File

@ -0,0 +1,158 @@
%root: main.tex
%!TEX root=./main.tex
\section{$1 \pm \epsilon$ Approximation Algorithm}\label{sec:algo}
In \Cref{sec:hard}, we showed that \Cref{prob:bag-pdb-poly-expected} cannot be solved in $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$ runtime. In light of this, we desire to produce an approximation algorithm that runs in time $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$. We do this by showing the result via circuits,
such that our approximation algorithm for this problem runs in $\bigO{\abs{\circuit}}$ for a very broad class of circuits (thus affirming~\Cref{prob:intro-stmt}); see the discussion after \Cref{lem:val-ub} for more.
The following approximation algorithm applies to bag query semantics over both
\abbrCTIDB lineage polynomials and general \abbrBIDB lineage polynomials in practice, where for the latter we note that a $1$-\abbrTIDB is equivalently a \abbrBIDB (blocks are size $1$). Our experimental results (see~\Cref{app:subsec:experiment}) which use queries from the PDBench benchmark~\cite{pdbench} show a low $\gamma$ (see~\Cref{def:param-gamma}) supporting the notion that our bounds hold for general \abbrBIDB in practice.
Corresponding proofs and pseudocode for all formal statements and algorithms
can be found in \Cref{sec:proofs-approx-alg}.
\subsection{Preliminaries and some more notation}
We now introduce definitions and notation related to circuits and polynomials that we will need to state our upper bound results. First we introduce the expansion $\expansion{\circuit}$ of circuit $\circuit$ which
is used in our auxiliary algorithm \sampmon for sampling monomials when computing the approximation.
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
For a circuit $\circuit$, we define $\expansion{\circuit}$ as a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef \in \domN$.
$\expansion{\circuit}$ has the following recursive definition ($\circ$ is list concatenation).
$\expansion{\circuit} =
\begin{cases}
\expansion{\circuit_\linput} \circ \expansion{\circuit_\rinput} &\textbf{ if }\circuit.\type = \circplus\\
\left\{(\monom_\linput \cup \monom_\rinput, \coef_\linput \cdot \coef_\rinput) ~|~(\monom_\linput, \coef_\linput) \in \expansion{\circuit_\linput}, (\monom_\rinput, \coef_\rinput) \in \expansion{\circuit_\rinput}\right\} &\textbf{ if }\circuit.\type = \circmult\\
\elist{(\emptyset, \circuit.\val)} &\textbf{ if }\circuit.\type = \tnum\\
\elist{(\{\circuit.\val\}, 1)} &\textbf{ if }\circuit.\type = \var.\\
\end{cases}
$
\end{Definition}
Later on, we will denote the monomial composed of the variables in $\monom$ as $\encMon$. As an example of $\expansion{\circuit}$, consider $\circuit$ illustrated in \Cref{fig:circuit}. $\expansion{\circuit}$ is then $[(X, 2), (XY, -1), (XY, 4), (Y, -2)]$. This helps us redefine $\rpoly$ (see \Cref{eq:tilde-Q-bi}) in a way that makes our algorithm more transparent.
\begin{Definition}[$\abs{\circuit}$]\label{def:positive-circuit}
For any circuit $\circuit$, the corresponding
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
\end{Definition}
We will overload notation and use $\abs{\circuit}\inparen{\vct{X}}$ to mean $\polyf\inparen{\abs{\circuit}}$.
Conveniently, $\abs{\circuit}\inparen{1,\ldots,1}$ gives us $\sum\limits_{\inparen{\monom, \coef} \in \expansion{\circuit}}\abs{\coef}$.
\begin{Definition}[\size($\cdot$), \depth$\inparen{\cdot}$]\label{def:size-depth}
The functions \size and \depth output the number of gates and levels respectively for input \circuit.
\end{Definition}
\begin{Definition}[$\degree(\cdot)$]\label{def:degree}\footnote{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\degree(\circuit)$ and the latter can be strictly larger (e.g., consider the case when $\circuit$ multiplies two copies of the constant $1$---here we have $\degree(\circuit)=1$ but the degree of $\polyf(\abs{\circuit})$ is $0$).}
$\degree(\circuit)$ is defined recursively as follows:
\[\degree(\circuit)=
\begin{cases}
\max(\degree(\circuit_\linput),\degree(\circuit_\rinput)) & \text{ if }\circuit.\type=+\\
\degree(\circuit_\linput) + \degree(\circuit_\rinput)+1 &\text{ if }\circuit.\type=\times\\
1 & \text{ if }\circuit.\type = \var\\
0 & \text{otherwise}.
\end{cases}
\]
\end{Definition}
Next, we use the following notation for the complexity of multiplying integers:
\begin{Definition}[$\multc{\cdot}{\cdot}$]\footnote{We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.}
In a RAM model of word size of $W$-bits, $\multc{M}{W}$ denotes the complexity of multiplying two integers represented with $M$-bits. (We will assume that for input of size $N$, $W=O(\log{N})$.)
\end{Definition}
Finally, to get linear runtime results, we will need to define another parameter modeling the (weighted) number of monomials in $\expansion{\circuit}$
that need to be `canceled' when monomials with dependent variables are removed (\Cref{subsec:one-bidb}).
Let $\isInd{\cdot}$ be a boolean function returning true if monomial $\encMon$ is composed of independent variables and false otherwise; further, let $\indicator{\theta}$ also be a boolean function returning true if $\theta$ evaluates to true.
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
Given a \abbrOneBIDB circuit $\circuit$ define
\[\gamma(\circuit)=\frac{\sum_{(\monom, \coef)\in \expansion{\circuit}} \abs{\coef}\cdot \indicator{\neg\isInd{\encMon}} }
{\abs{\circuit}(1,\ldots, 1)}.\]
\end{Definition}
\subsection{Our main result}\label{sec:algo:sub:main-result}
We solve~\Cref{prob:intro-stmt} for any fixed $\epsilon > 0$ in what follows.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mypar{Algorithm Idea}
Our approximation algorithm (\approxq pseudo code in \Cref{sec:proof-lem-approx-alg})
is based on the following observation.
Given a lineage polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circuit over
\abbrOneBIDB (recall that all \abbrCTIDB can be reduced to \abbrOneBIDB by~\Cref{prop:ctidb-reduct}), we have:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}
\label{eq:tilde-Q-bi}
\rpoly\inparen{p_1,\dots,p_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}}
\indicator{\isInd{\encMon}
}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \monom}\hspace*{-2mm} p_i.
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Given the above, the algorithm is a sampling based algorithm for the above sum: we sample (via \sampmon) $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional
to $\abs{\coef}$ and compute $\vari{Y}=\indicator{\isInd{\encMon}}
\cdot \prod_{X_i\in \monom} p_i$.
Repeating the sampling an appropriate number of times
and computing the average of $\vari{Y}$ gives us our final estimate. \onepass is used to compute the sampling probabilities needed in \sampmon (details are in \Cref{sec:proofs-approx-alg}).
%%%%%%%%%%%%%%%%%%%%%%%
\mypar{Runtime analysis} We can argue the following runtime for the algorithm outlined above:
\begin{Theorem}
\label{cor:approx-algo-const-p}
Let \circuit be an arbitrary \emph{\abbrOneBIDB} circuit, define $\poly(\vct{X})=\polyf(\circuit)$, let $k=\degree(\circuit)$, and let $\gamma=\gamma(\circuit)$. Further let it be the case that $\prob_i\ge \prob_0$ for all $i\in[\numvar]$. Then an estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$
satisfying
\begin{equation}
\label{eq:approx-algo-bound-main}
\probOf\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error' \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf
\end{equation}
can be computed in time
\begin{equation}
\label{eq:approx-algo-runtime}
O\left(\left(\size(\circuit) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot \depth(\circuit)}{\inparen{\error'}^2\cdot(1-\gamma)^2\cdot \prob_0^{2k}}\right)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right).
\end{equation}
In particular, if $\prob_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)$.
\end{Theorem}
The restriction on $\gamma$ is satisfied by any
$1$-\abbrTIDB (where $\gamma=0$ in the equivalent $1$-\abbrBIDB of~\Cref{prop:ctidb-reduct})
as well as for all three queries of the PDBench \abbrBIDB benchmark (see \Cref{app:subsec:experiment} for experimental results). Further, recalling from~\Cref{sec:intro} that for a \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, $\tupset$ is the set of possible tuples across all possible worlds of $\pdb$, we can also argue the following result.
\begin{Lemma}
\label{lem:ctidb-gamma}
Given $\raPlus$ query $\query$ and \abbrCTIDB $\pdb$, let \circuit be the circuit computed by $\query\inparen{\tupset}$. Then, for the reduced \abbrOneBIDB $\pdb'$ there exists an equivalent circuit $\circuit'$ obtained from $\query\inparen{\tupset'}$, such that $\gamma\inparen{\circuit'}\leq 1 - \inparen{\bound}^{-\inparen{k-1}}$ with $\size\inparen{\circuit'} \leq \size\inparen{\circuit} + \bigO{\numvar\bound}$
and $\depth\inparen{\circuit'} = \depth\inparen{\circuit} + \bigO{\log{\bound}}$.
\end{Lemma}
We briefly connect the runtime in \Cref{eq:approx-algo-runtime} to the algorithm outlined earlier (where we ignore the dependence on $\multc{\cdot}{\cdot}$, which is needed to handle the cost of arithmetic operations over integers). The $\size(\circuit)$ comes from the time taken to run \onepass once (\onepass essentially computes $\abs{\circuit}(1,\ldots, 1)$ using the natural circuit evaluation algorithm on $\circuit$). We make $\frac{\log{\frac{1}{\conf}}}{\inparen{\error'}^2\cdot(1-\gamma)^2\cdot \prob_0^{2k}}$ many calls to \sampmon (each of which essentially traces $O(k)$ random sink to source paths in $\circuit$ all of which by definition have length at most $\depth(\circuit)$).
Finally, we address the $\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}$ term in the runtime.
\begin{Lemma}
\label{lem:val-ub}
For any \emph{\abbrOneBIDB} circuit $\circuit$ with $\degree(\circuit)=k$, we have
$\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \depth(\circuit)}.$
Further, if $\circuit$ is a tree, then we have $\abs{\circuit}(1,\ldots, 1)\le \size(\circuit)^{O(k)}.$
\end{Lemma}
Note that the above implies that, under the assumption from \Cref{cor:approx-algo-const-p} that $\prob_0>0$ and $\gamma<1$ are absolute constants, the runtime there simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)^2\cdot \log{\frac{1}{\conf}}\right)$ for general circuits $\circuit$. If $\circuit$ is a tree, then the runtime simplifies to $O_k\left(\frac 1{\inparen{\error'}^2}\cdot\size(\circuit)\cdot \log{\frac{1}{\conf}}\right)$, which then answers \Cref{prob:intro-stmt} with yes for such circuits.
Finally, note that by \Cref{prop:circuit-depth} and \Cref{lem:circ-model-runtime} for any $\raPlus$ query $\query$, there exists a circuit $\circuit^*$ for $\apolyqdt$ such that $\depth(\circuit^*)\le O_{|Q|}(\log{n})$ and $\size(\circuit^*)\le O_k\inparen{\qruntime{\query, \tupset, \bound}}$. Using this along with \Cref{lem:val-ub}, \Cref{cor:approx-algo-const-p} and the fact that $n\le \qruntime{\query, \tupset, \bound}$, we have the following corollary:
\begin{Corollary}
\label{cor:approx-algo-punchline}
Let $\query$ be an $\raPlus$ query and $\pdb$ be a \emph{\abbrOneBIDB} for which $p_0>0$ and $\gamma<1$ (with $p_0,\gamma$ as in \Cref{cor:approx-algo-const-p}) are absolute constants. Let $\poly(\vct{X})=\apolyqdt$ for any result tuple $\tup$ with $\deg(\poly)=k$. Then one can compute an approximation satisfying \Cref{eq:approx-algo-bound-main} in time $O_{k,|Q|,\error',\conf}\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}$ (given $\query,\tupset$ and $p_i$ for each $i\in [n]$ that defines $\pd$).
\end{Corollary}
Next, we note that the above result along with \Cref{lem:ctidb-gamma}
answers \Cref{prob:big-o-joint-steps} in the affirmative as follows:
\begin{Corollary}
\label{cor:approx-algo-punchline-ctidb}
Let $\query$ be an $\raPlus$ query and $\pdb$ be a \abbrCTIDB for which $p_0>0$ (with $p_0$ as in \Cref{cor:approx-algo-const-p}) is an absolute constant. Let $\poly(\vct{X})=\apolyqdt$ for any result tuple $\tup$ with $\deg(\poly)=k$. Then one can compute an approximation satisfying \Cref{eq:approx-algo-bound-main} in time $O_{k,|Q|,\error',\conf,\bound}\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}$ (given $\query,\tupset$ and $\prob_{\tup, j}$ for each $\tup\in\tupset,~j\in\pbox{\bound}$ that defines $\bpd$).
\end{Corollary}
\begin{proof}[Proof of~\Cref{cor:approx-algo-punchline-ctidb}]
The proof follows by~\Cref{lem:ctidb-gamma}, and~\Cref{cor:approx-algo-punchline}.
\qed
\end{proof}
If we want to approximate the expected multiplicities of all $Z=O(n^k)$ result tuples $\tup$ simultaneously, we just need to run the above result with $\conf$ replaced by $\frac \conf Z$. Note this increases the runtime by only a logarithmic factor.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

101
arXiv/binarybidb.tex Normal file
View File

@ -0,0 +1,101 @@
%root: main.tex
%!TEX root=./main.tex
\section{Background and Notation}\label{sec:background}
\subsection{Polynomial Definition and Terminology}
Given an index set $S$ over variables $X_\tup$ for $\tup\in S$, a (general) polynomial $\genpoly$ over $\inparen{X_\tup}_{\tup \in S}$ with individual degree $\hideg <\infty$
is formally defined as:
\begin{align}
\label{eq:sop-form}
\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN.
\end{align}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
The term $\prod_{\tup\in S} X_\tup^{d_\tup}$ in \Cref{eq:sop-form} is a {\em monomial}. A polynomial $\genpoly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation.
When it is unclear, we use $\smbOf{\genpoly}~\inparen{\smbOf{\poly}}$ to denote the \abbrSMB form of a polynomial (lineage polynomial) $\genpoly~\inparen{\poly}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Degree]\label{def:degree-of-poly}
The degree of polynomial $\genpoly(\vct{X})$ is the largest $\sum_{\tup\in S}d_\tup
$ over all $\vct{d}\in\inset{0,\ldots,\hideg}^S$
such that $c_{\vct{d}}\ne 0$.
We denote the degree of $\genpoly$ as $\deg\inparen{\genpoly}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As an example, the degree of the polynomial $X^2+2XY^2+Y^2$ is $3$.
Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins needed to produce a result tuple.
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynomial}
(or simply lineage polynomial), if it is clear from context that there exists an $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
\subsection{\abbrOneBIDB}\label{subsec:one-bidb}
\label{subsec:tidbs-and-bidbs}
\noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and all $\block_i$ are independent random events. $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events. We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}. We define next a specific construction of \abbrBIDB that is useful for our work.
\begin{Definition}[\abbrOneBIDB]\label{def:one-bidb}
Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases}
1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\
0 & \text{if there exists } \tup,~\tup'\in\block_i, W_\tup, W_{\tup'}\neq 0\\
\prob_\tup & \text{if } W_\tup \ne 0 \text{ for the unique } \tup\in\block_i.\\
\end{cases}$
\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}(W)$.
\footnote{
We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance. When there is no ambiguity, we will denote a world vector as $\worldvec$.}
\end{Definition}
\Cref{fig:lin-poly-bidb} shows the lineage construction of $\poly'\inparen{\vct{X}}$ given $\raPlus$ query $\query$ for arbitrary deterministic $\gentupset'$. Note that the semantics differ from~\Cref{fig:nxDBSemantics} only in the base case.
\begin{Proposition}[\abbrCTIDB reduction]\label{prop:ctidb-reduct}
Given \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, let $\pdb' = \inparen{\onebidbworlds{\tupset'}, \bpd'}$ be the \emph{\abbrOneBIDB} obtained in the following manner: for each $\tup\in\tupset$, create block $\block_\tup = \inset{\intup{\tup, j}_{j\in\pbox{\bound}}}$ of disjoint tuples, for all $j\in\pbox{\bound}$.
The probability distribution $\bpd'$ is then characterized by the vector $\vct{p} = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}$.
Then, the distributions $\mathcal{P}$ and $\mathcal{P}'$ are equivalent.
\end{Proposition}
We now define the reduced polynomial $\rpoly'$ of a \abbrOneBIDB.
\begin{figure}[t!]
\centering
\resizebox{\textwidth}{!}{
\begin{minipage}{\textwidth}
\begin{align*}
\poly'\pbox{\project_A\inparen{\query}, \gentupset', \tup_j} =& \sum_{\substack{\tup_{j'},\\\project_{A}\inparen{\tup_{j'}} = \tup_j}}\poly'\pbox{\query, \gentupset', \tup_{j'}} &
\poly'\pbox{\query_1\union\query_2, \gentupset', \tup_j} =& \poly'\pbox{\query_1, \gentupset', \tup_j}+\poly'\pbox{\query_2, \gentupset', \tup_j}\\
\poly'\pbox{\select_\theta\inparen{\query}, \gentupset', \tup_j} =& \begin{cases}\theta = 1&\poly'\pbox{\query, \gentupset', \tup_j}\\\theta = 0& 0\\\end{cases} &
\begin{aligned}
\poly'\pbox{\query_1\join\query_2, \gentupset', \tup_j} = \\~
\end{aligned} &
\begin{aligned}
&\poly'\pbox{\query_1, \gentupset', \project_{attr\inparen{\query_1}}\inparen{\tup_j}}\\ &~~~\cdot\poly'\pbox{\query_2, \gentupset', \project_{attr\inparen{\query_2}}\inparen{\tup_j}}
\end{aligned}\\
&&&\poly'\pbox{\rel,\gentupset', \tup_j} = j\cdot X_{\tup, j}.
\end{align*}\\[-10mm]
\end{minipage}}
\caption{Construction of the lineage (polynomial) for an $\raPlus$ query $\query$ over $\gentupset'$.}
\label{fig:lin-poly-bidb}
\end{figure}
\begin{Definition}[$\rpoly'$]\label{def:reduced-poly-one-bidb}
Given a polynomial $\poly'\inparen{\vct{X}}$ generated from a \abbrOneBIDB, let $\rpoly'\inparen{\vct{X}}$ denote the reduced form of $\poly'\inparen{\vct{X}}$ derived as follows: i) compute $\smbOf{\poly'\inparen{\vct{X}}}$, eliminating all monomials with cross terms $X_{\tup}X_{\tup'}$ for $\tup\neq \tup' \in \block_i$, and ii) reduce all \emph{variable} exponents $e > 1$ to $1$.
\end{Definition}
Then given $\worldvec\in\inset{0,1}^{\tupset'}$ over the reduced \abbrOneBIDB of~\Cref{prop:ctidb-reduct}, the disjoint requirement and the semantics for constructing the lineage polynomial over a \abbrOneBIDB, $\poly'\inparen{\worldvec}$ is of the same structure as the reformulated polynomial $\refpoly{}\inparen{\worldvec}$ of step i) from~\Cref{def:reduced-poly}, which then implies that $\rpoly'$ is the reduced polynomial that results from step ii) of both~\Cref{def:reduced-poly} and~\Cref{def:reduced-poly-one-bidb}, and further that~\Cref{lem:tidb-reduce-poly} immediately follows for \abbrOneBIDB polynomials.
\begin{Lemma}
Given any \emph{\abbrOneBIDB} $\pdb'$, $\raPlus$ query $\query$, and lineage polynomial
$\poly'\inparen{\vct{X}}=\poly'\pbox{\query,\tupset',\tup}\inparen{\vct{X}}$, it holds that $
\expct_{\vct{W} \sim \pdassign'}\pbox{\poly'\inparen{\vct{W}}} = \rpoly'\inparen{\probAllTup}.
$
\end{Lemma}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

BIN
arXiv/cc-by.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,46 @@
%!TEX root=./main.tex
\subsection{Relationship to Deterministic Query Runtimes}\label{sec:gen}
In~\Cref{sec:intro}, we introduced the structure $T_{det}\inparen{\cdot}$ to analyze the runtime complexity of~\Cref{prob:expect-mult}.
To decouple our results from specific join algorithms, we first lower bound the cost of a join.
\begin{Definition}[Join Cost]
\label{def:join-cost}
Denote by $\jointime{R_1, \ldots, R_m}$ the runtime of an algorithm for computing the $m$-ary join $R_1 \bowtie \ldots \bowtie R_m$.
We require only that the algorithm must enumerate its output, i.e., that $\jointime{R_1, \ldots, R_m} \geq |R_1 \bowtie \ldots \bowtie R_m|$. With this definition of $\jointime{\cdot}$, worst-case optimal join algorithms are handled.
\end{Definition}
Worst-case optimal join algorithms~\cite{skew,ngo-survey} and query evaluation via factorized databases~\cite{factorized-db} (as well as work on FAQs~\cite{DBLP:conf/pods/KhamisNR16}) can be modeled as $\raPlus$ queries (though the query size is data dependent).
For these algorithms, $\jointime{R_1, \ldots, R_m}$ is linear in the {\em AGM bound}~\cite{AGM}.
Our cost model for general query evaluation follows from the join cost:
\noindent\resizebox{1\linewidth}{!}{
\begin{minipage}{1.0\linewidth}
\begin{align*}
\qruntimenoopt{R,\gentupset,\bound} & = |\gentupset.R| &
\qruntimenoopt{\sigma \query, \gentupset,\bound} & = \qruntimenoopt{\query,\gentupset,\bound} &
\qruntimenoopt{\pi \query, \gentupset,\bound} & = \qruntimenoopt{\query,\gentupset,\bound} + \abs{\query(\gentupset)}
\end{align*}\\[-15mm]
\begin{align*}
\qruntimenoopt{\query \cup \query', \gentupset,\bound} & = \qruntimenoopt{\query, \gentupset,\bound} +
\qruntimenoopt{\query', \gentupset,\bound} +
\abs{\query\inparen{\gentupset}}+\abs{\query'\inparen{\gentupset}} \\
\qruntimenoopt{\query_1 \bowtie \ldots \bowtie \query_m, \gentupset,\bound}
& = \qruntimenoopt{\query_1, \gentupset,\bound} + \ldots +
\qruntimenoopt{\query_m,\gentupset,\bound} +
\jointime{\query_1(\gentupset), \ldots, \query_m(\gentupset)}
\end{align*}
\end{minipage}
}\\
Under this model, an $\raPlus$ query $\query$ evaluated over database $\gentupset$ has runtime $O(\qruntimenoopt{Q,\gentupset, \bound})$.
We assume that full table scans are used for every base relation access. We can model index scans by treating an index scan query $\sigma_\theta(R)$ as a base relation.
\Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\tupset$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(Q,\tupset,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntimenoopt{\optquery{\query}, \tupset,\bound})$. Recall we assumed these two bounds when we moved from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}. Lastly, we can handle FAQs and factorized databases by allowing for optimization, i.e. $\optquery{\query}$.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

Binary file not shown.

14
arXiv/conclusions.tex Normal file
View File

@ -0,0 +1,14 @@
%!TEX root=./main.tex
\section{Conclusions and Future Work}\label{sec:concl-future-work}
We have studied the problem of calculating the expected multiplicity of a bag-query result tuple,
a problem that has a practical application in probabilistic databases over multisets.
We show that under various parameterized complexity hardness results/conjectures computing the expected multiplicities exactly is not possible in time linear in the corresponding deterministic query processing time.
We prove that it is possible to approximate the expectation of a lineage polynomial in time linear
in the deterministic query processing time over TIDBs and BIDBs (assuming that there are few cancellations).
Interesting directions for future work include development of a dichotomy for bag \abbrPDB\xplural. While we can handle higher moments (this follows fairly easily from our existing results---see \Cref{sec:momemts}), more general approximations are an interesting area for exploration, including those for more general data models.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

26
arXiv/experiments.tex Normal file
View File

@ -0,0 +1,26 @@
% root: main.tex
Recall that by definition of $\abbrBIDB$, a query result cannot be derived by a self-join between non-identical tuples belonging to the same block. Note that by \Cref{cor:approx-algo-const-p}, $\gamma$ must be a constant in order for \Cref{alg:mon-sam} to achieve linear time. We would like to determine experimentally whether queries over $\abbrBIDB$ instances in practice generate a constant number of cancellations or not. Such an experiment would ideally use a database instance with queries both considered to be typical representations of what is seen in practice.
We ran our experiments using Windows 10 WSL Operating System with an Intel Core i7 2.40GHz processor and 16GB RAM. All experiments used the PostgreSQL 13.0 database system.
For the data we used the MayBMS data generator~\cite{pdbench} tool to randomly generate uncertain versions of TPC-H tables. The queries computed over the database instance are $\query_1$, $\query_2$, and $\query_3$ from~\cite{Antova_fastand}, all of which are modified versions of TPC-H queries $\query_3$, $\query_6$, and $\query_7$ where all aggregations have been dropped.
As written, the queries disallow $\abbrBIDB$ cross terms. We first ran all queries, noting the result size for each. Next the queries were rewritten so as not to filter out the cross terms. The comparison of the sizes of both result sets should then suggest in one way or another whether or not there exist many cross terms in practice. As seen, the experimental query results contain little to no cancelling terms. \Cref{fig:experiment-bidb-cancel} shows the result sizes of the queries, where column CF is the result size when all cross terms are filtered out, column CI shows the number of output tuples when the cancelled tuples are included in the result, and the last column is the value of $\gamma$. The experiments show $\gamma$ to be in a range between $[0, 0.1]\%$, indicating that only a negligible or constant (compare the result sizes of $\query_1 < \query_2$ and their respective $\gamma$ values) amount of tuples are cancelled in practice when running queries over a typical \abbrBIDB instance. Interestingly, only one of the three queries had tuples that violated the \abbrBIDB constraint.
To conclude, the results in \Cref{fig:experiment-bidb-cancel} show experimentally that $\gamma$ is negligible in practice for BIDB queries. We also observe that (i) tuple presence is independent across blocks, so the corresponding probabilities (and hence $\prob_0$) are independent of the number of blocks, and (ii) \bis model uncertain attributes, so block size (and hence $\gamma$) is a function of the ``messiness'' of a dataset, rather than its size.
Thus, we expect \Cref{cor:approx-algo-const-p} to hold in general.
\begin{figure}[ht]
\begin{tabular}{ c | c c c}\label{tbl:cancel}
Query & CF & CI & $\gamma$\\
\hline
$\query_1$ & $46,714$ & $46,768$ & $0.1\%$\\
$\query_2$ & $179,917$ & $179,917$ & $0\%$\\
$\query_3$ & $11,535$ & $11,535$ & $0\%$\\
\end{tabular}
\caption{Number of Cancellations for Queries Over $\abbrBIDB$.}
\label{fig:experiment-bidb-cancel}
\end{figure}

254
arXiv/introduction.tex Normal file
View File

@ -0,0 +1,254 @@
%!TEX root=./main.tex
%root: main.tex
\section{Introduction}\label{sec:intro}
This work explores the problem of computing the expectation of the multiplicity of a tuple in the result of a query over a \abbrCTIDB, a type of probabilistic database with bag semantics where the multiplicity of a tuple is a random variable with range $[0,\bound]$ for some fixed constant $\bound$ and multiplicities assigned to any two tuples are independent of each other.
Formally, a \abbrCTIDB,
$\pdb = \inparen{\worlds, \bpd}$ consists of a set of tuples $\tupset$ and a probability distribution $\bpd$ over all possible worlds generated by assigning each tuple $\tup \in \tupset$ a multiplicity in the range $[0,\bound]$.
Any such world can be encoded as a vector from $\worlds$, the set of all vectors of length $\numvar=\abs{\tupset}$ such that each index corresponds to a distinct $\tup \in \tupset$ storing its multiplicity.
A given world $\worldvec \in\worlds$ can be interpreted as follows: for each $\tup \in \tupset$, $\worldvec_{\tup}$ is the multiplicity of $\tup$ in $\worldvec$. Given that the multiplicities of tuples are independent events, the probability distribution $\bpd$ can be expressed compactly by assigning each tuple a (disjoint) probability distribution over $[0,\bound]$. Let $\prob_{\tup,j}$ denote the probability that tuple $\tup$ is assigned multiplicity $j$. The probability of a particular world $\worldvec$ is then $\prod_{\tup \in \tupset} \prob_{\tup,\worldvec_{\tup}}$.
Allowing for $\leq \bound$ multiplicities across all tuples gives rise to having $\leq \inparen{\bound+1}^\numvar$ possible worlds instead of the usual $2^\numvar$ possible worlds of a $1$-\abbrTIDB, which (assuming set query semantics), is the same as the traditional set \abbrTIDB.
In this work, since we are generally considering bag query input, we will only be considering bag query semantics. We denote by $\query\inparen{\worldvec}\inparen{\tup}$ the multiplicity of $\tup$ in query $\query$ over possible world $\worldvec\in\worlds$.
We can formally state our problem of computing the expected multiplicity of a result tuple as:
\begin{Problem}\label{prob:expect-mult}
Given a \abbrCTIDB $\pdb = \inparen{\worlds, \bpd}$, $\raPlus$ query\footnote{
An $\raPlus$ query is a query expressed in positive relational algebra, i.e., using only the relational algebra operators selection ($\select$), projection ($\project$), natural join ($\join$) and union ($\union$).
}
$\query$, and result tuple $\tup$, compute the expected multiplicity of $\tup$: $\expct_{\rvworld\sim\bpd}\pbox{\query\inparen{\rvworld}\inparen{\tup}}$.
\end{Problem}
It is natural to explore computing the expected multiplicity of a result tuple as this is the analog for computing the marginal probability of a tuple in a set \abbrPDB.
In this work we will assume that $c =\bigO{1}$ since this is what is typically seen in practice.
Allowing for unbounded $c$ is an interesting open problem.
\mypar{Hardness of Set Query Semantics and Bag Query Semantics}
Set query evaluation semantics over $1$-\abbrTIDB\xplural have been studied extensively, and the data complexity of the problem in general has been shown by Dalvi and Suciu to be \sharpphard~\cite{10.1145/1265530.1265571}. For our setting, there exists a trivial polytime algorithm to compute~\Cref{prob:expect-mult} for any $\raPlus$ query over a \abbrCTIDB due to linearity of expectation (see~\Cref{sec:intro-poly-equiv}).
Since we can compute~\Cref{prob:expect-mult} in polynomial time, the interesting question that we explore deals with analyzing the hardness of computing expectation using fine-grained analysis and parameterized complexity, where we are interested in the exponent of polynomial runtime.
Specifically, in this work we ask if~\Cref{prob:expect-mult} can be solved in time linear in the runtime of an analogous deterministic query which we make more precise shortly.
If this is true, then this would open up the way for deployment of \abbrCTIDB\xplural in practice. To analyze this question we denote by $\timeOf{}^*(Q,\pdb)$ the optimal runtime complexity of computing~\Cref{prob:expect-mult} over \abbrCTIDB $\pdb$.
Let $\qruntime{\query,\gentupset,\bound}$ (see~\Cref{sec:gen} for further details) denote the runtime for query $\query$, deterministic database $\gentupset$, and multiplicity bound $\bound$. This paper considers $\raPlus$ queries for which order of operations is \emph{explicit}, as opposed to other query languages, e.g. Datalog, UCQ. Thus, since order of operations affects runtime, we denote the optimized $\raPlus$ query picked by an arbitrary production system as $\optquery{\query} = \arg\min_{\query'\in\raPlus, \query'\equiv\query}\qruntime{\query', \gentupset, \bound}$. Then $\qruntime{\optquery{\query}, \gentupset,\bound}$ is the runtime for the optimized query.\footnote{Note that our work applies to any $\query \in\raPlus$, which implies that specific heuristics for choosing an optimized query can be abstracted away, i.e., our work does not consider heuristic techniques.}
\begin{table}[t!]
\begin{tabular}{|p{0.43\textwidth}|p{0.12\textwidth}|p{0.35\textwidth}|}
\hline
\textbf{Lower bound on $\timeOf{}^*(\qhard,\pdb)$} & \textbf{Num.} $\bpd$s
& \textbf{Hardness Assumption}\\
\hline
$\Omega\inparen{\inparen{\qruntime{\optquery{\qhard}, \tupset, \bound}}^{1+\eps_0}}$ for {\em some} $\eps_0>0$ & Single & Triangle Detection hypothesis\\
$\omega\inparen{\inparen{\qruntime{\optquery{\qhard}, \tupset, \bound}}^{C_0}}$ for {\em all} $C_0>0$ & Multiple &$\sharpwzero\ne\sharpwone$\\
$\Omega\inparen{\inparen{\qruntime{\optquery{\qhard}, \tupset, \bound}}^{c_0\cdot k}}$ for {\em some} $c_0>0$ & Multiple & \Cref{conj:known-algo-kmatch}\\
\hline
\end{tabular}
\caption{Our lower bounds for a specific hard query $\qhard$ parameterized by $k$. For $\pdb = \inparen{\worlds, \bpd}$ those with `Multiple' in the second column need the algorithm to be able to handle multiple $\bpd$, i.e. probability distributions (for a given $\tupset$). The last column states the hardness assumptions that imply the lower bounds in the first column ($\eps_0,C_0,c_0$ are constants that are independent of $k$).}
\label{tab:lbs}
\end{table}
\mypar{Our lower bound results}
Our question is whether or not it is always true that $\timeOf{}^*\inparen{\query, \pdb}\leq\qruntime{\optquery{\query}, \tupset, \bound}$. Unfortunately this is not the case.
\Cref{tab:lbs} shows our results.
Specifically, depending on what hardness result/conjecture we assume, we get various weaker or stronger versions of {\em no} as an answer to our question. To make some sense of the other lower bounds in \Cref{tab:lbs}, we note that it is not too hard to show that $\timeOf{}^*(Q,\pdb) \le \bigO{\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}^k}$, where $k$ is the join width (our notion of join width follows from~\Cref{def:degree-of-poly} and~\Cref{fig:nxDBSemantics}) of the query $\query$ over all result tuples $\tup$ (and the parameter that defines our family of hard queries).
What our lower bound in the third row says is that one cannot get more than a polynomial improvement over essentially the trivial algorithm for~\Cref{prob:expect-mult}.
However, this result assumes a hardness conjecture that is not as well studied as those in the first two rows of the table (see \Cref{sec:hard} for more discussion on the hardness assumptions). Further, we note that existing results\footnote{This claim follows from known results for the problem of counting $k$-cliques, i.e., for a query $\query$ over database $\tupset$ that counts the number of $k$-cliques. Specifically, a lower bound of the form $\Omega\inparen{n^{1+\eps_0}}$ for {\em some} $\eps_0>0$ follows from the triangle detection hypothesis (this, like our result, is for $k=3$). Second, a lower bound of $\omega\inparen{n^{C_0}}$ for {\em all} $C_0>0$ holds under the assumption $\sharpwzero\ne\sharpwone$ for counting $k$-cliques~\cite{10.5555/645413.652181}. Finally, a lower bound of $\Omega\inparen{n^{c_0\cdot k}}$ for {\em some} $c_0>0$ was shown by~\cite{CHEN20061346} (under the strong exponential time hypothesis).
} already imply the claimed lower bounds if we were to replace the $\qruntime{\optquery{\query}, \tupset, \bound}$ by just $\numvar$ (indeed these results follow from known lower bounds for deterministic query processing). Our contribution is to then identify a family of hard queries where deterministic query processing is `easy' but computing the expected multiplicities is hard.
\mypar{Our upper bound results} We introduce a $(1\pm \epsilon)$-approximation algorithm that computes~\Cref{prob:expect-mult} in time $O_\epsilon\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}$. This means, when we are okay with approximation, that we solve~\Cref{prob:expect-mult} in time linear in the runtime of the deterministic query, and bag \abbrPDB\xplural are deployable in practice.
In contrast, known approximation techniques (\cite{DBLP:conf/icde/OlteanuHK10,DBLP:journals/jal/KarpLM89}) in set-\abbrPDB\xplural need time $\Omega(\qruntime{\optquery{\query}, \tupset, \bound}^{2k})$
(see \Cref{sec:karp-luby}).
Further, our approximation algorithm works for a more general notion of bag \abbrPDB\xplural beyond \abbrCTIDB\xplural
(see \Cref{subsec:tidbs-and-bidbs}).
\subsection{Polynomial Equivalence}\label{sec:intro-poly-equiv}
A common encoding of probabilistic databases (e.g., in \cite{IL84a,Imielinski1989IncompleteII,Antova_fastand,DBLP:conf/vldb/AgrawalBSHNSW06} and many others) relies on annotating tuples with lineages or propositional formulas that describe the set of possible worlds that the tuple appears in. The bag semantics analog is a provenance/lineage polynomial (see~\Cref{fig:nxDBSemantics}) $\apolyqdt$~\cite{DBLP:conf/pods/GreenKT07}, a polynomial with non-zero integer coefficients and exponents, over variables $\vct{X}$ encoding input tuple multiplicities. Evaluating a lineage polynomial for a query result tuple $\tup_{out}$ by assigning, for each input tuple $\tup_{in}$, the variable $X_{\tup_{in}}$ the multiplicity of $\tup_{in}$ in the possible world yields the multiplicity of $\tup_{out}$ in the query result for this world.
\begin{figure}[b!]
\begin{align*}
\polyqdt{\project_A(\query)}{\gentupset}{\tup} =& \sum_{\tup': \project_A(\tup') = \tup} \polyqdt{\query}{\gentupset}{\tup'} &
\polyqdt{\query_1 \union \query_2}{\gentupset}{\tup} =& \polyqdt{\query_1}{\gentupset}{\tup} + \polyqdt{\query_2}{\gentupset}{\tup}\\
\polyqdt{\select_\theta(\query)}{\gentupset}{\tup} =& \begin{cases}
\polyqdt{\query}{\gentupset}{\tup} & \text{if }\theta(\tup) \\
0 & \text{otherwise}.
\end{cases} &
\begin{aligned}
\polyqdt{\query_1 \join \query_2}{\gentupset}{\tup} =\\ ~
\end{aligned}&
\begin{aligned}
&\polyqdt{\query_1}{\gentupset}{\project_{\attr{\query_1}}{\tup}} \\
&~~~\cdot\polyqdt{\query_2}{\gentupset}{\project_{\attr{\query_2}}{\tup}}
\end{aligned}\\
& & & \polyqdt{\rel}{\gentupset}{\tup} = X_\tup
\end{align*}\\[-10mm]
\caption{Construction of the lineage (polynomial) for an $\raPlus$ query $\query$ over an arbitrary deterministic database $\gentupset$, where $\vct{X}$ consists of all $X_\tup$ over all $\rel$ in $\gentupset$ and $\tup$ in $\rel$. Here $\gentupset.\rel$ denotes the instance of relation $\rel$ in $\gentupset$. Please note, after we introduce the reduction to $1$-\abbrBIDB, the base case will be expressed alternatively.}
\label{fig:nxDBSemantics}
\end{figure}
We drop $\query$, $\tupset$, and $\tup$ from $\apolyqdt$ when they are clear from the context or irrelevant to the discussion. We now specify the problem of computing the expectation of tuple multiplicity in the language of lineage polynomials:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Problem}[Expected Multiplicity of Lineage Polynomials]\label{prob:bag-pdb-poly-expected}
Given an $\raPlus$ query $\query$, \abbrCTIDB $\pdb$ and result tuple $\tup$, compute the expected
multiplicity of the polynomial $\apolyqdt$ (i.e., $\expct_{\vct{W}\sim \pdassign}\pbox{\apolyqdt(\vct{W})}$, where $\vct{W} \in \worlds$).
\end{Problem}
We note that computing \Cref{prob:expect-mult}
is equivalent to (i.e., yields the same result as) computing \Cref{prob:bag-pdb-poly-expected} (see \Cref{prop:expection-of-polynom}).
All of our results rely on working with a {\em reduced} form $\inparen{\rpoly}$ of the lineage polynomial $\poly$. In fact, it turns out that for the $1$-\abbrTIDB case, computing the expected multiplicity (over bag query semantics) is {\em exactly} the same as evaluating this reduced polynomial over the probabilities that define the $1$-\abbrTIDB. This is also true when the query input(s) is a block independent disjoint probabilistic database~\cite{DBLP:conf/icde/OlteanuHK10} (bag query semantics with tuple multiplicity at most $1$), for which the proof of~\Cref{lem:tidb-reduce-poly} (introduced shortly) holds.
Next, we motivate this reduced polynomial.
Consider the query $\query_1$ defined as follows over the bag relations of \Cref{fig:two-step}:
\begin{lstlisting}
SELECT DISTINCT 1 FROM T $t_1$, R r, T $t_2$
WHERE $t_1$.Point = r.Point$_1$ AND $t_2$.Point = r.Point$_2$
\end{lstlisting}
It can be verified that $\poly\inparen{A, B, C, E, X, Y, Z}$ for the sole result tuple of $\query_1$ is $AXB + BYE + BZC$. Now consider the product query $\query_1^2 = \query_1 \times \query_1$.
The lineage polynomial for $Q_1^2$ is given by $\poly_1^2\inparen{A, B, C, E, X, Y, Z}$
$$
=A^2X^2B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AXB^2YE + 2AXB^2ZC + 2B^2YEZC.
$$
To compute $\expct\pbox{\poly_1^2}$ we can use linearity of expectation and push the expectation through each summand. To keep things simple, let us focus on the monomial $\poly_1^{\inparen{ABX}^2} = A^2X^2B^2$ as the procedure is the same for all other monomials of $\poly_1^2$. Let $\randWorld_X$ be the random variable corresponding to a lineage variable $X$. Because the distinct variables in the product are independent, we can push expectation through them yielding $\expct\pbox{\randWorld_A^2\randWorld_X^2\randWorld_B^2}=\expct\pbox{\randWorld_A^2}\expct\pbox{\randWorld_X^2}\expct\pbox{\randWorld_B^2}$. Since $\randWorld_A, \randWorld_B\in \inset{0, 1}$ we can further derive $\expct\pbox{\randWorld_A}\expct\pbox{\randWorld_X^2}\expct\pbox{\randWorld_B}$ by the fact that for any $W\in \inset{0, 1}$, $W^2 = W$. Observe that if $X\in\inset{0, 1}$, then we further would have $\expct\pbox{\randWorld_A}\expct\pbox{\randWorld_X}\expct\pbox{\randWorld_B} = \prob_A\cdot\prob_X\cdot\prob_B$ (denoting $\probOf\pbox{\randWorld_A = 1} = \prob_A$) $= \rpoly_1^{\inparen{ABX}^2}\inparen{\prob_A, \prob_X, \prob_B}$ (see $ii)$ of~\Cref{def:reduced-poly}). However, in this example, we get stuck with $\expct\pbox{\randWorld_X^2}$, since $\randWorld_X\in\inset{0, 1, 2}$ and for $\randWorld_X \gets 2$, $\randWorld_X^2 \neq \randWorld_X$.
Denote the variables of $\poly$ to be $\vars{\poly}.$ In the \abbrCTIDB setting, $\poly\inparen{\vct{X}}$ has an equivalent reformulation $\inparen{\refpoly{}\inparen{\vct{X_R}}}$ that is of use to us, where $\abs{\vct{X_R}} = \bound\cdot\abs{\vct{X}}$. Given $X_\tup \in\vars{\poly}$ with integer valuation $X_\tup \in\inset{0,\ldots, c}$, we can replace $X_\tup$ by $\sum_{j\in\pbox{\bound}}jX_{\tup, j}$ where the variables $\inparen{X_{\tup, j}}_{j\in\pbox{\bound}}$ are disjoint with integer assignments $X_{\tup, j}\in\inset{0, 1}$. Then for any $\worldvec\in\worlds$ and corresponding reformulated world $\worldvec_{\vct{R}}\in\inset{0, 1}^{\tupset\bound}$, we set $\worldvec_{\vct{R}_{\tup, j}} = 1$ for $\worldvec_\tup = j$, while $\worldvec_{\vct{R}_{\tup, j'}} = 0$ for all $j'\neq j\in\pbox{\bound}$. By construction then $\poly\inparen{\vct{X}}\equiv\refpoly{}\inparen{\vct{X_R}}$ $\inparen{\vct{X_R} = \vars{\refpoly{}}}$ since for any integer valuation $X_\tup\in\pbox{\bound}$, $X_j\in\inset{0, 1}$ we have the equality $X_\tup = j = \sum_{j\in\pbox{\bound}}jX_j$.
Considering again our example,
\begin{multline*}
\refpoly{1, }^{\inparen{ABX}^2}\inparen{A, X, B} = \poly_1^{\inparen{AXB}^2}\inparen{\sum_{j_1\in\pbox{\bound}}j_1A_{j_1}, \sum_{j_2\in\pbox{\bound}}j_2X_{j_2}, \sum_{j_3\in\pbox{\bound}}j_3B_{j_3}} \\
= \inparen{\sum_{j_1\in\pbox{\bound}}j_1A_{j_1}}^2\inparen{\sum_{j_2\in\pbox{\bound}}j_2X_{j_2}}^2\inparen{\sum_{j_3\in\pbox{\bound}}j_3B_{j_3}}^2.
\end{multline*}
Since the set of multiplicities for tuple $\tup$ by nature are disjoint we can drop all cross terms and have $\refpoly{1, }^2 = \sum_{j_1, j_2, j_3 \in \pbox{\bound}}j_1^2A^2_{j_1}j_2^2X_{j_2}^2j_3^2B^2_{j_3}$. Computing expectation we get $\expct\pbox{\refpoly{1, }^2}=\sum_{j_1,j_2,j_3\in\pbox{\bound}}j_1^2j_2^2j_3^2\expct\pbox{\randWorld_{A_{j_1}}}\expct\pbox{\randWorld_{X_{j_2}}}\expct\pbox{\randWorld_{B_{j_3}}}$, since we now have that all $\randWorld_{X_j}\in\inset{0, 1}$.
This leads us to consider a structure related to the lineage polynomial.
\begin{Definition}\label{def:reduced-poly}
For any polynomial $\poly\inparen{\inparen{X_\tup}_{\tup\in\tupset}}$, i) define the reformulated polynomial $\refpoly{}\inparen{\inparen{X_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}
$ to be the polynomial $\refpoly{}$ = $\poly\inparen{\inparen{\sum_{j\in\pbox{\bound}}j\cdot X_{\tup, j}}_{\tup\in\tupset}}
$ and ii) define the \emph{reduced polynomial} $\rpoly\inparen{\inparen{X_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}}
$ to be the polynomial resulting from converting $\refpoly{}$ into the standard monomial basis (\abbrSMB),
\footnote{
This is the representation, typically used in set-\abbrPDB\xplural, where the polynomial is represented as a sum of `pure' products. See \Cref{def:smb} for a formal definition.
}
removing all monomials containing the term $X_{\tup, j}X_{\tup, j'}$ for $\tup\in\tupset, j\neq j'\in\pbox{c}$, and setting all \emph{variable} exponents $e > 1$ to $1$.
\end{Definition}
Continuing with the example
\footnote{
To save clutter we do not show the full expansion for variables with greatest multiplicity $= 1$ since e.g. for variable $A$, the sum of products itself evaluates to $1^2\cdot A^2 = A$.
}
$\poly_1^2\inparen{A, B, C, E, X_1, X_2, Y, Z}$ we have
\begin{multline*}
\rpoly_1^2(A, B, C, E, X_1, X_2, Y, Z) = \\
A\inparen{\sum\limits_{j\in\pbox{\bound}}j^2X_j}B + BYE + BZC + 2A\inparen{\sum\limits_{j\in\pbox{\bound}}j^2X_j}BYE + 2A\inparen{\sum\limits_{j\in\pbox{\bound}}j^2X_j}BZC + 2BYEZC =\\
ABX_1 + AB\inparen{2}^2X_2 + BYE + BZC + 2AX_1BYE + 2A\inparen{2}^2X_2BYE + 2AX_1BZC + 2A\inparen{2}^2X_2BZC + 2BYEZC.
\end{multline*}
Note that we have argued that for our specific example the expectation that we want is $\rpoly_1^2(\probOf\inparen{A=1},$ $\probOf\inparen{B=1}, \probOf\inparen{C=1}, \probOf\inparen{E=1}, \probOf\inparen{X_1=1}, \probOf\inparen{X_2=1}, \probOf\inparen{Y=1}, \probOf\inparen{Z=1})$.
\Cref{lem:tidb-reduce-poly} generalizes the equivalence to {\em all} $\raPlus$ queries on \abbrCTIDB\xplural (proof in \Cref{subsec:proof-exp-poly-rpoly}).
\begin{Lemma}\label{lem:tidb-reduce-poly}
For any \abbrCTIDB $\pdb$, $\raPlus$ query $\query$, and lineage polynomial
$\poly\inparen{\vct{X}}=\poly\pbox{\query,\tupset,\tup}\inparen{\vct{X}}$, it holds that $
\expct_{\vct{W} \sim \pdassign}\pbox{\refpoly{}\inparen{\vct{W}}} = \rpoly\inparen{\probAllTup}
$, where $\probAllTup = \inparen{\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{c}}}.$
\end{Lemma}
\subsection{Our Techniques}
\mypar{Lower Bound Proof Techniques}
Our main hardness result shows that computing~\Cref{prob:expect-mult} is $\sharpwonehard$ for $1$-\abbrTIDB. To prove this result we show that for the same $\query_1$ from the example above, for an arbitrary `product width' $k$, the query $\qhard^k$ is able to encode various hard graph-counting problems (assuming $\bigO{\numvar}$ tuples rather than the $\bigO{1}$ tuples in \Cref{fig:two-step}).
We do so by considering an arbitrary graph $G$ (analogous to relation $\boldsymbol{R}$ of $\query_1$) and analyzing how the coefficients in the (univariate) polynomial $\widetilde{\poly}\left(p,\dots,p\right)$ relate to counts of subgraphs in $G$ that are isomorphic to various graphs with $k$ edges. E.g., we exploit the fact that the coefficient corresponding to the power of $2k$ in $\poly$ of $\qhard^k$ is proportional to the number of $k$-matchings in $G$,
a known hard problem in parameterized/fine-grained complexity literature.
\mypar{Upper Bound Techniques}
Our negative results (\Cref{tab:lbs}) indicate that \abbrCTIDB{}s (even for $\bound=1$) cannot achieve comparable performance to deterministic databases for exact results (under complexity assumptions). In fact, under plausible hardness conjectures, one cannot (drastically) improve upon the trivial algorithm to exactly compute the expected multiplicities for $1$-\abbrTIDB\xplural. A natural followup is whether we can do better if we are willing to settle for an approximation to the expected multiplicities.
\input{two-step-model}
We adopt a two-step intensional model of query evaluation used in set-\abbrPDB\xplural, as illustrated in \Cref{fig:two-step}:
(i) \termStepOne (\abbrStepOne): Given input $\tupset$ and $\query$, output every tuple $\tup$ that possibly satisfies $\query$, annotated with its lineage polynomial ($\poly(\vct{X})=\apolyqdt\inparen{\vct{X}}$);
(ii) \termStepTwo (\abbrStepTwo): Given $\poly(\vct{X})$ for each tuple, compute $\expct_{\randWorld\sim\bpd}\pbox{\poly(\vct{\randWorld})}$.
Let $\timeOf{\abbrStepOne}(Q,\tupset,\circuit)$ denote the runtime of \abbrStepOne when it outputs $\circuit$ (which is a representation of $\poly$ as an arithmetic circuit --- more on this representation in~\Cref{sec:expression-trees}).
Denote by $\timeOf{\abbrStepTwo}(\circuit, \epsilon)$ (recall $\circuit$ is the output of \abbrStepOne) the runtime of \abbrStepTwo. We can leverage~\Cref{def:reduced-poly} and~\Cref{lem:tidb-reduce-poly} to address the next formal objective:
\begin{Problem}[\abbrCTIDB linear time approximation]\label{prob:big-o-joint-steps}
Given \abbrCTIDB $\pdb$, $\raPlus$ query $\query$,
is there a $(1\pm\epsilon)$-approximation of $\expct_{\rvworld\sim\bpd}\pbox{\query\inparen{\rvworld}\inparen{\tup}}$ for all result tuples $\tup$ where
$\exists \circuit : \timeOf{\abbrStepOne}(Q,\tupset, \circuit) + \timeOf{\abbrStepTwo}(\circuit, \epsilon) \le O_\epsilon(\qruntime{\optquery{\query}, \tupset, \bound})$?
\end{Problem}
We show in \Cref{sec:circuit-depth} an $\bigO{\qruntime{\optquery{\query}, \tupset, \bound}}$ algorithm for constructing the lineage polynomial for all result tuples of an $\raPlus$ query $\query$ (or more precisely, a single circuit $\circuit$ with one sink per tuple representing the tuple's lineage).
A key insight of this paper is that the representation of $\circuit$ matters.
For example, if we insist that $\circuit$ represent the lineage polynomial in \abbrSMB, the answer to the above question in general is no, since then we will need $\abs{\circuit}\ge \Omega\inparen{\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}^k}$,
and hence, just $\timeOf{\abbrStepOne}(\query,\tupset,\circuit)$ will be too large.
However, systems can directly emit compact, factorized representations of $\poly(\vct{X})$ (e.g., as a consequence of the standard projection push-down optimization~\cite{DBLP:books/daglib/0020812}).
For example, in~\Cref{fig:two-step}, $B(Y+Z)$ is a factorized representation of the SMB-form $BY+BZ$.
Accordingly, this work uses (arithmetic) circuits\footnote{
An arithmetic circuit is a DAG with variable and/or numeric source nodes and internal nodes, each representing either an addition or multiplication operator.
}
as the representation system of $\poly(\vct{X})$.
Given that there exists a representation $\circuit^*$ such that $\timeOf{\abbrStepOne}(\query,\tupset,\circuit^*)\le \bigO{\qruntime{\optquery{\query}, \tupset, \bound}}$, we can now focus on the complexity of the \abbrStepTwo step.
We can represent the factorized lineage polynomial by its corresponding arithmetic circuit $\circuit$ (whose size we denote by $|\circuit|$).
As we also show in \Cref{sec:circuit-runtime}, this size is also bounded by $\qruntime{\optquery{\query}, \tupset, \bound}$ (i.e., $|\circuit^*| \le \bigO{\qruntime{\optquery{\query}, \tupset, \bound}}$).
Thus, the question of approximation
can be stated as the following stronger (since~\Cref{prob:big-o-joint-steps} has access to \emph{all} equivalent \circuit representing $\query\inparen{\vct{W}}\inparen{\tup}$), but sufficient condition:
\begin{Problem}\label{prob:intro-stmt}
Given one circuit $\circuit$ that encodes $\apolyqdt$ for all result tuples $\tup$ (one sink per $\tup$) for \abbrCTIDB $\pdb$ and $\raPlus$ query $\query$, does there exist an algorithm that computes a $(1\pm\epsilon)$-approximation of $\expct_{\rvworld\sim\bpd}\pbox{\query\inparen{\rvworld}\inparen{\tup}}$ (for all result tuples $\tup$) in $\bigO{|\circuit|}$ time?
\end{Problem}
For an upper bound on approximating the expected count, it is easy to check that if all the probabilities are constant then (with an additive adjustment) $\poly\left(\prob_1,\dots, \prob_n\right)$
(i.e., evaluating the original lineage polynomial
over the probability values) is a constant factor approximation.
This is illustrated in the following example using $\query_1^2$ from earlier. To aid in presentation we assume $\bound = 2$ for variable $X$ and $\bound = 1$ for all other variables. Let $\prob_A$ denote $\probOf\pbox{A = 1}$.
In computing $\rpoly$, we have some cancellations to deal with:
\begin{footnotesize}
\begin{align*}
\refpoly{1, }^2\inparen{\vct{X}} &= A^2\inparen{X_1^2 + 4X_1X_2 + 4X_2^2}B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AX_1B^2YE \\
&\qquad+ 2AX_2B^2YE + 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC
\end{align*}
\end{footnotesize}
This then implies
\begin{footnotesize}
\begin{align*}
\rpoly^2\inparen{\vct{X}} &= AX_1B+4AX_2B+BYE+BZC+2AX_1BYE+2AX_2BYE+2AX_1BZC\\
&\qquad+2AX_2BZC+2BYEZC
\end{align*}
\end{footnotesize}
Substituting $\vct{\prob}$ for $\vct{X}$,
\begin{footnotesize}
\begin{align*}
\hspace*{-3mm}
\refpoly{1, }^2\inparen{\probAllTup} &= \prob_A^2\prob_{X_1}^2\prob_B^2 + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2 + 4\prob_A^2\prob_{X_2}^2\prob_B^2 + \prob_B^2\prob_Y^2\prob_E^2 + \prob_B^2\prob_Z^2\prob_C^2 + 2\prob_A\prob_{X_1}\prob_B^2\prob_Y\prob_E + 2\prob_A\prob_{X_2}\prob_B^2\prob_Y\prob_E\\
&\qquad+ 2\prob_A\prob_{X_1}\prob_B^2\prob_Z\prob_C + 2\prob_A\prob_{X_2}\prob_B^2\prob_Z\prob_C+ 2\prob_B^2\prob_Y\prob_E\prob_Z\prob_C\\
&\leq\prob_A\prob_{X_1}\prob_B + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2 + 4\prob_A\prob_{X_2}\prob_B + \prob_B\prob_Y\prob_E + \prob_B\prob_Z\prob_C + 2\prob_A\prob_{X_1}\prob_B\prob_Y\prob_E+ 2\prob_A\prob_{X_2}\prob_B\prob_Y\prob_E \\
&\qquad+ 2\prob_A\prob_{X_1}\prob_B\prob_Z\prob_C + 2\prob_A\prob_{X_2}\prob_B\prob_Z\prob_C + 2\prob_B\prob_Y\prob_E\prob_Z\prob_C
= \rpoly_1^2\inparen{\vct{p}} + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2.
\end{align*}
\end{footnotesize}
If we assume that all probability values are at least $p_0>0$, then given access to $\refpoly{1, }^2\inparen{\vct{\prob}} - 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2$
we get that $\refpoly{1, }^2\inparen{\vct{\prob}} - 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2$ is in the range $\left(\inparen{p_0}^3\cdot\rpoly^2_1\inparen{\vct{\prob}}, \rpoly_1^2\inparen{\vct{\prob}}\right]$. We can simulate sampling from $\refpoly{1, }^2\inparen{\vct{X}}$ by sampling monomials from $\refpoly{1, }^2$ while ignoring any samples $A^2X_1X_2B^2$. Note, however, that this is \emph{not a tight approximation}.
In~\cref{sec:algo} we demonstrate that a $(1\pm\epsilon)$ (multiplicative) approximation with competitive performance is achievable.
To get a $(1\pm \epsilon)$-multiplicative approximation and solve~\Cref{prob:intro-stmt}, using \circuit we uniformly sample monomials from the equivalent \abbrSMB representation of $\poly$ (without materializing the \abbrSMB representation) and `adjust' their contribution to $\widetilde{\poly}\left(\cdot\right)$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mypar{Applications}
Recent work in heuristic data cleaning~\cite{yang:2015:pvldb:lenses,DBLP:journals/vldb/SaRR0W0Z17,DBLP:journals/pvldb/RekatsinasCIR17,DBLP:journals/pvldb/BeskalesIG10} emits a \abbrPDB when insufficient data exists to select the `correct' data repair.
Probabilistic data cleaning is a crucial innovation, as the alternative is to arbitrarily select one repair and `hope' that queries receive meaningful results.
Although \abbrPDB queries instead convey the trustworthiness of results~\cite{kumari:2016:qdb:communicating}, they are impractically slow~\cite{feng:2019:sigmod:uncertainty,feng:2021:sigmod:efficient}, even in approximation (see \Cref{sec:karp-luby}).
Bags, as we consider, are sufficient for production use, where bag-relational algebra is already the default for performance reasons.
Our results show that bag-\abbrPDB\xplural can be competitive, laying the groundwork for probabilistic functionality in production database engines.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mypar{Paper Organization} We present relevant background and notation in \Cref{sec:background}. We then prove our main hardness results in \Cref{sec:hard} and present our approximation algorithm in \Cref{sec:algo}.
Finally, we discuss related work in \Cref{sec:related-work} and conclude in \Cref{sec:concl-future-work}. All proofs are in the appendix.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

BIN
arXiv/lipics-logo-bw.pdf Normal file

Binary file not shown.

579
arXiv/macros.tex Normal file
View File

@ -0,0 +1,579 @@
% -*- root: main.tex -*-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Temporary Macros for Outline Comparison
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Temporary colour markers for the outline comparison: \isIncluded prints its
% argument in blue, \notIncluded prints it in red.
\newcommand{\isIncluded}[1]{\textcolor{blue}{#1}}
\newcommand{\notIncluded}[1]{\textcolor{red}{#1}}
% Plural suffix: prints "s" followed by \xspace.
\newcommand{\xplural}{s\xspace}
% Registers \xplural in xspace's exception list (presumably so that an
% abbreviation macro ending in \xspace adds no space before the suffix, as in
% \abbrPDB\xplural -- TODO confirm against the xspace documentation).
\xspaceaddexceptions{\xplural}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% COMMENTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%editing/highlighting sections
% Inline colour marker for edited text.
\newcommand{\AHchange}[1]{\textcolor{blue}{#1}}
% NOTE(review): \secrev switches back to black, not to the colour that was
% active before it -- confirm it is never used inside coloured text.
\newcommand{\secrev}[1]{\color{red}#1\color{black}}
% Draft switch: 0 shows the reviewer notes defined below, any non-zero value
% hides them.
\newcommand{\draft}{0} %%% Change this to non-zero to remove comments
\ifnum\draft=0
% Draft mode: \currentWork highlights text in red; the remaining macros render
% a \todo note attributed to one author.
\newcommand{\currentWork}[1]{\textcolor{red}{#1}}
\newcommand{\BG}[1]{\todo[inline]{\textbf{Boris says:$\,$} #1}}
\newcommand{\SF}[1]{\todo{\textbf{Su says:$\,$} #1}}
\newcommand{\OK}[1]{\todo[color=gray]{\textbf{Oliver says:$\,$} #1}}
\newcommand{\AH}[1]{\todo[inline, backgroundcolor=cyan, caption={}]{\textbf{Aaron says:$\,$} #1}}
\newcommand{\AR}[1]{\todo[inline,color=green]{\textbf{Atri says:$\,$} #1}}
\newcommand{\BGdel}[2]{\todo[inline]{\textbf{Boris deleted [#2]: {#1}}}}
\else
% Final mode: every macro of the draft branch needs a counterpart here,
% otherwise any remaining use stops the build with "Undefined control sequence".
% \currentWork keeps its text (without the highlight); all notes vanish.
\newcommand{\currentWork}[1]{#1}
\newcommand{\BG}[1]{}
\newcommand{\SF}[1]{}
\newcommand{\OK}[1]{}
\newcommand{\AH}[1]{}
\newcommand{\AR}[1]{}
\newcommand{\BGdel}[2]{}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREM LIKE ENVIRONMENTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Declares the math alphabet \mathbbold from the bbold font family (encoding U,
% series m, shape n). It is used by \onesymbol and \zerosymbol in the
% SEMIRINGS section of this file.
\DeclareMathAlphabet{\mathbbold}{U}{bbold}{m}{n}
% Theorem-like environments. Theorem is numbered within sections; every other
% environment below shares the Theorem counter.
\newtheorem{Theorem}{Theorem}[section]
\newtheorem{Definition}[Theorem]{Definition}
\newtheorem{Lemma}[Theorem]{Lemma}
\newtheorem{Proposition}[Theorem]{Proposition}
\newtheorem{Corollary}[Theorem]{Corollary}
\newtheorem{Example}[Theorem]{Example}
% The environment is called hypo but is printed as "Conjecture".
\newtheorem{hypo}[Theorem]{Conjecture}%used in mult_distinct_p.tex
\newtheorem{Problem}[Theorem]{Problem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rel model
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Need to have all Rel stuff in one place
% Relational-model notation. Each macro expands to one fixed symbol, so a
% notation change only has to be made here.
\newcommand{\tup}{t}
\newcommand{\rel}{R}
\newcommand{\reli}{S}%<----better names?
\newcommand{\relii}{T}
\newcommand{\db}{D}
\newcommand{\query}{Q}
\newcommand{\qhard}{\query_{hard}}
\newcommand{\tset}{\mathcal{T}}%the set of tuples in a database
% \join and \rename enlarge their symbol with \mathlarger, which is not defined
% in this file -- presumably provided by the relsize package; verify in the
% preamble.
\newcommand{\join}{\mathlarger\Join}
\newcommand{\select}{\sigma}
\newcommand{\project}{\pi}
\newcommand{\union}{\cup}
\newcommand{\rename}{\mathlarger\rho}
% NOTE(review): \sch and \attr expand to bare letters; if they are used in math
% mode the letters are set as a product of italic variables.
\newcommand{\sch}{sch}
\newcommand{\attr}[1]{attr\left(#1\right)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TERMINOLOGY AND ABBREVIATIONS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Perhaps PDB abbreviations should go here?
%Two-step (intensional evaluation model)
% Long names (\term...) and abbreviations (\abbr...) of the two steps of the
% intensional evaluation model. Every macro ends in \xspace.
\newcommand{\termStepOne}{Lineage Computation\xspace}
\newcommand{\abbrStepOne}{LC\xspace}
\newcommand{\termStepTwo}{Expectation Computation\xspace}
\newcommand{\abbrStepTwo}{EC\xspace}
%
% Problem name in small caps, followed by name/abbreviation pairs for the two
% polynomial representations.
\newcommand{\expectProblem}{\textsc{Expected Result Multiplicity Problem}\xspace}
\newcommand{\termSMB}{standard monomial basis\xspace}
\newcommand{\abbrSMB}{SMB\xspace}%we already have this; one has to go
\newcommand{\termSOP}{sum of products\xspace}
\newcommand{\abbrSOP}{SOP\xspace}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Function Names and Typesetting %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \func (defined on the second line) typesets a function name in small caps
% followed by \xspace; the other macros are fixed names built on it. \domain may
% precede \func because \func is only expanded when \domain is used.
\newcommand{\domain}{\func{Dom}}
\newcommand{\func}[1]{\textsc{#1}\xspace}
\newcommand{\isInd}[1]{\func{isInd}\inparen{#1}}
\newcommand{\polyf}{\func{poly}}
\newcommand{\evalmp}{\func{eval}}
\newcommand{\degree}{\func{deg}}
\newcommand{\size}{\func{size}}
\newcommand{\depth}{\func{depth}}
\newcommand{\topord}{\func{TopOrd}}
\newcommand{\smbOf}[1]{\func{\abbrSMB}\inparen{#1}}
%Verify if we need the above...
%saving \treesize for now to keep latex from breaking
% \treesize has the same definition as \size above.
\newcommand{\treesize}{\func{size}}
%I believe this is used in the algo pseudocode
\newcommand{\sign}{\func{sgn}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SEMIRINGS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Semiring notation: carrier/domain symbols first, then the generic one, zero,
% multiplication and addition symbols.
\newcommand{\udom}{\mathcal{U}}
\newcommand{\domK}{K}
\newcommand{\semK}{\mathcal{K}}
\newcommand{\semB}{\mathbb{B}}
\newcommand{\semN}{\mathbb{N}}
\newcommand{\semNX}{\mathbb{N}[\vct{X}]}
% \onesymbol and \zerosymbol use the \mathbbold alphabet declared near the top
% of this file.
\newcommand{\onesymbol}{\mathbbold{1}}
\newcommand{\zerosymbol}{\mathbbold{0}}
\newcommand{\multsymb}{\otimes}
\newcommand{\addsymbol}{\oplus}
% The *K variants attach the subscript \semK to the generic symbols.
\newcommand{\addK}{\addsymbol_{\semK}}
\newcommand{\multK}{\multsymb_{\semK}}
\newcommand{\oneK}{\onesymbol_{\semK}}
\newcommand{\zeroK}{\zerosymbol_{\semK}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Incomplete DB/PDBs %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Notation for incomplete and probabilistic databases.
% NOTE(review): \pd and \pdassign both expand to \mathcal{P} (as does \bpd in
% the c-TIDB section), and \idb expands to the same \Omega as \wSet.
\newcommand{\idb}{{\Omega}}
\newcommand{\pd}{{\mathcal{P}}}%pd for probability distribution
\newcommand{\pdassign}{\mathcal{P}}
\newcommand{\pdb}{\mathcal{D}}
\newcommand{\dbbase}{\db_\idb}
\newcommand{\dbbaseName}{deterministic bounding database\xspace}
% \pdb with the semiring as subscript.
\newcommand{\pxdb}{\pdb_{\semNX}}
\newcommand{\pndb}{\pdb_{\semN}}
\newcommand{\nxdb}{D(\vct{X})}%\mathbb{N}[\vct{X}] db--Are we currently using this?
\newcommand{\valworlds}{\eta}%valid worlds--in particular referring to something like a BIDB, where not all worlds have Pr[w] > 0.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Bag c-TIDB Notation %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Bag c-TIDB notation. \bound is the symbol c of the c-TIDB; the other macros
% of this file that need it (\abbrCTIDB, \onebidbworlds) refer to it via \bound.
\newcommand{\bound}{c}
\newcommand{\tupsetsize}{n}
\newcommand{\tupset}{D}
\newcommand{\gentupset}{\overline{D}}
% The set {0, ..., c}. The upper end is written as \bound instead of a literal
% c so that it follows any change of the bound symbol; the output is unchanged
% while \bound expands to c.
\newcommand{\world}{\inset{0,\ldots, \bound}}
\newcommand{\worldvec}{\vct{W}}
% \world raised to the power \tupset.
\newcommand{\worlds}{\world^\tupset}
\newcommand{\bpd}{\mathcal{P}}%bpd for bag probability distribution
%BIDB
\newcommand{\block}{B}
\newcommand{\bivar}{x_{\block, i}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Binary-BIDB Notation %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Product over the tuples of #1 of the two-element sets {0, \bound_\tup}.
% \bigtimes is not defined in this file -- it must come from a package loaded
% in the preamble; verify.
\newcommand{\onebidbworlds}[1]{\bigtimes_{\tup\in #1}\inset{0, \bound_\tup}}
%PDB Abbreviations
% Upright abbreviations, each followed by \xspace.
\newcommand{\abbrOneBIDB}{\text{Binary-BIDB}\xspace}
\newcommand{\abbrPDB}{\textnormal{PDB}\xspace}
\newcommand{\abbrBPDB}{\textnormal{bag-PDB}\xspace}
\newcommand{\abbrTIDB}{\textnormal{TIDB}\xspace}%replace \ti with this
\newcommand{\abbrCTIDB}{\textnormal{$\bound$-TIDB}\xspace}
\newcommand{\abbrTIDBs}{\textnormal{TIDBs}\xspace}%replace \ti with this
\newcommand{\abbrBIDB}{\textnormal{BIDB}\xspace}
% Older abbreviation macros without \textnormal (see the "replace \ti" notes above).
\newcommand{\ti}{TIDB\xspace}
\newcommand{\tis}{TIDBs\xspace}
\newcommand{\bi}{BIDB\xspace}
\newcommand{\bis}{BIDBs\xspace}
\newcommand{\abbrNXPDB}{$\semNX$-encoded PDB\xspace}
%not sure if we use these; arguably the above abbrev macros should have a name change
% \tiabb / \biabb are the subscript labels attached to \idb, \pd and \pdb below.
\newcommand{\tiabb}{ti}
\newcommand{\biabb}{bi}
\newcommand{\biwset}{\idb_{\biabb}}
\newcommand{\biord}{\leq_{x_\block}}
\newcommand{\tiwset}{\idb_{\tiabb}}
\newcommand{\bipd}{\pd_{\biabb}}
\newcommand{\tipd}{\pd_{\tiabb}}
\newcommand{\bipdb}{\pdb_{\biabb}}
\newcommand{\tipdb}{\pdb_{\tiabb}}
%--------------------------------
\newcommand{\probDist}{\vct{\probOf}}%<---I don't think we need this.
\newcommand{\probAllTup}{\vct{\prob}}%<---I was using simply \vct{\prob}; decide on a convention
\newcommand{\wSet}{\Omega}%<---We have \idb, the set of possible worlds; decide on one of these
%Is this being used?
\newcommand{\pdbx}{X_{DB}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Math Symbols, Functions/Operators, Containers %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Number Sets
\newcommand{\domR}{\mathbb{R}}
\newcommand{\domN}{\mathbb{N}}
%Probability, Expectation
% \mathop makes the E behave as an operator (operator spacing, and limits below
% the symbol in display style).
\newcommand{\expct}{\mathop{\mathbb{E}}}%why not just call this \expect
% Upright "Pr". Bare letters in math mode are set as the product of the italic
% variables P and r; \ensuremath keeps the macro usable in running text.
% NOTE(review): \mathrm overrides an enclosing bold switch, so \vct{\probOf}
% (\probDist) is no longer bold.
\newcommand{\probOf}{\ensuremath{\mathrm{Pr}}}%probability function
%Functions/Operators
\newcommand{\abs}[1]{\left|#1\right|}
\newcommand{\suchthat}{\;|\;} %such that
% Set comprehension { #1 | #2 }. \middle lets the separating bar grow together
% with the surrounding \left\{ ... \right\} instead of staying at normal size.
\newcommand{\comprehension}[2]{\left\{\;#1\;\middle|\;#2\;\right\}}
\newcommand{\eval}[1]{\llbracket #1 \rrbracket}%evaluation double brackets
\newcommand{\evald}[2]{\eval{{#1}}_{#2}}
%Containers
% Auto-sized delimiter pairs around the argument.
\newcommand{\pbox}[1]{\left[#1\right]}%<---used for expectation
\newcommand{\pbrace}[1]{\left\{#1\right\}}
%consider replacing \pbrace with what is below
\newcommand{\inparen}[1]{\left({#1}\right)}
\newcommand{\inset}[1]{\left\{{#1}\right\}}%we already have this as \pbrace; need to pick one
\newcommand{\intup}[1]{\left\langle{#1}\right\rangle}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Variable, Polynomial and Vector Notation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Instance Variables
% Single-letter symbols for probabilities, world elements and world instances.
\newcommand{\prob}{p}
\newcommand{\wElem}{w} %an element of \vct{w}
\newcommand{\worldinst}{W}
%Polynomial Variables
\newcommand{\pVar}{X}%<----not used but recommend instituting this--pVar for polyVar
\newcommand{\kElem}{k}%the kth element<---where and how are we using this?
%Random Variables
% NOTE(review): \randWorld, \rvW and \randomvar all expand to W.
\newcommand{\randWorld}{W}
\newcommand{\rvworld}{\vct{\randWorld}}
\newcommand{\randDB}{\vct{\db}}
\newcommand{\rvW}{W}%\rvW for random variable of type World<---this is the same as \randWorld
%One of these needs to go...I think...
\newcommand{\randomvar}{W}%this little guy needs a home!
%Container for Polynomial Params
\newcommand{\polyinput}[2]{\left(#1,\ldots, #2\right)}%do we still use this?
%Number of Variables--this could easily be number of tups--maybe move to Rel Model?
\newcommand{\numvar}{n}
%Number of blocks (BIDB)
\newcommand{\numblock}{m}
%Vector
% Sets the argument in bold inside a group.
% NOTE(review): \bf is a LaTeX 2.09 font switch that only exists when the
% document class still defines it; \mathbf{#1} would be the LaTeX2e form if
% \vct is only ever used in math mode -- confirm before changing.
\newcommand{\vct}[1]{{\bf #1}}
%norm
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
%using \wVec for world bit vector notation<-----Is this still the case?
%Polynomial
\newcommand{\hideg}{K}
\newcommand{\poly}{\Phi}
\newcommand{\genpoly}{\phi}
\newcommand{\vars}[1]{\func{Vars}\inparen{#1}}
% \polyOf appends its argument in square brackets to \poly; \polyqdt passes
% three comma-separated arguments, and \apolyqdt / \nxpolyqdt fix those
% arguments to the standard symbols.
\newcommand{\polyOf}[1]{\poly[#1]}
\newcommand{\polyqdt}[3]{\polyOf{#1,#2,#3}}
\newcommand{\apolyqdt}{\polyqdt{\query}{\tupset}{\tup}}
\newcommand{\nxpolyqdt}{\polyqdt{\query}{\db_{\semNX}}{\tup}}
\newcommand{\tupvar}[2]{X_{#1,#2}}
\newcommand{\atupvar}{\tupvar{\rel}{\tup}}
\newcommand{\polyX}{\poly\inparen{\vct{\pVar}}}%<---let's see if this proves handy
\newcommand{\rpoly}{\widetilde{\poly}}%r for reduced as in reduced 'Q'
% \poly with subscript "#1R"; the argument is placed directly before the R.
\newcommand{\refpoly}[1]{\poly_{#1R}}
% NOTE(review): unlike \polyX, the argument here is \pVar without \vct.
\newcommand{\rpolyX}{\rpoly\inparen{\pVar}}%<---if this isn't something we use much, we can get rid of it
\newcommand{\biDisProd}{\mathcal{B}}%bidb disjoint tuple products (def 2.5)
\newcommand{\rExp}{\mathcal{T}}%the set of variables to reduce all exponents to 1 via modulus operation; I think \mathcal T collides with the notation used for the set of tuples in D
\newcommand{\polyForTuple}{\poly_{\tup}}%do we use this?<--S 2
%Do we use this?
\newcommand{\out}{output}%output aggregation over the output vector
\newcommand{\prel}{\mathcal{\rel}}%What is this?
\newcommand{\linsett}[3]{\Phi_{#1,#2}^{#3}}%Where is this used?
\newcommand{\wbit}{w}%don't think we need this one
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Graph Notation %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Graph notation: vertex set, edge set, and superscripted variants.
\newcommand{\vset}{V}
\newcommand{\edgeSet}{E}
% \gtype wraps its argument in parentheses; \esetType uses it as a superscript
% on \edgeSet, while \graph and \eset write the parenthesised superscript directly.
\newcommand{\gtype}[1]{\inparen{#1}}
\newcommand{\esetType}[1]{\edgeSet^{\gtype{#1}}}%edge set for induced graph G^{\inparen{\ell}}
\newcommand{\graph}[1]{G^{(#1)}}
\newcommand{\numocc}[2]{\#\left(#1,#2\right)}
\newcommand{\eset}[1]{E^{(#1)}_S} %edge set for arbitrary subgraph
%I don't think we use these anymore
% NOTE(review): LS, LT, AUG and Det below are bare letters; in math mode they
% are set as products of italic variables.
\newcommand{\linsys}[1]{LS(\graph{#1})}
\newcommand{\lintime}[1]{LT^{\graph{#1}}}
\newcommand{\aug}[1]{AUG^{\graph{#1}}}
\newcommand{\mtrix}[1]{M_{#1}}
\newcommand{\dtrm}[1]{Det\left(#1\right)}
% Angle-bracket tuple; \intup in the containers section uses
% \left\langle ... \right\rangle for the same purpose.
\newcommand{\tuple}[1]{\left<#1\right>}
\newcommand{\indicator}[1]{\onesymbol_{#1}}
%----------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Circuit Notation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Circuit notation. Most names are set with \vari, which is defined further
% down in this file (typewriter text followed by \xspace); that is fine because
% \vari is only expanded when these macros are used.
\newcommand{\circuit}{\vari{C}}
\newcommand{\circuitset}[1]{\vari{CSet}\inparen{#1}}
% Operator symbols used for circuit gates.
\newcommand{\circmult}{\times}
\newcommand{\circplus}{+}
\newcommand{\rinput}{\vari{R}}
\newcommand{\linput}{\vari{L}}
\newcommand{\inp}{\vari{input}}
\newcommand{\inputs}{\vari{inputs}}%do we use this?
\newcommand{\subcircuit}{\vari{S}}%does this clash/conflict with \coeffset?
\newcommand{\gate}{\vari{g}}
\newcommand{\lwght}{\vari{Lweight}}
\newcommand{\rwght}{\vari{Rweight}}
\newcommand{\prt}{\vari{partial}}
\newcommand{\degval}{\vari{degree}}
\newcommand{\type}{\vari{type}}
\newcommand{\val}{\vari{val}}
%types of C
\newcommand{\var}{\textsc{var}\xspace}
\newcommand{\tnum}{\textsc{num}\xspace}
%Do we use this?
% Depends on \equivtree, which is defined later in this file.
\newcommand{\subgraph}{\vari{S}_{\equivtree(\circuit)}}
%-----
\newcommand{\cost}{\func{Cost}}
\newcommand{\nullval}{NULL}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Datalog
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Datalog implication sign ":-" in typewriter math, with thin spaces on both
% sides; \ensuremath makes it usable in text and math.
\newcommand{\dlImp}[0]{\,\ensuremath{\mathtt{{:}-}}\,}
% Escaped underscore.
\newcommand{\dlDontcare}{\_}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Query Classes
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\qClass}{\mathcal{Q}}
% Calligraphic RA with a superscript plus, usable in text and math.
\newcommand{\raPlus}{\ensuremath{\mathcal{RA}^{+}}\xspace}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% COMPLEXITY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Asymptotic notation: the symbol followed by the auto-sized parenthesised argument.
\newcommand{\bigO}[1]{O\inparen{#1}}
\newcommand{\littleo}[1]{o\inparen{#1}}
\newcommand{\bigOmega}[1]{\Omega\inparen{#1}}
\newcommand{\littleomega}[1]{\omega\inparen{#1}}
% Complexity class names in sans serif, each followed by \xspace.
% NOTE(review): {\sf ...} is a LaTeX 2.09 font switch; \textsf / \mathsf are
% the LaTeX2e forms.
\newcommand{\np}{{\sf NP}\xspace}
\newcommand{\polytime}{{\sf P}\xspace}
\newcommand{\sharpp}{\#{\sf P}\xspace}
\newcommand{\sharpphard}{\#{\sf P}-hard\xspace}
\newcommand{\sharpwone}{\#{\sf W}[1]\xspace}
\newcommand{\sharpwzero}{\#{\sf W}[0]\xspace}
\newcommand{\sharpwonehard}{\#{\sf W}[1]-hard\xspace}
\newcommand{\ptime}{{\sf PTIME}\xspace}
% Runtime symbols: T with a subscript, optionally applied to an argument.
\newcommand{\timeOf}[1]{T_{#1}}
\newcommand{\qruntime}[1]{T_{det}\inparen{#1}}
\newcommand{\optquery}[1]{\func{OPT}\inparen{#1}}
% Same definition as \qruntime.
\newcommand{\qruntimenoopt}[1]{T_{det}\inparen{#1}}%need to get rid of this--needs to be propagated
\newcommand{\jointime}[1]{T_{join}(#1)}
\newcommand{\kmatchtime}{T_{match}\inparen{k, G}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Approx Alg
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Symbols used by the approximation algorithm.
\newcommand{\randvar}{\vari{Y}}
\newcommand{\coeffset}{S}
\newcommand{\distinctvars}{d}
% c_{#1} times \prob raised to the power d_{#1}.
\newcommand{\coeffitem}[1]{c_{#1}\cdot\prob^{\distinctvars_{#1}}}
% NOTE(review): Uniform, SUM, AVG and EST below are bare letters; in math mode
% they are set as products of italic variables.
\newcommand{\unidist}[1]{Uniform\left(#1\right)}
\newcommand{\samplesize}{N}
\newcommand{\setsize}{m}
% \randvar in bold (via \vct) with an overline.
\newcommand{\empmean}{\overline{\vct{\randvar}}}
\newcommand{\setsum}{SUM}
\newcommand{\ave}[1]{AVG(#1)}
\newcommand{\hoeffestsum}{EST_{\setsum}}
% Error and confidence parameters; \eps further down expands to the same
% symbol as \error.
\newcommand{\error}{\epsilon}
\newcommand{\conf}{\delta}
%Pseudo Code Notation
% Operator symbols for pseudo code.
\newcommand{\plus}{\texttt{+}}
% \times is a math-mode-only symbol, so putting it inside \texttt (which always
% typesets its argument in text mode) raises "Missing $ inserted". \ensuremath
% makes \mult valid in both text and math mode.
\newcommand{\mult}{\ensuremath{\times}}
% Algorithm names in small caps, followed by \xspace.
\newcommand{\algname}[1]{\textsc{#1}\xspace}
\newcommand{\approxq}{\algname{Approximate$\rpoly$}}
\newcommand{\onepass}{\algname{OnePass}}
\newcommand{\sampmon}{\algname{SampleMonomial}}
%I don't think we use reduce anymore
\newcommand{\reduce}{\algname{Reduce}}
\newcommand{\ceil}[1]{\left\lceil #1 \right\rceil}
% Program variable names in typewriter, followed by \xspace.
\newcommand{\vari}[1]{\texttt{#1}\xspace}
\newcommand{\accum}{\vari{acc}}
\newcommand{\numsamp}{\vari{N}}%we have \samplesize above; we can get rid of one of these
\newcommand{\numedge}{m}%we have set size above; we can get rid of one of these
\newcommand{\bivec}{\vari{b}_{\vari{vec}}}%Section 3--proof in appendix for last theorem
%Major cleaning needed to get rid of obsolete notation like expression trees, etc.
%I don't know that we use any of the expression tree macros anymore; if we do, they would be predominantly in S 3 and 4 and their respective appendices
%expression tree T
% Expression-tree names, all set with \vari.
\newcommand{\etree}{\vari{T}}
\newcommand{\stree}{\vari{S}}
\newcommand{\lchild}{\vari{L}}
\newcommand{\rchild}{\vari{R}}
%I don't think we talk of T but of C; let's update this. These should be used only in S 2 and S4
%members of T
\newcommand{\wght}{\vari{weight}}
\newcommand{\vpartial}{\vari{partial}}
%%%%%%%
% Relabel the Require/Ensure keywords as "Input:" / "Output:". These
% \renewcommand calls fail unless the two macros already exist, i.e. the
% package that defines them must be loaded before this file.
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}
%\newcommand{\smb}{\poly\left(\vct{X}\right)}%smb for standard monomial basis; S 2<---this command is, I believe, unnecessary
%not sure if we use this
\newcommand{\etreeset}[1]{\vari{ET}\left(#1\right)}
%verify this
%\expandtree is a placeholder until I change other files with the new macro name \expansion
% \expandtree and \expansion have identical definitions.
\newcommand{\expandtree}[1]{\vari{E}(#1)}
\newcommand{\expansion}[1]{\vari{E}(#1)}
%not sure if we use this; I think the only occurrence would be in the def section of S 4
\newcommand{\elist}[1]{\vari{List}\pbox{#1}}
%not sure if we use this anymore either
\newcommand{\equivtree}{\vari{EET}}
%expandtree tuple elements:
% \encMon is \monom with subscript m; \lencMon / \rencMon add \linput / \rinput
% as a second-level subscript.
\newcommand{\monom}{\vari{v}}
\newcommand{\encMon}{\monom_{\vari{m}}}
\newcommand{\lencMon}{\monom_{\vari{m}_\linput}}
\newcommand{\rencMon}{\monom_{\vari{m}_\rinput}}
\newcommand{\coef}{\vari{c}}
%----------------------------------
% REPRESENTATIONS--this might be Boris' or Atri's stuff; verify if these macros are current
\newcommand{\rmod}{Mod}%mod function which transforms N[X]-DB to N-DB (S 2 and App A)
\newcommand{\reprs}{\mathcal{M}}%used to define Representation System in App A
\newcommand{\repr}{M}
%not sure about these? Perhaps in appendix B for \assign and S 5 for \support?
\newcommand{\assign}{\psi}%assignment function from a world vector to polynomial output in App A
\newcommand{\support}[1]{supp({#1})}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\eps}{\epsilon}%<----this is already defined as \error; need to pick one
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Forcing Layouts
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Negative vertical space of 5mm.
\newcommand{\trimfigurespacing}{\vspace*{-5mm}}
% Run-in paragraph heading: small skip, no indent, bold title followed by a period.
\newcommand{\mypar}[1]{\smallskip\noindent\textbf{{#1}.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Proof/Section Headings %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Is this being used?
% Like \mypar, but the period is placed outside the bold text and is followed
% by a non-breaking space.
\newcommand{\caseheading}[1]{\smallskip \noindent \textbf{#1}.~}
%%%%%
%%%Adding stuff below so that long chain of display equations can be split across pages
\allowdisplaybreaks
%Macro for mult complexity
\newcommand{\multc}[2]{\overline{\mathcal{M}}\left({#1},{#2}\right)}
%consider perhaps putting the tikz code into a separate file.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tikz Graph Symbols
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Shift macro
% Wraps a small picture: pulls it 0.5mm towards its neighbours on both sides
% and lowers it by 0.35mm.
% NOTE(review): the definition ends with a space token after the last \hspace*;
% it is ignored in math mode but prints a space in text mode -- confirm this is
% intended.
\newcommand{\patternshift}[1]{\hspace*{-0.5mm}\raisebox{-0.35mm}{#1}\hspace*{-0.5mm} }
%Global styles
% pattern_node is the small grey circle used by the subgraph pattern macros
% below; the *tree_node styles describe larger circular nodes with bold text.
\tikzset{
default_node/.style={align=center, inner sep=0pt},
pattern_node/.style={fill=gray!50, draw=black, semithick, inner sep=0pt, minimum size = 2pt, circle},
tree_node/.style={default_node, draw=black, black, circle, text width=0.5cm, font=\bfseries, minimum size=0.65cm},
gen_tree_node/.style={default_node, draw, circle, text width=0.5cm, font=\bfseries, minimum size=0.65cm},
highlight_color/.style={black}, wght_color/.style={black},
highlight_treenode/.style={tree_node, draw=black, black},
edge from parent path={(\tikzparentnode) -- (\tikzchildnode)}
}
%Subgraph patterns
% Inline pictograms of small subgraph patterns. Each macro draws pattern_node
% circles joined by thick lines and wraps the picture in \patternshift.
% The "above=... of" / "below left=... of" placement syntax presumably needs
% TikZ's positioning library -- verify it is loaded in the preamble.
%
% \ed: two nodes, one above the other, joined by an edge.
\newcommand{\ed}{\patternshift{
\begin{tikzpicture}[every path/.style={thick, draw}]%[baseline=0.00005cm]
%\begin{scope}[yshift=-5cm]
\node at (0, 0)[pattern_node](bottom){};
\node [above=0.07cm of bottom, pattern_node] (top){};
\draw (top) -- (bottom);
% \node at (0, -2)[pattern_node, blue](b2){};
% \node [above=0.07cm of b2, pattern_node, blue] (t2){};
% \draw (t2) -- (b2);
%\end{scope}
\end{tikzpicture}
}
}
% \kmatch: \ed, \cdots, then \ed with superscript \kElem.
\newcommand{\kmatch}{\ed\cdots\ed^\kElem}
% \twodis: two separate vertical edges side by side.
\newcommand{\twodis}{\patternshift{
\begin{tikzpicture}[every path/.style={thick, draw}]
\node at (0, 0) [pattern_node] (bottom1) {};
\node[above=0.07cm of bottom1, pattern_node] (top1) {} edge (bottom1);
\node at (0.14, 0) [pattern_node] (bottom2) {};
\node [above=0.07cm of bottom2, pattern_node] (top2) {} edge (bottom2);
\end{tikzpicture}
}
}
% \twopath: a top node joined to a lower-left and a lower-right node.
\newcommand{\twopath}{\patternshift{
\begin{tikzpicture}[every path/.style={thick, draw}]
\node at (0, 0.08) [pattern_node] (top){};
\node [below left=0.095cm and 0.05cm of top, pattern_node](left){};
\node[below right=0.095cm and 0.05cm of top, pattern_node](right){};
\draw (top) -- (left);
\draw (top) -- (right);
\end{tikzpicture}
}
}
% \threedis: three separate vertical edges side by side.
\newcommand{\threedis}{\patternshift{
\begin{tikzpicture}[every path/.style={thick, draw}]
\node at (0, 0) [pattern_node] (bottom1) {};
\node[above=0.07cm of bottom1, pattern_node] (top1) {} edge (bottom1);
\node at (0.14, 0) [pattern_node] (bottom2) {};
\node [above=0.07cm of bottom2, pattern_node] (top2) {} edge (bottom2);
\node at (0.28, 0) [pattern_node] (bottom3) {};
\node [above=0.07cm of bottom3, pattern_node] (top3) {} edge (bottom3);
\end{tikzpicture}
}
}
% \tri: three nodes, each joined to the other two.
\newcommand{\tri}{\patternshift{
\begin{tikzpicture}[every path/.style={ thick, draw}]
\node at (0, 0.08) [pattern_node] (top){};
\node [below left=0.08cm and 0.01cm of top, pattern_node](left){} edge (top);
\node[below right=0.08cm and 0.01cm of top, pattern_node](right){} edge (top) edge (left);
\end{tikzpicture}
}
}
% \twopathdis: \ed followed by \twopath, separated by a non-breaking space.
\newcommand{\twopathdis}{\ed~\twopath}
% \threepath: two vertical edges whose upper nodes are joined by a third edge.
\newcommand{\threepath}{\patternshift{
\begin{tikzpicture}[every path/.style={thick, draw}]
\node at (0, 0) [pattern_node] (node1a) {};
\node [above=0.07cm of node1a, pattern_node] (node1b) {} edge (node1a);
\node [right=0.099cm of node1a, pattern_node] (node2b) {}; %edge [semithick] (node1b);
\node [above=0.07cm of node2b, pattern_node] (node3b) {} edge (node2b);
\draw (node1b) -- (node3b);
\end{tikzpicture}
}
}
% \oneint: one root node with three children drawn below it.
\newcommand{\oneint}{\patternshift{
\begin{tikzpicture}[level/.style={sibling distance=0.14cm, level distance=0.15cm}, every path/.style={thick, draw}]
\node at (0, 0) [pattern_node] {} [grow=down]
child{node [pattern_node]{}}
child {node [pattern_node] {}}
child{node [pattern_node] {}};
\end{tikzpicture}
}
}
\newcommand{\bsym}[1]{\boldsymbol{#1}}%b for bold; sym for symbol
% S with a parenthesised superscript.
\newcommand{\sg}[1]{S^{(#1)}}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

619
arXiv/main.aux Normal file
View File

@ -0,0 +1,619 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\zref@newlabel[2]{}
\providecommand\babel@aux[2]{}
\@nameuse{bbl@beforestart}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\gdef\@authornum{1}
\gdef\@authornum{2}
\gdef\@authornum{3}
\gdef\@authornum{4}
\gdef\@authornum{5}
\babel@aux{nil}{}
\gdef\@pageNumberEndAbstract{1}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\newlabel{sec:intro}{{1}{1}{Introduction}{section.1}{}}
\newlabel{sec:intro@cref}{{[section][1][]1}{[1][1][]1}}
\citation{10.1145/1265530.1265571}
\citation{10.5555/645413.652181}
\citation{CHEN20061346}
\newlabel{prob:expect-mult}{{1.1}{2}{}{Theorem.1.1}{}}
\newlabel{prob:expect-mult@cref}{{[Problem][1][1]1.1}{[1][2][]2}}
\citation{DBLP:conf/icde/OlteanuHK10}
\citation{DBLP:journals/jal/KarpLM89}
\citation{IL84a}
\citation{Imielinski1989IncompleteII}
\citation{Antova_fastand}
\citation{DBLP:conf/vldb/AgrawalBSHNSW06}
\citation{DBLP:conf/pods/GreenKT07}
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:lbs}{{1}{3}{Our lower bounds for a specific hard query $\qhard $ parameterized by $k$. For $\pdb = \inset {\worlds , \bpd }$ those with `Multiple' in the second column need the algorithm to be able to handle multiple $\bpd $, i.e. probability distributions (for a given $\tupset $). The last column states the hardness assumptions that imply the lower bounds in the first column ($\eps _o,C_0,c_0$ are constants that are independent of $k$).\relax }{table.caption.1}{}}
\newlabel{tab:lbs@cref}{{[table][1][]1}{[1][2][]3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Polynomial Equivalence}{3}{subsection.1.1}\protected@file@percent }
\newlabel{sec:intro-poly-equiv}{{1.1}{3}{Polynomial Equivalence}{subsection.1.1}{}}
\newlabel{sec:intro-poly-equiv@cref}{{[subsection][1][1]1.1}{[1][3][]3}}
\citation{DBLP:conf/icde/OlteanuHK10}
\newlabel{prob:bag-pdb-poly-expected}{{1.2}{4}{Expected Multiplicity of Lineage Polynomials}{Theorem.1.2}{}}
\newlabel{prob:bag-pdb-poly-expected@cref}{{[Problem][2][1]1.2}{[1][4][]4}}
\newlabel{fig:nxDBSemantics}{{1}{4}{Construction of the lineage (polynomial) for an $\raPlus $ query $\query $ over an arbitrary deterministic database $\gentupset $, where $\vct {X}$ consists of all $X_\tup $ over all $\rel $ in $\gentupset $ and $\tup $ in $\rel $. Here $\gentupset .\rel $ denotes the instance of relation $\rel $ in $\gentupset $. Please note, after we introduce the reduction to $1$-\abbrBIDB , the base case will be expressed alternatively.\relax }{figure.caption.2}{}}
\newlabel{fig:nxDBSemantics@cref}{{[figure][1][]1}{[1][3][]4}}
\newlabel{def:reduced-poly}{{1.3}{5}{}{Theorem.1.3}{}}
\newlabel{def:reduced-poly@cref}{{[Definition][3][1]1.3}{[1][5][]5}}
\newlabel{lem:tidb-reduce-poly}{{1.4}{5}{}{Theorem.1.4}{}}
\newlabel{lem:tidb-reduce-poly@cref}{{[Lemma][4][1]1.4}{[1][5][]5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Our Techniques}{5}{subsection.1.2}\protected@file@percent }
\citation{DBLP:books/daglib/0020812}
\newlabel{fig:two-step}{{2}{6}{Intensional Query Evaluation Model $(\query _2 = \project _{\text {Point}}$ $\inparen {T\join _{\text {Point} = \text {Point}_1}R}$ where, for table $R,~\bound = 2$, while for $T,~\bound = 1.)$\relax }{figure.caption.3}{}}
\newlabel{fig:two-step@cref}{{[figure][2][]2}{[1][6][]6}}
\newlabel{prob:big-o-joint-steps}{{1.5}{6}{\abbrCTIDB linear time approximation}{Theorem.1.5}{}}
\newlabel{prob:big-o-joint-steps@cref}{{[Problem][5][1]1.5}{[1][6][]6}}
\citation{yang:2015:pvldb:lenses}
\citation{DBLP:journals/vldb/SaRR0W0Z17}
\citation{DBLP:journals/pvldb/RekatsinasCIR17}
\citation{DBLP:journals/pvldb/BeskalesIG10}
\citation{DBLP:journals/vldb/SaRR0W0Z17}
\citation{kumari:2016:qdb:communicating}
\citation{feng:2019:sigmod:uncertainty}
\citation{feng:2021:sigmod:efficient}
\newlabel{prob:intro-stmt}{{1.6}{7}{}{Theorem.1.6}{}}
\newlabel{prob:intro-stmt@cref}{{[Problem][6][1]1.6}{[1][7][]7}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Background and Notation}{8}{section.2}\protected@file@percent }
\newlabel{sec:background}{{2}{8}{Background and Notation}{section.2}{}}
\newlabel{sec:background@cref}{{[section][2][]2}{[1][8][]8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Polynomial Definition and Terminology}{8}{subsection.2.1}\protected@file@percent }
\newlabel{eq:sop-form}{{1}{8}{Polynomial Definition and Terminology}{equation.2.1}{}}
\newlabel{eq:sop-form@cref}{{[equation][1][]1}{[1][8][]8}}
\newlabel{def:smb}{{2.1}{8}{Standard Monomial Basis}{Theorem.2.1}{}}
\newlabel{def:smb@cref}{{[Definition][1][2]2.1}{[1][8][]8}}
\newlabel{def:degree-of-poly}{{2.2}{8}{Degree}{Theorem.2.2}{}}
\newlabel{def:degree-of-poly@cref}{{[Definition][2][2]2.2}{[1][8][]8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}\text {Binary-BIDB}\xspace }{8}{subsection.2.2}\protected@file@percent }
\newlabel{subsec:one-bidb}{{2.2}{8}{\abbrOneBIDB }{subsection.2.2}{}}
\newlabel{subsec:one-bidb@cref}{{[subsection][2][2]2.2}{[1][8][]8}}
\newlabel{subsec:tidbs-and-bidbs}{{2.2}{8}{\abbrOneBIDB }{subsection.2.2}{}}
\newlabel{subsec:tidbs-and-bidbs@cref}{{[subsection][2][2]2.2}{[1][8][]8}}
\newlabel{def:one-bidb}{{2.3}{8}{\abbrOneBIDB }{Theorem.2.3}{}}
\newlabel{def:one-bidb@cref}{{[Definition][3][2]2.3}{[1][8][]8}}
\newlabel{fig:lin-poly-bidb}{{3}{9}{Construction of the lineage (polynomial) for an $\raPlus $ query $\query $ over $\gentupset '$.\relax }{figure.caption.4}{}}
\newlabel{fig:lin-poly-bidb@cref}{{[figure][3][]3}{[1][9][]9}}
\newlabel{prop:ctidb-reduct}{{2.4}{9}{\abbrCTIDB reduction}{Theorem.2.4}{}}
\newlabel{prop:ctidb-reduct@cref}{{[Proposition][4][2]2.4}{[1][9][]9}}
\newlabel{def:reduced-poly-one-bidb}{{2.5}{9}{$\rpoly '$}{Theorem.2.5}{}}
\newlabel{def:reduced-poly-one-bidb@cref}{{[Definition][5][2]2.5}{[1][9][]9}}
\citation{IL84a}
\citation{arith-complexity}
\newlabel{cor:expct-sop}{{2.7}{10}{}{Theorem.2.7}{}}
\newlabel{cor:expct-sop@cref}{{[Corollary][7][2]2.7}{[1][9][]10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1}Possible World Semantics}{10}{subsubsection.2.2.1}\protected@file@percent }
\newlabel{subsub:possible-world-sem}{{2.2.1}{10}{Possible World Semantics}{subsubsection.2.2.1}{}}
\newlabel{subsub:possible-world-sem@cref}{{[subsubsection][1][2,2]2.2.1}{[1][10][]10}}
\newlabel{prop:expection-of-polynom}{{2.8}{10}{Expectation of polynomials}{Theorem.2.8}{}}
\newlabel{prop:expection-of-polynom@cref}{{[Proposition][8][2]2.8}{[1][10][]10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Formalizing \Cref {prob:intro-stmt}}{10}{subsection.2.3}\protected@file@percent }
\newlabel{sec:expression-trees}{{2.3}{10}{Formalizing \Cref {prob:intro-stmt}}{subsection.2.3}{}}
\newlabel{sec:expression-trees@cref}{{[subsection][3][2]2.3}{[1][10][]10}}
\newlabel{def:circuit}{{2.9}{11}{Circuit}{Theorem.2.9}{}}
\newlabel{def:circuit@cref}{{[Definition][9][2]2.9}{[1][10][]11}}
\newlabel{fig:circuit}{{4}{11}{Circuit encoding of $(X + 2Y)(2X - Y)$\relax }{figure.caption.5}{}}
\newlabel{fig:circuit@cref}{{[figure][4][]4}{[1][11][]11}}
\newlabel{def:poly-func}{{2.10}{11}{$\polyf (\cdot )$}{Theorem.2.10}{}}
\newlabel{def:poly-func@cref}{{[Definition][10][2]2.10}{[1][11][]11}}
\newlabel{def:circuit-set}{{2.11}{11}{Circuit Set}{Theorem.2.11}{}}
\newlabel{def:circuit-set@cref}{{[Definition][11][2]2.11}{[1][11][]11}}
\newlabel{def:the-expected-multipl}{{2.12}{11}{The Expected Result Multiplicity Problem}{Theorem.2.12}{}}
\newlabel{def:the-expected-multipl@cref}{{[Definition][12][2]2.12}{[1][11][]11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Relationship to Deterministic Query Runtimes}{11}{subsection.2.4}\protected@file@percent }
\newlabel{sec:gen}{{2.4}{11}{Relationship to Deterministic Query Runtimes}{subsection.2.4}{}}
\newlabel{sec:gen@cref}{{[subsection][4][2]2.4}{[1][11][]11}}
\citation{skew}
\citation{ngo-survey}
\citation{factorized-db}
\citation{DBLP:conf/pods/KhamisNR16}
\citation{AGM}
\citation{k-match}
\citation{k-match}
\newlabel{def:join-cost}{{2.13}{12}{Join Cost}{Theorem.2.13}{}}
\newlabel{def:join-cost@cref}{{[Definition][13][2]2.13}{[1][11][]12}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Hardness of Exact Computation}{12}{section.3}\protected@file@percent }
\newlabel{sec:hard}{{3}{12}{Hardness of Exact Computation}{section.3}{}}
\newlabel{sec:hard@cref}{{[section][3][]3}{[1][12][]12}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Preliminaries}{12}{subsection.3.1}\protected@file@percent }
\newlabel{sec:hard:sub:pre}{{3.1}{12}{Preliminaries}{subsection.3.1}{}}
\newlabel{sec:hard:sub:pre@cref}{{[subsection][1][3]3.1}{[1][12][]12}}
\newlabel{thm:k-match-hard}{{3.1}{12}{\cite {k-match}}{Theorem.3.1}{}}
\newlabel{thm:k-match-hard@cref}{{[Theorem][1][3]3.1}{[1][12][]12}}
\newlabel{conj:known-algo-kmatch}{{3.2}{12}{}{Theorem.3.2}{}}
\newlabel{conj:known-algo-kmatch@cref}{{[hypo][2][3]3.2}{[1][12][]12}}
\citation{triang-hard}
\newlabel{conj:graph}{{3.3}{13}{}{Theorem.3.3}{}}
\newlabel{conj:graph@cref}{{[hypo][3][3]3.3}{[1][12][]13}}
\newlabel{def:qk}{{3.4}{13}{}{Theorem.3.4}{}}
\newlabel{def:qk@cref}{{[Definition][4][3]3.4}{[1][13][]13}}
\zref@newlabel{mdf@pagelabel-1}{\default{3.1}\page{13}\abspage{13}\mdf@pagevalue{13}}
\newlabel{lem:tdet-om}{{3.5}{13}{}{Theorem.3.5}{}}
\newlabel{lem:tdet-om@cref}{{[Lemma][5][3]3.5}{[1][13][]13}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Multiple Distinct $p$ Values}{13}{subsection.3.2}\protected@file@percent }
\newlabel{sec:multiple-p}{{3.2}{13}{Multiple Distinct $\prob $ Values}{subsection.3.2}{}}
\newlabel{sec:multiple-p@cref}{{[subsection][2][3]3.2}{[1][13][]13}}
\newlabel{thm:mult-p-hard-result}{{3.6}{13}{}{Theorem.3.6}{}}
\newlabel{thm:mult-p-hard-result@cref}{{[Theorem][6][3]3.6}{[1][13][]13}}
\citation{pdbench}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Single $p$ value}{14}{subsection.3.3}\protected@file@percent }
\newlabel{sec:single-p}{{3.3}{14}{Single $\prob $ value}{subsection.3.3}{}}
\newlabel{sec:single-p@cref}{{[subsection][3][3]3.3}{[1][14][]14}}
\newlabel{th:single-p-hard}{{3.7}{14}{}{Theorem.3.7}{}}
\newlabel{th:single-p-hard@cref}{{[Theorem][7][3]3.7}{[1][14][]14}}
\@writefile{toc}{\contentsline {section}{\numberline {4}$1 \pm \epsilon $ Approximation Algorithm}{14}{section.4}\protected@file@percent }
\newlabel{sec:algo}{{4}{14}{$1 \pm \epsilon $ Approximation Algorithm}{section.4}{}}
\newlabel{sec:algo@cref}{{[section][4][]4}{[1][14][]14}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Preliminaries and some more notation}{14}{subsection.4.1}\protected@file@percent }
\newlabel{def:expand-circuit}{{4.1}{15}{$\expansion {\circuit }$}{Theorem.4.1}{}}
\newlabel{def:expand-circuit@cref}{{[Definition][1][4]4.1}{[1][14][]15}}
\newlabel{def:positive-circuit}{{4.2}{15}{$\abs {\circuit }$}{Theorem.4.2}{}}
\newlabel{def:positive-circuit@cref}{{[Definition][2][4]4.2}{[1][15][]15}}
\newlabel{def:size-depth}{{4.3}{15}{\size ($\cdot $), \depth $\inparen {\cdot }$}{Theorem.4.3}{}}
\newlabel{def:size-depth@cref}{{[Definition][3][4]4.3}{[1][15][]15}}
\newlabel{def:degree}{{4.4}{15}{$\degree (\cdot )$}{Theorem.4.4}{}}
\newlabel{def:degree@cref}{{[Definition][4][4]4.4}{[1][15][]15}}
\newlabel{def:param-gamma}{{4.6}{15}{Parameter $\gamma $}{Theorem.4.6}{}}
\newlabel{def:param-gamma@cref}{{[Definition][6][4]4.6}{[1][15][]15}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Our main result}{16}{subsection.4.2}\protected@file@percent }
\newlabel{sec:algo:sub:main-result}{{4.2}{16}{Our main result}{subsection.4.2}{}}
\newlabel{sec:algo:sub:main-result@cref}{{[subsection][2][4]4.2}{[1][15][]16}}
\newlabel{eq:tilde-Q-bi}{{2}{16}{Our main result}{equation.4.2}{}}
\newlabel{eq:tilde-Q-bi@cref}{{[equation][2][]2}{[1][16][]16}}
\newlabel{cor:approx-algo-const-p}{{4.7}{16}{}{Theorem.4.7}{}}
\newlabel{cor:approx-algo-const-p@cref}{{[Theorem][7][4]4.7}{[1][16][]16}}
\newlabel{eq:approx-algo-bound-main}{{3}{16}{}{equation.4.3}{}}
\newlabel{eq:approx-algo-bound-main@cref}{{[equation][3][]3}{[1][16][]16}}
\newlabel{eq:approx-algo-runtime}{{4}{16}{}{equation.4.4}{}}
\newlabel{eq:approx-algo-runtime@cref}{{[equation][4][]4}{[1][16][]16}}
\newlabel{lem:ctidb-gamma}{{4.8}{16}{}{Theorem.4.8}{}}
\newlabel{lem:ctidb-gamma@cref}{{[Lemma][8][4]4.8}{[1][16][]16}}
\citation{valiant-79-cenrp}
\citation{DS12}
\citation{FO16}
\citation{AB15}
\citation{kenig-13-nclexpdc}
\citation{roy-11-f}
\citation{sen-10-ronfqevpd}
\citation{VS17}
\citation{RS07}
\citation{GT06}
\citation{FH12}
\citation{AD11d}
\citation{DS12}
\newlabel{lem:val-ub}{{4.9}{17}{}{Theorem.4.9}{}}
\newlabel{lem:val-ub@cref}{{[Lemma][9][4]4.9}{[1][16][]17}}
\newlabel{cor:approx-algo-punchline}{{4.10}{17}{}{Theorem.4.10}{}}
\newlabel{cor:approx-algo-punchline@cref}{{[Corollary][10][4]4.10}{[1][17][]17}}
\newlabel{cor:approx-algo-punchline-ctidb}{{4.11}{17}{}{Theorem.4.11}{}}
\newlabel{cor:approx-algo-punchline-ctidb@cref}{{[Corollary][11][4]4.11}{[1][17][]17}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Related Work}{17}{section.5}\protected@file@percent }
\newlabel{sec:related-work}{{5}{17}{Related Work}{section.5}{}}
\newlabel{sec:related-work@cref}{{[section][5][]5}{[1][17][]17}}
\citation{FH13}
\citation{heuvel-19-anappdsd}
\citation{DBLP:conf/icde/OlteanuHK10}
\citation{DS07}
\citation{DS07}
\citation{DBLP:conf/icde/OlteanuHK10}
\citation{jha-12-pdwm}
\citation{factorized-db}
\citation{jha-12-pdwm}
\citation{DM14c}
\citation{S18a}
\citation{bahar-93-al}
\citation{chen-10-cswssr}
\bibstyle{plainurl}
\bibdata{main}
\bibcite{pdbench}{1}
\bibcite{DBLP:conf/vldb/AgrawalBSHNSW06}{2}
\bibcite{AB15}{3}
\bibcite{AD11d}{4}
\bibcite{Antova_fastand}{5}
\bibcite{AGM}{6}
\bibcite{bahar-93-al}{7}
\bibcite{DBLP:journals/pvldb/BeskalesIG10}{8}
\bibcite{arith-complexity}{9}
\bibcite{chen-10-cswssr}{10}
\bibcite{CHEN20061346}{11}
\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusions and Future Work}{18}{section.6}\protected@file@percent }
\newlabel{sec:concl-future-work}{{6}{18}{Conclusions and Future Work}{section.6}{}}
\newlabel{sec:concl-future-work@cref}{{[section][6][]6}{[1][18][]18}}
\gdef\@pageNumberStartBibliography{18}
\bibcite{k-match}{12}
\bibcite{DS07}{13}
\bibcite{10.1145/1265530.1265571}{14}
\bibcite{DS12}{15}
\bibcite{heuvel-19-anappdsd}{16}
\bibcite{DM14c}{17}
\bibcite{feng:2021:sigmod:efficient}{18}
\bibcite{feng:2019:sigmod:uncertainty}{19}
\bibcite{FH12}{20}
\bibcite{FH13}{21}
\bibcite{FO16}{22}
\bibcite{10.5555/645413.652181}{23}
\bibcite{param-comp}{24}
\bibcite{DBLP:books/daglib/0020812}{25}
\bibcite{DBLP:conf/pods/GreenKT07}{26}
\bibcite{GT06}{27}
\bibcite{Imielinski1989IncompleteII}{28}
\bibcite{IL84a}{29}
\bibcite{jha-12-pdwm}{30}
\bibcite{DBLP:journals/jal/KarpLM89}{31}
\bibcite{kenig-13-nclexpdc}{32}
\bibcite{DBLP:conf/pods/KhamisNR16}{33}
\bibcite{triang-hard}{34}
\bibcite{kumari:2016:qdb:communicating}{35}
\bibcite{ngo-survey}{36}
\bibcite{skew}{37}
\bibcite{DBLP:conf/icde/OlteanuHK10}{38}
\bibcite{factorized-db}{39}
\bibcite{DBLP:journals/pvldb/RekatsinasCIR17}{40}
\bibcite{roy-11-f}{41}
\bibcite{RS07}{42}
\bibcite{DBLP:journals/vldb/SaRR0W0Z17}{43}
\bibcite{sen-10-ronfqevpd}{44}
\bibcite{S18a}{45}
\bibcite{valiant-79-cenrp}{46}
\bibcite{VS17}{47}
\bibcite{virgi-survey}{48}
\bibcite{yang:2015:pvldb:lenses}{49}
\gdef\@pageNumberEndBibliography{20}
\@writefile{toc}{\contentsline {section}{\numberline {7}Acknowledgements}{20}{section.7}\protected@file@percent }
\gdef\@pageNumberStartAppendix{21}
\citation{DBLP:conf/pods/GreenKT07}
\@writefile{toc}{\contentsline {section}{\numberline {A}Generalizing Beyond Set Inputs}{21}{appendix.A}\protected@file@percent }
\newlabel{sec:gener-results-beyond}{{A}{21}{Generalizing Beyond Set Inputs}{appendix.A}{}}
\newlabel{sec:gener-results-beyond@cref}{{[appendix][1][2147483647]A}{[1][21][]21}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.1}\textnormal {TIDB}\xspace {}s}{21}{subsection.A.1}\protected@file@percent }
\newlabel{sec:abbrtidbs}{{A.1}{21}{\abbrTIDB {}s}{subsection.A.1}{}}
\newlabel{sec:abbrtidbs@cref}{{[subappendix][1][2147483647,1]A.1}{[1][21][]21}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}\textnormal {BIDB}\xspace {}s}{21}{subsection.A.2}\protected@file@percent }
\newlabel{sec:abbrbidbs}{{A.2}{21}{\abbrBIDB {}s}{subsection.A.2}{}}
\newlabel{sec:abbrbidbs@cref}{{[subappendix][2][2147483647,1]A.2}{[1][21][]21}}
\@writefile{toc}{\contentsline {section}{\numberline {B}Missing details from Section~\ref {sec:background}}{21}{appendix.B}\protected@file@percent }
\newlabel{sec:proofs-background}{{B}{21}{Missing details from Section~\ref {sec:background}}{appendix.B}{}}
\newlabel{sec:proofs-background@cref}{{[appendix][2][2147483647]B}{[1][21][]21}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.1}$\mathcal {K}$-relations and $\mathbb {N}[{\bf X}]$-encoded PDB\xspace s\xspace }{21}{subsection.B.1}\protected@file@percent }
\newlabel{subsec:supp-mat-background}{{B.1}{21}{$\semK $-relations and \abbrNXPDB \xplural }{subsection.B.1}{}}
\newlabel{subsec:supp-mat-background@cref}{{[subappendix][1][2147483647,2]B.1}{[1][21][]21}}
\newlabel{subsec:supp-mat-krelations}{{B.1}{21}{$\semK $-relations and \abbrNXPDB \xplural }{subsection.B.1}{}}
\newlabel{subsec:supp-mat-krelations@cref}{{[subappendix][1][2147483647,2]B.1}{[1][21][]21}}
\newlabel{def:representation-syste}{{B.1}{22}{Representation System}{Theorem.B.1}{}}
\newlabel{def:representation-syste@cref}{{[Definition][1][2147483647,2]B.1}{[1][22][]22}}
\citation{DBLP:conf/pods/GreenKT07}
\citation{DBLP:conf/pods/GreenKT07}
\newlabel{def:semnx-pdbs}{{B.2}{23}{$\rmod \inparen {\pxdb }$}{Theorem.B.2}{}}
\newlabel{def:semnx-pdbs@cref}{{[Definition][2][2147483647,2]B.2}{[1][22][]23}}
\newlabel{prop:semnx-pdbs-are-a-}{{B.3}{23}{}{Theorem.B.3}{}}
\newlabel{prop:semnx-pdbs-are-a-@cref}{{[Proposition][3][2147483647,2]B.3}{[1][23][]23}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.2}TIDBs\xspace and BIDBs\xspace in the $\mathbb {N}[{\bf X}]$-encoded PDB\xspace model}{23}{subsection.B.2}\protected@file@percent }
\newlabel{subsec:supp-mat-ti-bi-def}{{B.2}{23}{\tis and \bis in the \abbrNXPDB model}{subsection.B.2}{}}
\newlabel{subsec:supp-mat-ti-bi-def@cref}{{[subappendix][2][2147483647,2]B.2}{[1][23][]23}}
\newlabel{eq:tidb-expectation}{{5}{24}{\tis and \bis in the \abbrNXPDB model}{equation.B.5}{}}
\newlabel{eq:tidb-expectation@cref}{{[equation][5][2147483647]5}{[1][24][]24}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.3}Proof of~\Cref {prop:expection-of-polynom}}{24}{subsection.B.3}\protected@file@percent }
\newlabel{subsec:expectation-of-polynom-proof}{{B.3}{24}{Proof of~\Cref {prop:expection-of-polynom}}{subsection.B.3}{}}
\newlabel{subsec:expectation-of-polynom-proof@cref}{{[subappendix][3][2147483647,2]B.3}{[1][24][]24}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.4}Proposition~\ref {proposition:q-qtilde}}{25}{subsection.B.4}\protected@file@percent }
\newlabel{app:subsec-prop-q-qtilde}{{B.4}{25}{Proposition~\ref {proposition:q-qtilde}}{subsection.B.4}{}}
\newlabel{app:subsec-prop-q-qtilde@cref}{{[subappendix][4][2147483647,2]B.4}{[1][25][]25}}
\newlabel{proposition:q-qtilde}{{B.4}{25}{}{Theorem.B.4}{}}
\newlabel{proposition:q-qtilde@cref}{{[Proposition][4][2147483647,2]B.4}{[1][25][]25}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.5}Proof for Lemma~\ref {lem:tidb-reduce-poly}}{25}{subsection.B.5}\protected@file@percent }
\newlabel{subsec:proof-exp-poly-rpoly}{{B.5}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{subsection.B.5}{}}
\newlabel{subsec:proof-exp-poly-rpoly@cref}{{[subappendix][5][2147483647,2]B.5}{[1][25][]25}}
\newlabel{p1-s1a}{{6}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.6}{}}
\newlabel{p1-s1a@cref}{{[equation][6][2147483647]6}{[1][25][]25}}
\newlabel{p1-s1b}{{7}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.7}{}}
\newlabel{p1-s1b@cref}{{[equation][7][2147483647]7}{[1][25][]25}}
\newlabel{p1-s1c}{{8}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.8}{}}
\newlabel{p1-s1c@cref}{{[equation][8][2147483647]8}{[1][25][]25}}
\newlabel{p1-s2}{{9}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.9}{}}
\newlabel{p1-s2@cref}{{[equation][9][2147483647]9}{[1][25][]25}}
\newlabel{p1-s3}{{10}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.10}{}}
\newlabel{p1-s3@cref}{{[equation][10][2147483647]10}{[1][25][]25}}
\newlabel{p1-s4}{{11}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.11}{}}
\newlabel{p1-s4@cref}{{[equation][11][2147483647]11}{[1][25][]25}}
\newlabel{p1-s5}{{12}{25}{Proof for Lemma~\ref {lem:tidb-reduce-poly}}{equation.B.12}{}}
\newlabel{p1-s5@cref}{{[equation][12][2147483647]12}{[1][25][]25}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B.6}Proof For Corollary~\ref {cor:expct-sop}}{26}{subsection.B.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {C}Missing details from Section~\ref {sec:hard}}{26}{appendix.C}\protected@file@percent }
\newlabel{app:single-mult-p}{{C}{26}{Missing details from Section~\ref {sec:hard}}{appendix.C}{}}
\newlabel{app:single-mult-p@cref}{{[appendix][3][2147483647]C}{[1][26][]26}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.1}\Cref {lem:pdb-for-def-qk}}{26}{subsection.C.1}\protected@file@percent }
\newlabel{lem:pdb-for-def-qk}{{C.1}{26}{}{Theorem.C.1}{}}
\newlabel{lem:pdb-for-def-qk@cref}{{[Lemma][1][2147483647,3]C.1}{[1][26][]26}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.2}Proof of \Cref {lem:tdet-om}}{26}{subsection.C.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {C.3}\Cref {lem:qEk-multi-p}}{27}{subsection.C.3}\protected@file@percent }
\newlabel{lem:qEk-multi-p}{{C.2}{27}{}{Theorem.C.2}{}}
\newlabel{lem:qEk-multi-p@cref}{{[Lemma][2][2147483647,3]C.2}{[1][27][]27}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.4}Proof of Lemma~\ref {lem:qEk-multi-p}}{27}{subsection.C.4}\protected@file@percent }
\newlabel{subsec:c2k-proportional}{{C.4}{27}{Proof of Lemma~\ref {lem:qEk-multi-p}}{subsection.C.4}{}}
\newlabel{subsec:c2k-proportional@cref}{{[subappendix][4][2147483647,3]C.4}{[1][27][]27}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.5}Proof of Theorem~\ref {thm:mult-p-hard-result}}{27}{subsection.C.5}\protected@file@percent }
\newlabel{eq:proof-omega-kmatch2}{{13}{28}{Proof of Theorem~\ref {thm:mult-p-hard-result}}{equation.C.13}{}}
\newlabel{eq:proof-omega-kmatch2@cref}{{[equation][13][2147483647]13}{[1][28][]28}}
\newlabel{eq:proof-omega-kmatch4}{{14}{28}{Proof of Theorem~\ref {thm:mult-p-hard-result}}{equation.C.14}{}}
\newlabel{eq:proof-omega-kmatch4@cref}{{[equation][14][2147483647]14}{[1][28][]28}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.6}Subgraph Notation and $O(1)$ Closed Formulas}{28}{subsection.C.6}\protected@file@percent }
\newlabel{eq:1e}{{15}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.15}{}}
\newlabel{eq:1e@cref}{{[equation][15][2147483647]15}{[1][28][]28}}
\newlabel{eq:2p}{{16}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.16}{}}
\newlabel{eq:2p@cref}{{[equation][16][2147483647]16}{[1][28][]28}}
\newlabel{eq:2m}{{17}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.17}{}}
\newlabel{eq:2m@cref}{{[equation][17][2147483647]17}{[1][28][]28}}
\newlabel{eq:3s}{{18}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.18}{}}
\newlabel{eq:3s@cref}{{[equation][18][2147483647]18}{[1][28][]28}}
\newlabel{eq:2pd-3d}{{19}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.19}{}}
\newlabel{eq:2pd-3d@cref}{{[equation][19][2147483647]19}{[1][28][]28}}
\newlabel{eq:3p-3tri}{{20}{28}{Subgraph Notation and $O(1)$ Closed Formulas}{equation.C.20}{}}
\newlabel{eq:3p-3tri@cref}{{[equation][20][2147483647]20}{[1][28][]28}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.7}Proofs of \Cref {eq:1e}-\Cref {eq:3p-3tri}}{28}{subsection.C.7}\protected@file@percent }
\newlabel{app:easy-counts}{{C.7}{28}{Proofs of \Cref {eq:1e}-\Cref {eq:3p-3tri}}{subsection.C.7}{}}
\newlabel{app:easy-counts@cref}{{[subappendix][7][2147483647,3]C.7}{[1][28][]28}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.8}Tools to prove \Cref {th:single-p-hard}}{29}{subsection.C.8}\protected@file@percent }
\newlabel{lem:qE3-exp}{{C.3}{29}{}{Theorem.C.3}{}}
\newlabel{lem:qE3-exp@cref}{{[Lemma][3][2147483647,3]C.3}{[1][29][]29}}
\newlabel{claim:four-one}{{21}{29}{}{equation.C.21}{}}
\newlabel{claim:four-one@cref}{{[equation][21][2147483647]21}{[1][29][]29}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {C.8.1}Proof for \Cref {lem:qE3-exp}}{30}{subsubsection.C.8.1}\protected@file@percent }
\newlabel{def:Gk}{{C.4}{30}{}{Theorem.C.4}{}}
\newlabel{def:Gk@cref}{{[Definition][4][2147483647,3]C.4}{[1][30][]30}}
\newlabel{th:single-p}{{C.5}{30}{}{Theorem.C.5}{}}
\newlabel{th:single-p@cref}{{[Theorem][5][2147483647,3]C.5}{[1][30][]30}}
\newlabel{lem:3m-G2}{{C.6}{30}{}{Theorem.C.6}{}}
\newlabel{lem:3m-G2@cref}{{[Lemma][6][2147483647,3]C.6}{[1][30][]30}}
\newlabel{lem:tri}{{C.7}{31}{}{Theorem.C.7}{}}
\newlabel{lem:tri@cref}{{[Lemma][7][2147483647,3]C.7}{[1][30][]31}}
\newlabel{lem:lin-sys}{{C.8}{31}{}{Theorem.C.8}{}}
\newlabel{lem:lin-sys@cref}{{[Lemma][8][2147483647,3]C.8}{[1][31][]31}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.9}Proofs for \Cref {lem:3m-G2}, \Cref {lem:tri}, and \Cref {lem:lin-sys}}{31}{subsection.C.9}\protected@file@percent }
\newlabel{subsec:proofs-struc-lemmas}{{C.9}{31}{Proofs for \Cref {lem:3m-G2}, \Cref {lem:tri}, and \Cref {lem:lin-sys}}{subsection.C.9}{}}
\newlabel{subsec:proofs-struc-lemmas@cref}{{[subappendix][9][2147483647,3]C.9}{[1][31][]31}}
\newlabel{def:ed-nota}{{C.9}{31}{$\esetType {\ell }$}{Theorem.C.9}{}}
\newlabel{def:ed-nota@cref}{{[Definition][9][2147483647,3]C.9}{[1][31][]31}}
\newlabel{def:ed-sub}{{C.11}{31}{$\binom {\edgeSet }{t}$ and $\binom {\edgeSet }{\leq t}$}{Theorem.C.11}{}}
\newlabel{def:ed-sub@cref}{{[Definition][11][2147483647,3]C.11}{[1][31][]31}}
\newlabel{def:fk}{{C.12}{31}{}{Theorem.C.12}{}}
\newlabel{def:fk@cref}{{[Definition][12][2147483647,3]C.12}{[1][31][]31}}
\newlabel{def:fk-inv}{{C.13}{31}{$f_\ell ^{-1}$}{Theorem.C.13}{}}
\newlabel{def:fk-inv@cref}{{[Definition][13][2147483647,3]C.13}{[1][31][]31}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {C.9.1}Proof of Lemma \ref {lem:3m-G2}}{32}{subsubsection.C.9.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {C.9.2}Proof of \Cref {lem:tri}}{33}{subsubsection.C.9.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {C.9.3}Proof of \Cref {lem:lin-sys}}{33}{subsubsection.C.9.3}\protected@file@percent }
\newlabel{eq:lem-qE3-exp}{{22}{33}{Proof of \Cref {lem:lin-sys}}{equation.C.22}{}}
\newlabel{eq:lem-qE3-exp@cref}{{[equation][22][2147483647]22}{[1][33][]33}}
\newlabel{eq:b1-alg-1}{{23}{33}{Proof of \Cref {lem:lin-sys}}{equation.C.23}{}}
\newlabel{eq:b1-alg-1@cref}{{[equation][23][2147483647]23}{[1][33][]33}}
\newlabel{eq:b1-alg-2}{{24}{33}{Proof of \Cref {lem:lin-sys}}{equation.C.24}{}}
\newlabel{eq:b1-alg-2@cref}{{[equation][24][2147483647]24}{[1][33][]33}}
\newlabel{eq:b2-sub-lem}{{25}{34}{Proof of \Cref {lem:lin-sys}}{equation.C.25}{}}
\newlabel{eq:b2-sub-lem@cref}{{[equation][25][2147483647]25}{[1][34][]34}}
\newlabel{eq:b2-final}{{26}{34}{Proof of \Cref {lem:lin-sys}}{equation.C.26}{}}
\newlabel{eq:b2-final@cref}{{[equation][26][2147483647]26}{[1][34][]34}}
\newlabel{eq:det-final}{{27}{34}{Proof of \Cref {lem:lin-sys}}{equation.C.27}{}}
\newlabel{eq:det-final@cref}{{[equation][27][2147483647]27}{[1][34][]34}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C.10}Proof of \Cref {th:single-p}}{34}{subsection.C.10}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {C.11}Proof of \Cref {th:single-p-hard}}{34}{subsection.C.11}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {D}Missing Details from Section~\ref {sec:algo}}{35}{appendix.D}\protected@file@percent }
\newlabel{sec:proofs-approx-alg}{{D}{35}{Missing Details from Section~\ref {sec:algo}}{appendix.D}{}}
\newlabel{sec:proofs-approx-alg@cref}{{[appendix][4][2147483647]D}{[1][35][]35}}
\newlabel{eq:poly-eg}{{28}{35}{Missing Details from Section~\ref {sec:algo}}{equation.D.28}{}}
\newlabel{eq:poly-eg@cref}{{[equation][28][2147483647]28}{[1][35][]35}}
\newlabel{example:expr-tree-T}{{D.2}{35}{Example of Pure Expansion}{Theorem.D.2}{}}
\newlabel{example:expr-tree-T@cref}{{[Example][2][2147483647,4]D.2}{[1][35][]35}}
\newlabel{ex:def-pos-circ}{{D.3}{35}{Example for \Cref {def:positive-circuit}}{Theorem.D.3}{}}
\newlabel{ex:def-pos-circ@cref}{{[Example][3][2147483647,4]D.3}{[1][35][]35}}
\newlabel{lem:approx-alg}{{D.5}{35}{}{Theorem.D.5}{}}
\newlabel{lem:approx-alg@cref}{{[Theorem][5][2147483647,4]D.5}{[1][35][]35}}
\newlabel{eq:approx-algo-bound}{{29}{35}{}{equation.D.29}{}}
\newlabel{eq:approx-algo-bound@cref}{{[equation][29][2147483647]29}{[1][35][]35}}
\newlabel{alg:mon-sam}{{1}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{algorithm.1}{}}
\newlabel{alg:mon-sam@cref}{{[algorithm][1][2147483647]1}{[1][35][]36}}
\newlabel{alg:mon-sam-global1}{{1}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.1}{}}
\newlabel{alg:mon-sam-global1@cref}{{[line][1][2147483647]1}{[1][35][]36}}
\newlabel{alg:mon-sam-global2}{{2}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.2}{}}
\newlabel{alg:mon-sam-global2@cref}{{[line][2][2147483647]2}{[1][35][]36}}
\newlabel{alg:mon-sam-onepass}{{3}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.3}{}}
\newlabel{alg:mon-sam-onepass@cref}{{[line][3][2147483647]3}{[1][35][]36}}
\newlabel{alg:sampling-loop}{{4}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.4}{}}
\newlabel{alg:sampling-loop@cref}{{[line][4][2147483647]4}{[1][35][]36}}
\newlabel{alg:mon-sam-sample}{{5}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.5}{}}
\newlabel{alg:mon-sam-sample@cref}{{[line][5][2147483647]5}{[1][35][]36}}
\newlabel{alg:check-duplicate-block}{{6}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.6}{}}
\newlabel{alg:check-duplicate-block@cref}{{[line][6][2147483647]6}{[1][35][]36}}
\newlabel{alg:mon-sam-assign1}{{7}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.7}{}}
\newlabel{alg:mon-sam-assign1@cref}{{[line][7][2147483647]7}{[1][35][]36}}
\newlabel{alg:mon-sam-product}{{8}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.8}{}}
\newlabel{alg:mon-sam-product@cref}{{[line][8][2147483647]8}{[1][35][]36}}
\newlabel{alg:mon-sam-add}{{9}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.9}{}}
\newlabel{alg:mon-sam-add@cref}{{[line][9][2147483647]9}{[1][35][]36}}
\newlabel{alg:mon-sam-global3}{{12}{36}{$\approxq (\circuit , \vct {p}, \conf , \error )$\relax }{ALG@line.12}{}}
\newlabel{alg:mon-sam-global3@cref}{{[line][12][2147483647]12}{[1][35][]36}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.1}Proof of Theorem \ref {lem:approx-alg}}{36}{subsection.D.1}\protected@file@percent }
\newlabel{sec:proof-lem-approx-alg}{{D.1}{36}{Proof of Theorem \ref {lem:approx-alg}}{subsection.D.1}{}}
\newlabel{sec:proof-lem-approx-alg@cref}{{[subappendix][1][2147483647,4]D.1}{[1][35][]36}}
\newlabel{lem:one-pass}{{D.6}{36}{}{Theorem.D.6}{}}
\newlabel{lem:one-pass@cref}{{[Lemma][6][2147483647,4]D.6}{[1][36][]36}}
\newlabel{lem:sample}{{D.7}{36}{}{Theorem.D.7}{}}
\newlabel{lem:sample@cref}{{[Lemma][7][2147483647,4]D.7}{[1][36][]36}}
\newlabel{lem:mon-samp}{{D.8}{37}{}{Theorem.D.8}{}}
\newlabel{lem:mon-samp@cref}{{[Theorem][8][2147483647,4]D.8}{[1][37][]37}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.2}Proof of Theorem \ref {lem:mon-samp}}{37}{subsection.D.2}\protected@file@percent }
\newlabel{app:subsec-th-mon-samp}{{D.2}{37}{Proof of Theorem \ref {lem:mon-samp}}{subsection.D.2}{}}
\newlabel{app:subsec-th-mon-samp@cref}{{[subappendix][2][2147483647,4]D.2}{[1][37][]37}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.3}Proof of \Cref {cor:approx-algo-const-p}}{38}{subsection.D.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {D.4}Proof of~\Cref {lem:ctidb-gamma}}{39}{subsection.D.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {D.5}Proof of \Cref {lem:val-ub}}{39}{subsection.D.5}\protected@file@percent }
\newlabel{susec:proof-val-up}{{D.5}{39}{Proof of \Cref {lem:val-ub}}{subsection.D.5}{}}
\newlabel{susec:proof-val-up@cref}{{[subappendix][5][2147483647,4]D.5}{[1][39][]39}}
\newlabel{app:proof-lem-val-ub}{{D.5}{39}{Proof of \Cref {lem:val-ub}}{subsection.D.5}{}}
\newlabel{app:proof-lem-val-ub@cref}{{[subappendix][5][2147483647,4]D.5}{[1][39][]39}}
\newlabel{lem:C-ub-tree}{{D.9}{39}{}{Theorem.D.9}{}}
\newlabel{lem:C-ub-tree@cref}{{[Lemma][9][2147483647,4]D.9}{[1][39][]39}}
\newlabel{eq:sumcoeff-times-upper}{{30}{40}{Proof of \Cref {lem:val-ub}}{equation.D.30}{}}
\newlabel{eq:sumcoeff-times-upper@cref}{{[equation][30][2147483647]30}{[1][40][]40}}
\newlabel{eq:sumcoeff-plus-upper}{{31}{40}{Proof of \Cref {lem:val-ub}}{equation.D.31}{}}
\newlabel{eq:sumcoeff-plus-upper@cref}{{[equation][31][2147483647]31}{[1][40][]40}}
\newlabel{lem:C-ub-gen}{{D.10}{40}{}{Theorem.D.10}{}}
\newlabel{lem:C-ub-gen@cref}{{[Lemma][10][2147483647,4]D.10}{[1][40][]40}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.6}$\textsc {OnePass}\xspace $ Remarks}{41}{subsection.D.6}\protected@file@percent }
\newlabel{eq:T-all-ones}{{32}{41}{$\onepass $ Remarks}{equation.D.32}{}}
\newlabel{eq:T-all-ones@cref}{{[equation][32][2147483647]32}{[1][41][]41}}
\newlabel{eq:T-weights}{{33}{41}{$\onepass $ Remarks}{equation.D.33}{}}
\newlabel{eq:T-weights@cref}{{[equation][33][2147483647]33}{[1][41][]41}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.7}$\textsc {OnePass}\xspace $ Example}{41}{subsection.D.7}\protected@file@percent }
\newlabel{example:one-pass}{{D.11}{41}{}{Theorem.D.11}{}}
\newlabel{example:one-pass@cref}{{[Example][11][2147483647,4]D.11}{[1][41][]41}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.8}Proof of \textsc {OnePass}\xspace (\Cref {lem:one-pass})}{41}{subsection.D.8}\protected@file@percent }
\newlabel{sec:proof-one-pass}{{D.8}{41}{Proof of \onepass (\Cref {lem:one-pass})}{subsection.D.8}{}}
\newlabel{sec:proof-one-pass@cref}{{[subappendix][8][2147483647,4]D.8}{[1][41][]41}}
\newlabel{fig:expr-tree-T-wght}{{5}{42}{Weights computed by $\onepass $ in \Cref {example:one-pass}.\relax }{figure.caption.7}{}}
\newlabel{fig:expr-tree-T-wght@cref}{{[figure][5][2147483647]5}{[1][41][]42}}
\newlabel{alg:one-pass-iter}{{2}{42}{\onepass $(\circuit )$\relax }{algorithm.2}{}}
\newlabel{alg:one-pass-iter@cref}{{[algorithm][2][2147483647]2}{[1][41][]42}}
\newlabel{alg:one-pass-loop}{{1}{42}{\onepass $(\circuit )$\relax }{ALG@line.1}{}}
\newlabel{alg:one-pass-loop@cref}{{[line][1][2147483647]1}{[1][41][]42}}
\newlabel{alg:one-pass-var}{{3}{42}{\onepass $(\circuit )$\relax }{ALG@line.3}{}}
\newlabel{alg:one-pass-var@cref}{{[line][3][2147483647]3}{[1][41][]42}}
\newlabel{alg:one-pass-num}{{5}{42}{\onepass $(\circuit )$\relax }{ALG@line.5}{}}
\newlabel{alg:one-pass-num@cref}{{[line][5][2147483647]5}{[1][41][]42}}
\newlabel{alg:one-pass-mult}{{7}{42}{\onepass $(\circuit )$\relax }{ALG@line.7}{}}
\newlabel{alg:one-pass-mult@cref}{{[line][7][2147483647]7}{[1][41][]42}}
\newlabel{alg:one-pass-plus}{{9}{42}{\onepass $(\circuit )$\relax }{ALG@line.9}{}}
\newlabel{alg:one-pass-plus@cref}{{[line][9][2147483647]9}{[1][41][]42}}
\newlabel{alg:one-pass-lwght}{{10}{42}{\onepass $(\circuit )$\relax }{ALG@line.10}{}}
\newlabel{alg:one-pass-lwght@cref}{{[line][10][2147483647]10}{[1][41][]42}}
\newlabel{alg:one-pass-rwght}{{11}{42}{\onepass $(\circuit )$\relax }{ALG@line.11}{}}
\newlabel{alg:one-pass-rwght@cref}{{[line][11][2147483647]11}{[1][41][]42}}
\newlabel{alg:sample}{{3}{43}{\sampmon (\circuit )\relax }{algorithm.3}{}}
\newlabel{alg:sample@cref}{{[algorithm][3][2147483647]3}{[1][43][]43}}
\newlabel{alg:sample-global1}{{1}{43}{\sampmon (\circuit )\relax }{ALG@line.1}{}}
\newlabel{alg:sample-global1@cref}{{[line][1][2147483647]1}{[1][43][]43}}
\newlabel{alg:sample-plus-bsamp}{{3}{43}{\sampmon (\circuit )\relax }{ALG@line.3}{}}
\newlabel{alg:sample-plus-bsamp@cref}{{[line][3][2147483647]3}{[1][43][]43}}
\newlabel{alg:sample-plus-traversal}{{4}{43}{\sampmon (\circuit )\relax }{ALG@line.4}{}}
\newlabel{alg:sample-plus-traversal@cref}{{[line][4][2147483647]4}{[1][43][]43}}
\newlabel{alg:sample-global2}{{7}{43}{\sampmon (\circuit )\relax }{ALG@line.7}{}}
\newlabel{alg:sample-global2@cref}{{[line][7][2147483647]7}{[1][43][]43}}
\newlabel{alg:sample-times-for-loop}{{8}{43}{\sampmon (\circuit )\relax }{ALG@line.8}{}}
\newlabel{alg:sample-times-for-loop@cref}{{[line][8][2147483647]8}{[1][43][]43}}
\newlabel{alg:sample-times-union}{{10}{43}{\sampmon (\circuit )\relax }{ALG@line.10}{}}
\newlabel{alg:sample-times-union@cref}{{[line][10][2147483647]10}{[1][43][]43}}
\newlabel{alg:sample-times-product}{{11}{43}{\sampmon (\circuit )\relax }{ALG@line.11}{}}
\newlabel{alg:sample-times-product@cref}{{[line][11][2147483647]11}{[1][43][]43}}
\newlabel{alg:sample-num-return}{{15}{43}{\sampmon (\circuit )\relax }{ALG@line.15}{}}
\newlabel{alg:sample-num-return@cref}{{[line][15][2147483647]15}{[1][43][]43}}
\newlabel{alg:sample-var-return}{{17}{43}{\sampmon (\circuit )\relax }{ALG@line.17}{}}
\newlabel{alg:sample-var-return@cref}{{[line][17][2147483647]17}{[1][43][]43}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.9}\textsc {SampleMonomial}\xspace Remarks}{43}{subsection.D.9}\protected@file@percent }
\newlabel{subsec:sampmon-remarks}{{D.9}{43}{\sampmon Remarks}{subsection.D.9}{}}
\newlabel{subsec:sampmon-remarks@cref}{{[subappendix][9][2147483647,4]D.9}{[1][43][]43}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.10}Proof of \textsc {SampleMonomial}\xspace (\Cref {lem:sample})}{44}{subsection.D.10}\protected@file@percent }
\newlabel{sec:proof-sample-monom}{{D.10}{44}{Proof of \sampmon (\Cref {lem:sample})}{subsection.D.10}{}}
\newlabel{sec:proof-sample-monom@cref}{{[subappendix][10][2147483647,4]D.10}{[1][43][]44}}
\newlabel{eq:cost-sampmon}{{35}{45}{Run-time Analysis}{equation.D.35}{}}
\newlabel{eq:cost-sampmon@cref}{{[equation][35][2147483647]35}{[1][45][]45}}
\newlabel{eq:strict-upper-bound}{{36}{46}{Run-time Analysis}{equation.D.36}{}}
\newlabel{eq:strict-upper-bound@cref}{{[equation][36][2147483647]36}{[1][46][]46}}
\newlabel{eq:ih-bound-cost}{{37}{46}{Run-time Analysis}{equation.D.37}{}}
\newlabel{eq:ih-bound-cost@cref}{{[equation][37][2147483647]37}{[1][46][]46}}
\newlabel{eq:times-lhs}{{38}{46}{Run-time Analysis}{equation.D.38}{}}
\newlabel{eq:times-lhs@cref}{{[equation][38][2147483647]38}{[1][46][]46}}
\newlabel{eq:times-middle}{{39}{46}{Run-time Analysis}{equation.D.39}{}}
\newlabel{eq:times-middle@cref}{{[equation][39][2147483647]39}{[1][46][]46}}
\newlabel{eq:times-rhs}{{40}{46}{Run-time Analysis}{equation.D.40}{}}
\newlabel{eq:times-rhs@cref}{{[equation][40][2147483647]40}{[1][46][]46}}
\newlabel{eq:times-lhs-expanded}{{41}{46}{Run-time Analysis}{equation.D.41}{}}
\newlabel{eq:times-lhs-expanded@cref}{{[equation][41][2147483647]41}{[1][46][]46}}
\newlabel{eq:times-middle-expanded}{{42}{46}{Run-time Analysis}{equation.D.42}{}}
\newlabel{eq:times-middle-expanded@cref}{{[equation][42][2147483647]42}{[1][46][]46}}
\newlabel{eq:times-lhs-middle}{{43}{46}{Run-time Analysis}{equation.D.43}{}}
\newlabel{eq:times-lhs-middle@cref}{{[equation][43][2147483647]43}{[1][46][]46}}
\citation{pdbench}
\citation{Antova_fastand}
\newlabel{eq:plus-lhs-inequality}{{44}{47}{Run-time Analysis}{equation.D.44}{}}
\newlabel{eq:plus-lhs-inequality@cref}{{[equation][44][2147483647]44}{[1][47][]47}}
\newlabel{eq:plus-middle}{{45}{47}{Run-time Analysis}{equation.D.45}{}}
\newlabel{eq:plus-middle@cref}{{[equation][45][2147483647]45}{[1][47][]47}}
\newlabel{eq:plus-rhs}{{46}{47}{Run-time Analysis}{equation.D.46}{}}
\newlabel{eq:plus-rhs@cref}{{[equation][46][2147483647]46}{[1][47][]47}}
\newlabel{eq:plus-lhs-expanded}{{47}{47}{Run-time Analysis}{equation.D.47}{}}
\newlabel{eq:plus-lhs-expanded@cref}{{[equation][47][2147483647]47}{[1][47][]47}}
\newlabel{eq:plus-middle-expanded}{{48}{47}{Run-time Analysis}{equation.D.48}{}}
\newlabel{eq:plus-middle-expanded@cref}{{[equation][48][2147483647]48}{[1][47][]47}}
\newlabel{eq:plus-upper-bound-final}{{49}{47}{Run-time Analysis}{equation.D.49}{}}
\newlabel{eq:plus-upper-bound-final@cref}{{[equation][49][2147483647]49}{[1][47][]47}}
\@writefile{toc}{\contentsline {subsection}{\numberline {D.11}Experimental Results}{47}{subsection.D.11}\protected@file@percent }
\newlabel{app:subsec:experiment}{{D.11}{47}{Experimental Results}{subsection.D.11}{}}
\newlabel{app:subsec:experiment@cref}{{[subappendix][11][2147483647,4]D.11}{[1][47][]47}}
\newlabel{tbl:cancel}{{\caption@xref {tbl:cancel}{ on input line 16}}{48}{Experimental Results}{figure.caption.10}{}}
\newlabel{tbl:cancel@cref}{{[subappendix][11][2147483647,4]D.11}{[1][48][]48}}
\newlabel{fig:experiment-bidb-cancel}{{6}{48}{Number of Cancellations for Queries Over $\abbrBIDB $.\relax }{figure.caption.10}{}}
\newlabel{fig:experiment-bidb-cancel@cref}{{[figure][6][2147483647]6}{[1][48][]48}}
\@writefile{toc}{\contentsline {section}{\numberline {E}Circuits}{48}{appendix.E}\protected@file@percent }
\newlabel{app:sec-cicuits}{{E}{48}{Circuits}{appendix.E}{}}
\newlabel{app:sec-cicuits@cref}{{[appendix][5][2147483647]E}{[1][48][]48}}
\@writefile{toc}{\contentsline {subsection}{\numberline {E.1}Representing Polynomials with Circuits}{48}{subsection.E.1}\protected@file@percent }
\newlabel{app:subsec-rep-poly-lin-circ}{{E.1}{48}{Representing Polynomials with Circuits}{subsection.E.1}{}}
\newlabel{app:subsec-rep-poly-lin-circ@cref}{{[subappendix][1][2147483647,5]E.1}{[1][48][]48}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {E.1.1}Circuits for query plans}{48}{subsubsection.E.1.1}\protected@file@percent }
\newlabel{sec:circuits-formal}{{E.1.1}{48}{Circuits for query plans}{subsubsection.E.1.1}{}}
\newlabel{sec:circuits-formal@cref}{{[subsubappendix][1][2147483647,5,1]E.1.1}{[1][48][]48}}
\@writefile{toc}{\contentsline {subsection}{\numberline {E.2}Modeling Circuit Construction}{48}{subsection.E.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {E.2.1}Bounding circuit depth}{49}{subsubsection.E.2.1}\protected@file@percent }
\newlabel{sec:circuit-depth}{{E.2.1}{49}{Bounding circuit depth}{subsubsection.E.2.1}{}}
\newlabel{sec:circuit-depth@cref}{{[subsubappendix][1][2147483647,5,2]E.2.1}{[1][49][]49}}
\newlabel{prop:circuit-depth}{{E.1}{49}{Circuit depth is bounded}{Theorem.E.1}{}}
\newlabel{prop:circuit-depth@cref}{{[Proposition][1][2147483647,5]E.1}{[1][49][]49}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {E.2.2}Circuit size vs. runtime}{49}{subsubsection.E.2.2}\protected@file@percent }
\newlabel{sec:circuit-runtime}{{E.2.2}{49}{Circuit size vs. runtime}{subsubsection.E.2.2}{}}
\newlabel{sec:circuit-runtime@cref}{{[subsubappendix][2][2147483647,5,2]E.2.2}{[1][49][]49}}
\newlabel{alg:lc}{{4}{50}{\abbrStepOne $(\query , \dbbase , E, V, \ell )$\relax }{algorithm.4}{}}
\newlabel{alg:lc@cref}{{[algorithm][4][2147483647]4}{[1][49][]50}}
\newlabel{lem:circ-model-runtime}{{E.2}{51}{}{Theorem.E.2}{}}
\newlabel{lem:circ-model-runtime@cref}{{[Lemma][2][2147483647,5]E.2}{[1][49][]51}}
\newlabel{lem:circuits-model-runtime}{{E.2}{51}{}{Theorem.E.2}{}}
\newlabel{lem:circuits-model-runtime@cref}{{[Lemma][2][2147483647,5]E.2}{[1][49][]51}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {E.2.3}Runtime of LC\xspace }{52}{subsubsection.E.2.3}\protected@file@percent }
\newlabel{sec:lc-runtime}{{E.2.3}{52}{Runtime of \abbrStepOne }{subsubsection.E.2.3}{}}
\newlabel{sec:lc-runtime@cref}{{[subsubappendix][3][2147483647,5,2]E.2.3}{[1][51][]52}}
\newlabel{lem:tlc-is-the-same-as-det}{{E.3}{52}{}{Theorem.E.3}{}}
\newlabel{lem:tlc-is-the-same-as-det@cref}{{[Lemma][3][2147483647,5]E.3}{[1][52][]52}}
\@writefile{toc}{\contentsline {section}{\numberline {F}Higher Moments}{52}{appendix.F}\protected@file@percent }
\newlabel{sec:momemts}{{F}{52}{Higher Moments}{appendix.F}{}}
\newlabel{sec:momemts@cref}{{[appendix][6][2147483647]F}{[1][52][]52}}
\citation{DBLP:journals/jal/KarpLM89}
\citation{DBLP:conf/icde/OlteanuHK10}
\citation{FH13}
\citation{heuvel-19-anappdsd}
\citation{DBLP:conf/icde/OlteanuHK10}
\citation{param-comp}
\citation{virgi-survey}
\citation{triang-hard}
\@writefile{toc}{\contentsline {section}{\numberline {G}The Karp-Luby Estimator}{53}{appendix.G}\protected@file@percent }
\newlabel{sec:karp-luby}{{G}{53}{The Karp-Luby Estimator}{appendix.G}{}}
\newlabel{sec:karp-luby@cref}{{[appendix][7][2147483647]G}{[1][53][]53}}
\@writefile{toc}{\contentsline {section}{\numberline {H}Parameterized Complexity}{53}{appendix.H}\protected@file@percent }
\newlabel{sec:param-compl}{{H}{53}{Parameterized Complexity}{appendix.H}{}}
\newlabel{sec:param-compl@cref}{{[appendix][8][2147483647]H}{[1][53][]53}}
\newlabel{TotPages}{{54}{54}{}{page.54}{}}
\gdef \@abspage@last{54}

275
arXiv/main.bbl Normal file
View File

@ -0,0 +1,275 @@
\begin{thebibliography}{10}
\bibitem{pdbench}
pdbench.
\newblock \url{http://pdbench.sourceforge.net/}.
\newblock Accessed: 2020-12-15.
\bibitem{DBLP:conf/vldb/AgrawalBSHNSW06}
Parag Agrawal, Omar Benjelloun, Anish~Das Sarma, Chris Hayworth, Shubha~U.
Nabar, Tomoe Sugihara, and Jennifer Widom.
\newblock Trio: A system for data, uncertainty, and lineage.
\newblock In {\em VLDB}, pages 1151--1154, 2006.
\bibitem{AB15}
Antoine Amarilli, Pierre Bourhis, and Pierre Senellart.
\newblock Probabilities and provenance via tree decompositions.
\newblock {\em PODS}, 2015.
\bibitem{AD11d}
Yael Amsterdamer, Daniel Deutch, and Val Tannen.
\newblock Provenance for aggregate queries.
\newblock In {\em PODS}, pages 153--164, 2011.
\bibitem{Antova_fastand}
Lyublena Antova, Thomas Jansen, Christoph Koch, and Dan Olteanu.
\newblock Fast and simple relational processing of uncertain data.
\bibitem{AGM}
Albert Atserias, Martin Grohe, and D{\'{a}}niel Marx.
\newblock Size bounds and query plans for relational joins.
\newblock {\em {SIAM} J. Comput.}, 42(4):1737--1767, 2013.
\newblock \href {https://doi.org/10.1137/110859440}
{\path{doi:10.1137/110859440}}.
\bibitem{bahar-93-al}
R.~Iris Bahar, Erica~A. Frohm, Charles~M. Gaona, Gary~D. Hachtel, Enrico Macii,
Abelardo Pardo, and Fabio Somenzi.
\newblock Algebraic decision diagrams and their applications.
\newblock In {\em IEEE CAD}, 1993.
\bibitem{DBLP:journals/pvldb/BeskalesIG10}
George Beskales, Ihab~F. Ilyas, and Lukasz Golab.
\newblock Sampling the repairs of functional dependency violations under hard
constraints.
\newblock {\em Proc. {VLDB} Endow.}, 3(1):197--207, 2010.
\bibitem{arith-complexity}
Peter Bürgisser, Michael Clausen, and Mohammad~Amin Shokrollahi.
\newblock {\em Algebraic complexity theory}, volume 315.
\newblock Springer, 1997.
\bibitem{chen-10-cswssr}
Hubie Chen and Martin Grohe.
\newblock Constraint satisfaction with succinctly specified relations.
\newblock {\em J. Comput. Syst. Sci.}, 76(8):847--860, 2010.
\bibitem{CHEN20061346}
Jianer Chen, Xiuzhen Huang, Iyad~A. Kanj, and Ge~Xia.
\newblock Strong computational lower bounds via parameterized complexity.
\newblock {\em Journal of Computer and System Sciences}, 72(8):1346--1367,
2006.
\newblock URL:
\url{https://www.sciencedirect.com/science/article/pii/S0022000006000675},
\href {https://doi.org/10.1016/j.jcss.2006.04.007}
{\path{doi:10.1016/j.jcss.2006.04.007}}.
\bibitem{k-match}
Radu Curticapean.
\newblock Counting matchings of size $k$ is {W}[1]-hard.
\newblock In {\em ICALP}, volume 7965, pages 352--363, 2013.
\bibitem{DS07}
N.~Dalvi and D.~Suciu.
\newblock Efficient query evaluation on probabilistic databases.
\newblock {\em VLDB}, 16(4):544, 2007.
\bibitem{10.1145/1265530.1265571}
Nilesh Dalvi and Dan Suciu.
\newblock The dichotomy of conjunctive queries on probabilistic structures.
\newblock In {\em PODS}, pages 293--302, 2007.
\bibitem{DS12}
Nilesh Dalvi and Dan Suciu.
\newblock The dichotomy of probabilistic inference for unions of conjunctive
queries.
\newblock {\em JACM}, 59(6):30, 2012.
\bibitem{heuvel-19-anappdsd}
Maarten~Van den Heuvel, Peter Ivanov, Wolfgang Gatterbauer, Floris Geerts, and
Martin Theobald.
\newblock Anytime approximation in probabilistic databases via scaled
dissociations.
\newblock In {\em SIGMOD}, pages 1295--1312, 2019.
\bibitem{DM14c}
Daniel Deutch, Tova Milo, Sudeepa Roy, and Val Tannen.
\newblock Circuits for datalog provenance.
\newblock In {\em ICDT}, pages 201--212, 2014.
\bibitem{feng:2021:sigmod:efficient}
Su~Feng, Boris Glavic, Aaron Huber, and Oliver Kennedy.
\newblock Efficient uncertainty tracking for complex queries with
attribute-level bounds.
\newblock In {\em SIGMOD}, 2021.
\bibitem{feng:2019:sigmod:uncertainty}
Su~Feng, Aaron Huber, Boris Glavic, and Oliver Kennedy.
\newblock Uncertainty annotated databases - a lightweight approach for
approximating certain answers.
\newblock In {\em SIGMOD}, 2019.
\bibitem{FH12}
Robert Fink, Larisa Han, and Dan Olteanu.
\newblock Aggregation in probabilistic databases via knowledge compilation.
\newblock {\em PVLDB}, 5(5):490--501, 2012.
\bibitem{FH13}
Robert Fink, Jiewen Huang, and Dan Olteanu.
\newblock Anytime approximation in probabilistic databases.
\newblock {\em VLDBJ}, 22(6):823--848, 2013.
\bibitem{FO16}
Robert Fink and Dan Olteanu.
\newblock Dichotomies for queries with negation in probabilistic databases.
\newblock {\em TODS}, 41(1):4:1--4:47, 2016.
\bibitem{10.5555/645413.652181}
J\"{o}rg Flum and Martin Grohe.
\newblock The parameterized complexity of counting problems.
\newblock In {\em Proceedings of the 43rd Symposium on Foundations of Computer
Science}, FOCS '02, page 538, USA, 2002. IEEE Computer Society.
\bibitem{param-comp}
J{\"{o}}rg Flum and Martin Grohe.
\newblock {\em Parameterized Complexity Theory}.
\newblock Texts in Theoretical Computer Science. An {EATCS} Series. Springer,
2006.
\newblock \href {https://doi.org/10.1007/3-540-29953-X}
{\path{doi:10.1007/3-540-29953-X}}.
\bibitem{DBLP:books/daglib/0020812}
Hector Garcia{-}Molina, Jeffrey~D. Ullman, and Jennifer Widom.
\newblock {\em Database systems - the complete book {(2.} ed.)}.
\newblock Pearson Education, 2009.
\bibitem{DBLP:conf/pods/GreenKT07}
Todd~J. Green, Gregory Karvounarakis, and Val Tannen.
\newblock Provenance semirings.
\newblock In {\em PODS}, pages 31--40, 2007.
\bibitem{GT06}
Todd~J Green and Val Tannen.
\newblock Models for incomplete and probabilistic information.
\newblock In {\em EDBT}, pages 278--296. 2006.
\bibitem{Imielinski1989IncompleteII}
T.~Imielinski and W.~Lipski.
\newblock Incomplete information in relational databases.
\newblock 1989.
\bibitem{IL84a}
Tomasz Imieli\'nski and Witold Lipski~Jr.
\newblock Incomplete information in relational databases.
\newblock {\em JACM}, 31(4):761--791, 1984.
\bibitem{jha-12-pdwm}
Abhay~Kumar Jha and Dan Suciu.
\newblock Probabilistic databases with markoviews.
\newblock {\em PVLDB}, 5(11):1160--1171, 2012.
\bibitem{DBLP:journals/jal/KarpLM89}
Richard~M. Karp, Michael Luby, and Neal Madras.
\newblock Monte-carlo approximation algorithms for enumeration problems.
\newblock {\em J. Algorithms}, 10(3):429--448, 1989.
\bibitem{kenig-13-nclexpdc}
Batya Kenig, Avigdor Gal, and Ofer Strichman.
\newblock A new class of lineage expressions over probabilistic databases
computable in p-time.
\newblock In {\em SUM}, volume 8078, pages 219--232, 2013.
\bibitem{DBLP:conf/pods/KhamisNR16}
Mahmoud~Abo Khamis, Hung~Q. Ngo, and Atri Rudra.
\newblock {FAQ}: Questions asked frequently.
\newblock In {\em PODS}, pages 13--28, 2016.
\bibitem{triang-hard}
Tsvi Kopelowitz and Virginia~Vassilevska Williams.
\newblock Towards optimal set-disjointness and set-intersection data
structures.
\newblock In {\em ICALP}, volume 168, pages 74:1--74:16, 2020.
\bibitem{kumari:2016:qdb:communicating}
Poonam Kumari, Said Achmiz, and Oliver Kennedy.
\newblock Communicating data quality in on-demand curation.
\newblock In {\em QDB}, 2016.
\bibitem{ngo-survey}
Hung~Q. Ngo.
\newblock Worst-case optimal join algorithms: Techniques, results, and open
problems.
\newblock In {\em PODS}, 2018.
\bibitem{skew}
Hung~Q. Ngo, Christopher Ré, and Atri Rudra.
\newblock Skew strikes back: new developments in the theory of join algorithms.
\newblock {\em SIGMOD Rec.}, 42(4):5--16, 2013.
\bibitem{DBLP:conf/icde/OlteanuHK10}
Dan Olteanu, Jiewen Huang, and Christoph Koch.
\newblock Approximate confidence computation in probabilistic databases.
\newblock In {\em ICDE}, pages 145--156, 2010.
\bibitem{factorized-db}
Dan Olteanu and Maximilian Schleich.
\newblock Factorized databases.
\newblock {\em SIGMOD Rec.}, 45(2):5--16, 2016.
\bibitem{DBLP:journals/pvldb/RekatsinasCIR17}
Theodoros Rekatsinas, Xu~Chu, Ihab~F. Ilyas, and Christopher R{\'{e}}.
\newblock Holoclean: Holistic data repairs with probabilistic inference.
\newblock {\em Proc. {VLDB} Endow.}, 10(11):1190--1201, 2017.
\bibitem{roy-11-f}
Sudeepa Roy, Vittorio Perduca, and Val Tannen.
\newblock Faster query answering in probabilistic databases using read-once
functions.
\newblock In {\em ICDT}, 2011.
\bibitem{RS07}
C.~Ré and D.~Suciu.
\newblock Materialized views in probabilistic databases: for information
exchange and query optimization.
\newblock In {\em VLDB}, pages 51--62, 2007.
\bibitem{DBLP:journals/vldb/SaRR0W0Z17}
Christopher~De Sa, Alexander Ratner, Christopher R{\'{e}}, Jaeho Shin, Feiran
Wang, Sen Wu, and Ce~Zhang.
\newblock Incremental knowledge base construction using deepdive.
\newblock {\em {VLDB} J.}, 26(1):81--105, 2017.
\bibitem{sen-10-ronfqevpd}
Prithviraj Sen, Amol Deshpande, and Lise Getoor.
\newblock Read-once functions and query evaluation in probabilistic databases.
\newblock {\em PVLDB}, 3(1):1068--1079, 2010.
\bibitem{S18a}
Pierre Senellart.
\newblock Provenance and probabilities in relational databases.
\newblock {\em SIGMOD Record}, 46(4):5--15, 2018.
\bibitem{valiant-79-cenrp}
Leslie~G. Valiant.
\newblock The complexity of enumeration and reliability problems.
\newblock {\em SIAM J. Comput.}, 8(3):410--421, 1979.
\bibitem{VS17}
Guy Van~den Broeck and Dan Suciu.
\newblock Query processing on probabilistic data: A survey.
\newblock 2017.
\bibitem{virgi-survey}
Virginia~Vassilevska Williams.
\newblock Some open problems in fine-grained complexity.
\newblock {\em {SIGACT} News}, 49(4):29--35, 2018.
\newblock \href {https://doi.org/10.1145/3300150.3300158}
{\path{doi:10.1145/3300150.3300158}}.
\bibitem{yang:2015:pvldb:lenses}
Ying Yang, Niccolò Meneghetti, Ronny Fehling, Zhen~Hua Liu, Dieter Gawlick,
and Oliver Kennedy.
\newblock Lenses: An on-demand approach to {ETL}.
\newblock {\em PVLDB}, 8(12):1578--1589, 2015.
\end{thebibliography}

802
arXiv/main.bib Normal file
View File

@ -0,0 +1,802 @@
@article{CHEN20061346,
title = {Strong computational lower bounds via parameterized complexity},
journal = {Journal of Computer and System Sciences},
volume = {72},
number = {8},
pages = {1346-1367},
year = {2006},
issn = {0022-0000},
doi = {10.1016/j.jcss.2006.04.007},
url = {https://www.sciencedirect.com/science/article/pii/S0022000006000675},
author = {Jianer Chen and Xiuzhen Huang and Iyad A. Kanj and Ge Xia},
keywords = {Parameterized computation, Computational complexity, Lower bound, Clique, Polynomial time approximation scheme},
abstract = {We develop new techniques for deriving strong computational lower bounds for a class of well-known NP-hard problems. This class includes weighted satisfiability, dominating set, hitting set, set cover, clique, and independent set. For example, although a trivial enumeration can easily test in time $O(n^k)$ if a given graph of $n$ vertices has a clique of size $k$, we prove that unless an unlikely collapse occurs in parameterized complexity theory, the problem is not solvable in time $f(k)n^{o(k)}$ for any function $f$, even if we restrict the parameter values to be bounded by an arbitrarily small function of $n$. Under the same assumption, we prove that even if we restrict the parameter values $k$ to be of the order $\Theta(\mu(n))$ for any reasonable function $\mu$, no algorithm of running time $n^{o(k)}$ can test if a graph of $n$ vertices has a clique of size $k$. Similar strong lower bounds on the computational complexity are also derived for other NP-hard problems in the above class. Our techniques can be further extended to derive computational lower bounds on polynomial time approximation schemes for NP-hard optimization problems. For example, we prove that the NP-hard distinguishing substring selection problem, for which a polynomial time approximation scheme has been recently developed, has no polynomial time approximation schemes of running time $f(1/\epsilon)n^{o(1/\epsilon)}$ for any function $f$ unless an unlikely collapse occurs in parameterized complexity theory.}
}
@inproceedings{10.5555/645413.652181,
author = {Flum, J\"{o}rg and Grohe, Martin},
title = {The Parameterized Complexity of Counting Problems},
year = {2002},
isbn = {0769518222},
publisher = {IEEE Computer Society},
address = {USA},
abstract = {We develop a parameterized complexity theory for counting problems. As the basis of this theory, we introduce a hierarchy of parameterized counting complexity classes $\#W[t]$, for $t \geq 1$, that corresponds to Downey and Fellows's W-hierarchy [12] and show that a few central W-completeness results for decision problems translate to $\#W$-completeness results for the corresponding counting problems. Counting complexity gets interesting with problems whose decision version is tractable, but whose counting version is hard. Our main result states that counting cycles and paths of length $k$ in both directed and undirected graphs, parameterized by $k$, is $\#W[1]$-complete. This makes it highly unlikely that any of these problems is fixed-parameter tractable, even though their decision versions are fixed-parameter tractable. More explicitly, our result shows that most likely there is no $f(k) \cdot n^c$-algorithm for counting cycles or paths of length $k$ in a graph of size $n$ for any computable function $f\colon \mathbb{N} \to \mathbb{N}$ and constant $c$, even though there is a $2^{O(k)} \cdot n^{2.376}$ algorithm for finding a cycle or path of length $k$ [2].},
booktitle = {Proceedings of the 43rd Symposium on Foundations of Computer Science},
pages = {538},
series = {FOCS '02}
}
@misc{pdbench,
howpublished = {\url{http://pdbench.sourceforge.net/}},
note = {Accessed: 2020-12-15},
title = {pdbench}
}
@article{AF18,
author = {Arab, Bahareh and Feng, Su and Glavic, Boris and Lee, Seokki and Niu, Xing and Zeng, Qitian},
journal = {IEEE Data Eng. Bull.},
number = {1},
pages = {51--62},
title = {GProM - A Swiss Army Knife for Your Provenance Needs},
volume = {41},
year = {2018}
}
@inproceedings{Imielinski1989IncompleteII,
title={Incomplete Information in Relational Databases},
author={T. Imielinski and W. Lipski},
year={1989}
}
@inproceedings{10.1145/1265530.1265571,
author = {Dalvi, Nilesh and Suciu, Dan},
booktitle = {PODS},
numpages = {10},
pages = {293--302},
title = {The Dichotomy of Conjunctive Queries on Probabilistic Structures},
year = {2007}
}
@inproceedings{DBLP:conf/icde/OlteanuHK10,
author = {Dan Olteanu and
Jiewen Huang and
Christoph Koch},
booktitle = {ICDE},
pages = {145--156},
title = {Approximate confidence computation in probabilistic databases},
year = {2010}
}
@book{DBLP:series/synthesis/2011Suciu,
author = {Dan Suciu and
Dan Olteanu and
Christopher Ré and
Christoph Koch},
publisher = {Morgan \& Claypool Publishers},
title = {Probabilistic Databases},
year = {2011}
}
@inproceedings{feng:2019:sigmod:uncertainty,
author = {Feng, Su and Huber, Aaron and Glavic, Boris and Kennedy, Oliver},
booktitle = {SIGMOD},
title = {Uncertainty Annotated Databases - A Lightweight Approach for Approximating Certain Answers},
year = {2019}
}
@article{FH12,
author = {Fink, Robert and Han, Larisa and Olteanu, Dan},
journal = {PVLDB},
number = {5},
pages = {490--501},
title = {Aggregation in probabilistic databases via knowledge compilation},
volume = {5},
year = {2012}
}
@inproceedings{DBLP:conf/tapp/Zavodny11,
author = {Jakub Závodný},
booktitle = {TaPP},
title = {On Factorisation of Provenance Polynomials},
year = {2011}
}
@inproceedings{kennedy:2010:icde:pip,
author = {Kennedy, Oliver and Koch, Christoph},
booktitle = {ICDE},
title = {PIP: A Database System for Great and Small Expectations},
year = {2010}
}
@inproceedings{DBLP:conf/icde/AntovaKO07a,
author = {Lyublena Antova and
Christoph Koch and
Dan Olteanu},
booktitle = {ICDE},
title = {MayBMS: Managing Incomplete Information with Probabilistic World-Set
Decompositions},
year = {2007}
}
@misc{Antova_fastand,
author = {Lyublena Antova and Thomas Jansen and Christoph Koch and Dan Olteanu},
title = {Fast and Simple Relational Processing of Uncertain Data},
year = {2008}
}
@inproceedings{DBLP:conf/pods/KhamisNR16,
author = {Mahmoud Abo Khamis and
Hung Q. Ngo and
Atri Rudra},
booktitle = {PODS},
pages = {13--28},
title = {{FAQ}: Questions Asked Frequently},
year = {2016}
}
@article{DBLP:journals/sigmod/GuagliardoL17,
author = {Paolo Guagliardo and
Leonid Libkin},
journal = {SIGMOD Rec.},
number = {3},
pages = {5--16},
title = {Correctness of SQL Queries on Databases with Nulls},
volume = {46},
year = {2017}
}
@inproceedings{DBLP:conf/vldb/AgrawalBSHNSW06,
author = {Parag Agrawal and
Omar Benjelloun and
Anish Das Sarma and
Chris Hayworth and
Shubha U. Nabar and
Tomoe Sugihara and
Jennifer Widom},
booktitle = {VLDB},
pages = {1151--1154},
title = {Trio: A System for Data, Uncertainty, and Lineage},
year = {2006}
}
@inproceedings{k-match,
author = {Radu Curticapean},
booktitle = {ICALP},
pages = {352--363},
title = {Counting Matchings of Size $k$ Is {W}[1]-Hard},
volume = {7965},
year = {2013}
}
@inproceedings{DBLP:conf/sigmod/SinghMMPHS08,
author = {Sarvjeet Singh and
Chris Mayfield and
Sagar Mittal and
Sunil Prabhakar and
Susanne E. Hambrusch and
Rahul Shah},
booktitle = {SIGMOD},
pages = {1239--1242},
title = {Orion 2.0: native support for uncertain data},
year = {2008}
}
@inproceedings{DBLP:conf/pods/GreenKT07,
author = {Todd J. Green and
Gregory Karvounarakis and
Val Tannen},
booktitle = {PODS},
pages = {31--40},
title = {Provenance semirings},
year = {2007}
}
@inproceedings{ngo-survey,
author = {Hung Q. Ngo},
booktitle = {PODS},
title = {Worst-Case Optimal Join Algorithms: Techniques, Results, and Open
Problems},
year = {2018}
}
@article{skew,
author = {Hung Q. Ngo and
Christopher Ré and
Atri Rudra},
journal = {SIGMOD Rec.},
number = {4},
pages = {5--16},
title = {Skew strikes back: new developments in the theory of join algorithms},
volume = {42},
year = {2013}
}
@article{NPRR,
author = {Hung Q. Ngo and
Ely Porat and
Christopher Ré and
Atri Rudra},
journal = {J. ACM},
number = {3},
pages = {16:1--16:40},
title = {Worst-case Optimal Join Algorithms},
volume = {65},
year = {2018}
}
@book{arith-complexity,
author = {Peter Bürgisser and
Michael Clausen and
Mohammad Amin Shokrollahi},
publisher = {Springer},
title = {Algebraic complexity theory},
volume = {315},
year = {1997}
}
@inproceedings{triang-hard,
author = {Tsvi Kopelowitz and
Virginia Vassilevska Williams},
booktitle = {ICALP},
pages = {74:1--74:16},
title = {Towards Optimal Set-Disjointness and Set-Intersection Data Structures},
volume = {168},
year = {2020}
}
@article{LL97,
author = {Lakshmanan, L. V. S. and Leone, N. and Ross, R. and Subrahmanian, V. S.},
journal = {TODS},
number = {3},
pages = {419--469},
title = {Probview: A flexible probabilistic database system},
volume = {22},
year = {1997}
}
@article{jha-13-kcmdt,
author = {Jha, Abhay and Suciu, Dan},
title = {Knowledge Compilation Meets Database Theory: Compiling Queries
To Decision Diagrams},
journal = {Theory of Computing Systems},
volume = 52,
number = 3,
pages = {403--440},
year = 2013,
publisher = {Springer},
}
@inproceedings{BS06,
author = {Omar Benjelloun and Anish Das Sarma and Alon Y. Halevy and Jennifer Widom},
booktitle = {VLDB},
pages = {953--964},
title = {{ULDBs}: Databases with Uncertainty and Lineage},
year = {2006}
}
@conference{RS07,
author = {Ré, C. and Suciu, D.},
booktitle = {VLDB},
pages = {51--62},
title = {Materialized views in probabilistic databases: for information exchange and query optimization},
year = {2007}
}
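@article{VS17,
Author = {Van den Broeck, Guy and Suciu, Dan},
Title = {Query Processing on Probabilistic Data: A Survey},
Journal = {Foundations and Trends in Databases},
Volume = {7},
Number = {3--4},
Pages = {197--341},
Year = {2017},
}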
@incollection{GT06,
author = {Green, Todd J and Tannen, Val},
booktitle = {EDBT},
pages = {278--296},
title = {Models for incomplete and probabilistic information},
publisher = {Springer},
year = {2006}
}
@article{IL84a,
author = {Imieli{\'n}ski, Tomasz and Lipski, Jr., Witold},
journal = {JACM},
number = {4},
pages = {761--791},
title = {Incomplete Information in Relational Databases},
volume = {31},
year = {1984}
}
@article{DS12,
author = {Dalvi, Nilesh and Suciu, Dan},
journal = {JACM},
number = {6},
pages = {30:1--30:87},
title = {The dichotomy of probabilistic inference for unions of conjunctive queries},
volume = {59},
year = {2012}
}
@inproceedings{heuvel-19-anappdsd,
author = {Van den Heuvel, Maarten and Peter Ivanov and Wolfgang Gatterbauer and Floris Geerts and Martin Theobald},
booktitle = {SIGMOD},
pages = {1295--1312},
title = {Anytime Approximation in Probabilistic Databases via Scaled Dissociations},
year = {2019}
}
@article{AB15,
author = {Amarilli, Antoine and Bourhis, Pierre and Senellart, Pierre},
journal = {PODS},
title = {Probabilities and provenance via tree decompositions},
year = {2015}
}
@inproceedings{OH09a,
author = {Olteanu, Dan and Huang, Jiewen},
booktitle = {SIGMOD},
pages = {389--402},
title = {Secondary-storage confidence computation for conjunctive queries with inequalities},
year = {2009}
}
@article{FO16,
author = {Robert Fink and Dan Olteanu},
journal = {TODS},
number = {1},
pages = {4:1--4:47},
title = {Dichotomies for Queries with Negation in Probabilistic Databases},
volume = {41},
year = {2016}
}
@article{FH13,
author = {Robert Fink and Jiewen Huang and Dan Olteanu},
journal = {VLDBJ},
number = {6},
pages = {823--848},
title = {Anytime approximation in probabilistic databases},
volume = {22},
year = {2013}
}
@inproceedings{AB15c,
author = {Antoine Amarilli and Pierre Bourhis and Pierre Senellart},
booktitle = {ICALP},
pages = {56--68},
title = {Provenance Circuits for Trees and Treelike Instances},
year = {2015}
}
@inproceedings{kenig-13-nclexpdc,
author = {Batya Kenig and Avigdor Gal and Ofer Strichman},
booktitle = {SUM},
pages = {219--232},
title = {A New Class of Lineage Expressions over Probabilistic Databases Computable in P-Time},
volume = {8078},
year = {2013}
}
@inproceedings{cavallo-87-tpd,
author = {Roger Cavallo and Michael Pittarelli},
booktitle = {VLDB},
pages = {71--81},
title = {The Theory of Probabilistic Databases},
year = {1987}
}
@inproceedings{roy-11-f,
author = {Sudeepa Roy and Vittorio Perduca and Val Tannen},
booktitle = {ICDT},
title = {Faster query answering in probabilistic databases using read-once functions},
year = {2011}
}
@article{sen-10-ronfqevpd,
author = {Prithviraj Sen and Amol Deshpande and Lise Getoor},
journal = {PVLDB},
number = {1},
pages = {1068--1079},
title = {Read-Once Functions and Query Evaluation in Probabilistic Databases},
volume = {3},
year = {2010}
}
@article{provan-83-ccccptg,
author = {J. Scott Provan and Michael O. Ball},
journal = {SIAM J. Comput.},
number = {4},
pages = {777--788},
title = {The Complexity of Counting Cuts and of Computing the Probability That a Graph Is Connected},
volume = {12},
year = {1983}
}
@article{valiant-79-cenrp,
author = {Leslie G. Valiant},
journal = {SIAM J. Comput.},
number = {3},
pages = {410--421},
title = {The Complexity of Enumeration and Reliability Problems},
volume = {8},
year = {1979}
}
@inproceedings{AD11d,
author = {Amsterdamer, Yael and Deutch, Daniel and Tannen, Val},
booktitle = {PODS},
pages = {153--164},
title = {Provenance for Aggregate Queries},
year = {2011}
}
@article{S18a,
author = {Senellart, Pierre},
journal = {SIGMOD Record},
number = {4},
pages = {5--15},
title = {Provenance and Probabilities in Relational Databases},
volume = {46},
year = {2018}
}
@article{RS09b,
author = {Christopher Ré and Dan Suciu},
journal = {VLDBJ},
number = {5},
pages = {1091--1116},
title = {The trichotomy of {HAVING} queries on a probabilistic database},
volume = {18},
year = {2009}
}
@article{gatterbauer-17-dpaplinws,
author = {Wolfgang Gatterbauer and Dan Suciu},
title = {Dissociation and Propagation for Approximate Lifted Inference
With Standard Relational Database Management Systems},
journal = {{VLDB} J.},
volume = 26,
number = 1,
pages = {5--30},
year = 2017
}
@inproceedings{fink-11,
author = {Robert Fink and Dan Olteanu},
booktitle = {ICDT},
pages = {174--185},
title = {On the optimal approximation of queries using tractable propositional languages},
year = {2011}
}
@article{jha-12-pdwm,
author = {Abhay Kumar Jha and Dan Suciu},
journal = {PVLDB},
number = {11},
pages = {1160--1171},
title = {Probabilistic Databases With Markoviews},
volume = {5},
year = {2012}
}
@conference{BD05,
author = {Boulos, J. and Dalvi, N. and Mandhani, B. and Mathur, S. and R{\'e}, C. and Suciu, D.},
booktitle = {SIGMOD},
title = {{MYSTIQ}: a system for finding more answers by using probabilities},
year = {2005}
}
@article{DS07,
author = {Dalvi, N. and Suciu, D.},
journal = {VLDBJ},
number = {4},
pages = {523--544},
title = {Efficient query evaluation on probabilistic databases},
volume = {16},
year = {2007}
}
@inproceedings{re-07-eftqevpd,
author = {Christopher Ré and Nilesh N. Dalvi and Dan Suciu},
booktitle = {ICDE},
pages = {886--895},
title = {Efficient Top-k Query Evaluation on Probabilistic Data},
year = {2007}
}
@inproceedings{DM14c,
author = {Deutch, Daniel and Milo, Tova and Roy, Sudeepa and Tannen, Val},
booktitle = {ICDT},
pages = {201--212},
title = {Circuits for Datalog Provenance},
year = {2014}
}
@inproceedings{bahar-93-al,
author = {R. Iris Bahar and Erica A. Frohm and Charles M. Gaona and Gary
D. Hachtel and Enrico Macii and Abelardo Pardo and Fabio
Somenzi},
booktitle = {IEEE CAD},
title = {Algebraic decision diagrams and their applications},
year = {1993}
}
@inproceedings{gogate-13-smp,
author = {Vibhav Gogate and Pedro M. Domingos},
booktitle = {UAI},
title = {Structured Message Passing},
year = {2013}
}
@article{chen-10-cswssr,
author = {Hubie Chen and Martin Grohe},
journal = {J. Comput. Syst. Sci.},
number = {8},
pages = {847--860},
title = {Constraint Satisfaction With Succinctly Specified Relations},
volume = {76},
year = {2010}
}
@inproceedings{GL16,
author = {Paolo Guagliardo and
Leonid Libkin},
booktitle = {PODS},
title = {Making {SQL} Queries Correct on Incomplete Databases: A Feasibility
Study},
year = {2016}
}
@inproceedings{jampani2008mcdb,
author = {Jampani, Ravi and Xu, Fei and Wu, Mingxi and Perez, Luis Leopoldo and Jermaine, Christopher and Haas, Peter J},
booktitle = {SIGMOD},
title = {{MCDB}: a {Monte} {Carlo} approach to managing uncertain data},
year = {2008}
}
@article{yang:2015:pvldb:lenses,
author = {Yang, Ying and Meneghetti, Niccolò and Fehling, Ronny and Liu, Zhen Hua and Gawlick, Dieter and Kennedy, Oliver},
title = {Lenses: An On-Demand Approach to {ETL}},
journal = {PVLDB},
volume = {8},
number = {12},
year = {2015},
pages = {1578--1589}
}
@misc{pdbench,
key = {pdbench},
title = {pdbench},
howpublished = {\url{http://pdbench.sourceforge.net/}},
note = {Accessed: 2020-12-15}
}
@article{factorized-db,
author = {Dan Olteanu and
Maximilian Schleich},
journal = {SIGMOD Rec.},
number = {2},
pages = {5--16},
title = {Factorized Databases},
volume = {45},
year = {2016}
}
@article{virgi-survey,
author = {Virginia Vassilevska Williams},
title = {Some Open Problems in Fine-Grained Complexity},
journal = {{SIGACT} News},
volume = {49},
number = {4},
pages = {29--35},
year = {2018},
url = {https://doi.org/10.1145/3300150.3300158},
doi = {10.1145/3300150.3300158},
timestamp = {Tue, 18 Dec 2018 15:19:27 +0100},
biburl = {https://dblp.org/rec/journals/sigact/Williams18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{param-comp,
author = {J{\"{o}}rg Flum and
Martin Grohe},
title = {Parameterized Complexity Theory},
series = {Texts in Theoretical Computer Science. An {EATCS} Series},
publisher = {Springer},
year = {2006},
url = {https://doi.org/10.1007/3-540-29953-X},
doi = {10.1007/3-540-29953-X},
isbn = {978-3-540-29952-3},
timestamp = {Tue, 16 May 2017 14:24:38 +0200},
biburl = {https://dblp.org/rec/series/txtcs/FlumG06.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{DBLP:books/daglib/0020812,
author = {Hector Garcia{-}Molina and
Jeffrey D. Ullman and
Jennifer Widom},
title = {Database systems - the complete book {(2.} ed.)},
publisher = {Pearson Education},
year = {2009}
}
@article{DBLP:journals/jal/KarpLM89,
author = {Richard M. Karp and
Michael Luby and
Neal Madras},
title = {Monte-Carlo Approximation Algorithms for Enumeration Problems},
journal = {J. Algorithms},
volume = {10},
number = {3},
pages = {429--448},
year = {1989}
}
@inproceedings{ajar,
author = {Manas R. Joglekar and
Rohan Puttagunta and
Christopher R{\'{e}}},
editor = {Tova Milo and
Wang{-}Chiew Tan},
title = {{AJAR:} Aggregations and Joins over Annotated Relations},
booktitle = {Proceedings of the 35th {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles
of Database Systems, {PODS} 2016, San Francisco, CA, USA, June 26
- July 01, 2016},
pages = {91--106},
publisher = {{ACM}},
year = {2016},
url = {https://doi.org/10.1145/2902251.2902293},
doi = {10.1145/2902251.2902293},
timestamp = {Tue, 06 Nov 2018 16:58:02 +0100},
biburl = {https://dblp.org/rec/conf/pods/JoglekarPR16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{graetzer-08-un,
author = {Gr{\"a}tzer, George},
title = {Universal algebra},
year = 2008,
publisher = {Springer Science \& Business Media}
}
@article{AGM,
author = {Albert Atserias and
Martin Grohe and
D{\'{a}}niel Marx},
title = {Size Bounds and Query Plans for Relational Joins},
journal = {{SIAM} J. Comput.},
volume = {42},
number = {4},
pages = {1737--1767},
year = {2013},
url = {https://doi.org/10.1137/110859440},
doi = {10.1137/110859440},
timestamp = {Thu, 08 Jun 2017 08:59:24 +0200},
biburl = {https://dblp.org/rec/journals/siamcomp/AtseriasGM13.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/vldb/SaRR0W0Z17,
author = {De Sa, Christopher and
Alexander Ratner and
Christopher R{\'{e}} and
Jaeho Shin and
Feiran Wang and
Sen Wu and
Ce Zhang},
title = {Incremental knowledge base construction using {DeepDive}},
journal = {{VLDB} J.},
volume = {26},
number = {1},
pages = {81--105},
year = {2017}
}
@article{DBLP:journals/pvldb/RekatsinasCIR17,
author = {Theodoros Rekatsinas and
Xu Chu and
Ihab F. Ilyas and
Christopher R{\'{e}}},
title = {{HoloClean}: Holistic Data Repairs with Probabilistic Inference},
journal = {Proc. {VLDB} Endow.},
volume = {10},
number = {11},
pages = {1190--1201},
year = {2017}
}
@article{DBLP:journals/pvldb/BeskalesIG10,
author = {George Beskales and
Ihab F. Ilyas and
Lukasz Golab},
title = {Sampling the Repairs of Functional Dependency Violations under Hard
Constraints},
journal = {Proc. {VLDB} Endow.},
volume = {3},
number = {1},
pages = {197--207},
year = {2010}
}
@article{DBLP:journals/tods/OlteanuS16,
author = {Dan Olteanu and
Sebastiaan J. van Schaik},
title = {ENFrame: {A} Framework for Processing Probabilistic Data},
journal = {{ACM} Trans. Database Syst.},
volume = {41},
number = {1},
pages = {3:1--3:44},
year = {2016}
}
@inproceedings{DBLP:conf/sigmod/GaoLPJ17,
author = {Zekai J. Gao and
Shangyu Luo and
Luis Leopoldo Perez and
Chris Jermaine},
title = {The {BUDS} Language for Distributed Bayesian Machine Learning},
booktitle = {{SIGMOD} Conference},
pages = {961--976},
publisher = {{ACM}},
year = {2017}
}
@inproceedings{DBLP:conf/sigmod/CaiVPAHJ13,
author = {Zhuhua Cai and
Zografoula Vagena and
Luis Leopoldo Perez and
Subramanian Arumugam and
Peter J. Haas and
Christopher M. Jermaine},
title = {Simulation of database-valued {M}arkov chains using {SimSQL}},
booktitle = {{SIGMOD} Conference},
pages = {637--648},
publisher = {{ACM}},
year = {2013}
}
@inproceedings{kumari:2016:qdb:communicating,
author = {Kumari, Poonam and Achmiz, Said and Kennedy, Oliver},
title = {Communicating Data Quality in On-Demand Curation},
booktitle = {QDB},
year = {2016}
}
@inproceedings{feng:2021:sigmod:efficient,
author = {Feng, Su and Glavic, Boris and Huber, Aaron and Kennedy, Oliver},
title = {Efficient Uncertainty Tracking for Complex Queries with Attribute-level Bounds},
booktitle = {SIGMOD},
year = {2021}
}

56
arXiv/main.blg Normal file
View File

@ -0,0 +1,56 @@
This is BibTeX, Version 0.99d (TeX Live 2020/W32TeX)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: main.aux
The style file: plainurl.bst
Reallocated glb_str_ptr (elt_size=4) to 20 items from 10.
Reallocated global_strs (elt_size=200001) to 20 items from 10.
Reallocated glb_str_end (elt_size=4) to 20 items from 10.
Reallocated singl_function (elt_size=4) to 100 items from 50.
Reallocated singl_function (elt_size=4) to 100 items from 50.
Database file #1: main.bib
Warning--to sort, need author or key in pdbench
Warning--empty publisher in GT06
Warning--empty booktitle in Imielinski1989IncompleteII
Warning--empty journal in VS17
You've used 49 entries,
2882 wiz_defined-function locations,
907 strings with 12153 characters,
and the built_in function-call counts, 23733 in all, are:
= -- 2415
> -- 922
< -- 18
+ -- 338
- -- 284
* -- 1338
:= -- 3300
add.period$ -- 153
call.type$ -- 49
change.case$ -- 286
chr.to.int$ -- 0
cite$ -- 53
duplicate$ -- 721
empty$ -- 2246
format.name$ -- 284
if$ -- 6122
int.to.chr$ -- 0
int.to.str$ -- 49
missing$ -- 50
newline$ -- 251
num.names$ -- 96
pop$ -- 575
preamble$ -- 1
purify$ -- 240
quote$ -- 0
skip$ -- 1600
stack$ -- 0
substring$ -- 1164
swap$ -- 202
text.length$ -- 18
text.prefix$ -- 0
top$ -- 0
type$ -- 190
warning$ -- 4
while$ -- 175
width$ -- 51
write$ -- 538
(There were 4 warnings)

2543
arXiv/main.log Normal file

File diff suppressed because it is too large Load Diff

67
arXiv/main.out Normal file
View File

@ -0,0 +1,67 @@
\BOOKMARK [1][-]{section.1}{\376\377\0001\000\040\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n}{}% 1
\BOOKMARK [2][-]{subsection.1.1}{\376\377\0001\000.\0001\000\040\000P\000o\000l\000y\000n\000o\000m\000i\000a\000l\000\040\000E\000q\000u\000i\000v\000a\000l\000e\000n\000c\000e}{section.1}% 2
\BOOKMARK [2][-]{subsection.1.2}{\376\377\0001\000.\0002\000\040\000O\000u\000r\000\040\000T\000e\000c\000h\000n\000i\000q\000u\000e\000s}{section.1}% 3
\BOOKMARK [1][-]{section.2}{\376\377\0002\000\040\000B\000a\000c\000k\000g\000r\000o\000u\000n\000d\000\040\000a\000n\000d\000\040\000N\000o\000t\000a\000t\000i\000o\000n}{}% 4
\BOOKMARK [2][-]{subsection.2.1}{\376\377\0002\000.\0001\000\040\000P\000o\000l\000y\000n\000o\000m\000i\000a\000l\000\040\000D\000e\000f\000i\000n\000i\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000T\000e\000r\000m\000i\000n\000o\000l\000o\000g\000y}{section.2}% 5
\BOOKMARK [2][-]{subsection.2.2}{\376\377\0002\000.\0002\000\040\000B\000i\000n\000a\000r\000y\000-\000B\000I\000D\000B}{section.2}% 6
\BOOKMARK [3][-]{subsubsection.2.2.1}{\376\377\0002\000.\0002\000.\0001\000\040\000P\000o\000s\000s\000i\000b\000l\000e\000\040\000W\000o\000r\000l\000d\000\040\000S\000e\000m\000a\000n\000t\000i\000c\000s}{subsection.2.2}% 7
\BOOKMARK [2][-]{subsection.2.3}{\376\377\0002\000.\0003\000\040\000F\000o\000r\000m\000a\000l\000i\000z\000i\000n\000g\000\040\000p\000r\000o\000b\000:\000i\000n\000t\000r\000o\000-\000s\000t\000m\000t}{section.2}% 8
\BOOKMARK [2][-]{subsection.2.4}{\376\377\0002\000.\0004\000\040\000R\000e\000l\000a\000t\000i\000o\000n\000s\000h\000i\000p\000\040\000t\000o\000\040\000D\000e\000t\000e\000r\000m\000i\000n\000i\000s\000t\000i\000c\000\040\000Q\000u\000e\000r\000y\000\040\000R\000u\000n\000t\000i\000m\000e\000s}{section.2}% 9
\BOOKMARK [1][-]{section.3}{\376\377\0003\000\040\000H\000a\000r\000d\000n\000e\000s\000s\000\040\000o\000f\000\040\000E\000x\000a\000c\000t\000\040\000C\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n}{}% 10
\BOOKMARK [2][-]{subsection.3.1}{\376\377\0003\000.\0001\000\040\000P\000r\000e\000l\000i\000m\000i\000n\000a\000r\000i\000e\000s}{section.3}% 11
\BOOKMARK [2][-]{subsection.3.2}{\376\377\0003\000.\0002\000\040\000M\000u\000l\000t\000i\000p\000l\000e\000\040\000D\000i\000s\000t\000i\000n\000c\000t\000\040\000p\000\040\000V\000a\000l\000u\000e\000s}{section.3}% 12
\BOOKMARK [2][-]{subsection.3.3}{\376\377\0003\000.\0003\000\040\000S\000i\000n\000g\000l\000e\000\040\000p\000\040\000v\000a\000l\000u\000e}{section.3}% 13
\BOOKMARK [1][-]{section.4}{\376\377\0004\000\040\0001\000\040\000\040\000A\000p\000p\000r\000o\000x\000i\000m\000a\000t\000i\000o\000n\000\040\000A\000l\000g\000o\000r\000i\000t\000h\000m}{}% 14
\BOOKMARK [2][-]{subsection.4.1}{\376\377\0004\000.\0001\000\040\000P\000r\000e\000l\000i\000m\000i\000n\000a\000r\000i\000e\000s\000\040\000a\000n\000d\000\040\000s\000o\000m\000e\000\040\000m\000o\000r\000e\000\040\000n\000o\000t\000a\000t\000i\000o\000n}{section.4}% 15
\BOOKMARK [2][-]{subsection.4.2}{\376\377\0004\000.\0002\000\040\000O\000u\000r\000\040\000m\000a\000i\000n\000\040\000r\000e\000s\000u\000l\000t}{section.4}% 16
\BOOKMARK [1][-]{section.5}{\376\377\0005\000\040\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 17
\BOOKMARK [1][-]{section.6}{\376\377\0006\000\040\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000s\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 18
\BOOKMARK [1][-]{section.7}{\376\377\0007\000\040\000A\000c\000k\000n\000o\000w\000l\000e\000d\000g\000e\000m\000e\000n\000t\000s}{}% 19
\BOOKMARK [1][-]{appendix.A}{\376\377\000A\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000i\000n\000g\000\040\000B\000e\000y\000o\000n\000d\000\040\000S\000e\000t\000\040\000I\000n\000p\000u\000t\000s}{}% 20
\BOOKMARK [2][-]{subsection.A.1}{\376\377\000A\000.\0001\000\040\000T\000I\000D\000B\000s}{appendix.A}% 21
\BOOKMARK [2][-]{subsection.A.2}{\376\377\000A\000.\0002\000\040\000B\000I\000D\000B\000s}{appendix.A}% 22
\BOOKMARK [1][-]{appendix.B}{\376\377\000B\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000d\000e\000t\000a\000i\000l\000s\000\040\000f\000r\000o\000m\000\040\000S\000e\000c\000t\000i\000o\000n\000\040\0002}{}% 23
\BOOKMARK [2][-]{subsection.B.1}{\376\377\000B\000.\0001\000\040\000K\000-\000r\000e\000l\000a\000t\000i\000o\000n\000s\000\040\000a\000n\000d\000\040\000N\000[\000X\000]\000-\000e\000n\000c\000o\000d\000e\000d\000\040\000P\000D\000B\000\040\000s}{appendix.B}% 24
\BOOKMARK [2][-]{subsection.B.2}{\376\377\000B\000.\0002\000\040\000T\000I\000D\000B\000s\000\040\000a\000n\000d\000\040\000B\000I\000D\000B\000s\000\040\000i\000n\000\040\000t\000h\000e\000\040\000N\000[\000X\000]\000-\000e\000n\000c\000o\000d\000e\000d\000\040\000P\000D\000B\000\040\000m\000o\000d\000e\000l}{appendix.B}% 25
\BOOKMARK [2][-]{subsection.B.3}{\376\377\000B\000.\0003\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000p\000r\000o\000p\000:\000e\000x\000p\000e\000c\000t\000i\000o\000n\000-\000o\000f\000-\000p\000o\000l\000y\000n\000o\000m}{appendix.B}% 26
\BOOKMARK [2][-]{subsection.B.4}{\376\377\000B\000.\0004\000\040\000P\000r\000o\000p\000o\000s\000i\000t\000i\000o\000n\000\040\000B\000.\0004}{appendix.B}% 27
\BOOKMARK [2][-]{subsection.B.5}{\376\377\000B\000.\0005\000\040\000P\000r\000o\000o\000f\000\040\000f\000o\000r\000\040\000L\000e\000m\000m\000a\000\040\0001\000.\0004}{appendix.B}% 28
\BOOKMARK [2][-]{subsection.B.6}{\376\377\000B\000.\0006\000\040\000P\000r\000o\000o\000f\000\040\000F\000o\000r\000\040\000C\000o\000r\000o\000l\000l\000a\000r\000y\000\040\0002\000.\0007}{appendix.B}% 29
\BOOKMARK [1][-]{appendix.C}{\376\377\000C\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000d\000e\000t\000a\000i\000l\000s\000\040\000f\000r\000o\000m\000\040\000S\000e\000c\000t\000i\000o\000n\000\040\0003}{}% 30
\BOOKMARK [2][-]{subsection.C.1}{\376\377\000C\000.\0001\000\040\000l\000e\000m\000:\000p\000d\000b\000-\000f\000o\000r\000-\000d\000e\000f\000-\000q\000k}{appendix.C}% 31
\BOOKMARK [2][-]{subsection.C.2}{\376\377\000C\000.\0002\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000l\000e\000m\000:\000t\000d\000e\000t\000-\000o\000m}{appendix.C}% 32
\BOOKMARK [2][-]{subsection.C.3}{\376\377\000C\000.\0003\000\040\000l\000e\000m\000:\000q\000E\000k\000-\000m\000u\000l\000t\000i\000-\000p}{appendix.C}% 33
\BOOKMARK [2][-]{subsection.C.4}{\376\377\000C\000.\0004\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000L\000e\000m\000m\000a\000\040\000C\000.\0002}{appendix.C}% 34
\BOOKMARK [2][-]{subsection.C.5}{\376\377\000C\000.\0005\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0003\000.\0006}{appendix.C}% 35
\BOOKMARK [2][-]{subsection.C.6}{\376\377\000C\000.\0006\000\040\000S\000u\000b\000g\000r\000a\000p\000h\000\040\000N\000o\000t\000a\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000O\000\050\0001\000\051\000\040\000C\000l\000o\000s\000e\000d\000\040\000F\000o\000r\000m\000u\000l\000a\000s}{appendix.C}% 36
\BOOKMARK [2][-]{subsection.C.7}{\376\377\000C\000.\0007\000\040\000P\000r\000o\000o\000f\000s\000\040\000o\000f\000\040\000e\000q\000:\0001\000e\000-\000e\000q\000:\0003\000p\000-\0003\000t\000r\000i}{appendix.C}% 37
\BOOKMARK [2][-]{subsection.C.8}{\376\377\000C\000.\0008\000\040\000T\000o\000o\000l\000s\000\040\000t\000o\000\040\000p\000r\000o\000v\000e\000\040\000t\000h\000:\000s\000i\000n\000g\000l\000e\000-\000p\000-\000h\000a\000r\000d}{appendix.C}% 38
\BOOKMARK [3][-]{subsubsection.C.8.1}{\376\377\000C\000.\0008\000.\0001\000\040\000P\000r\000o\000o\000f\000\040\000f\000o\000r\000\040\000l\000e\000m\000:\000q\000E\0003\000-\000e\000x\000p}{subsection.C.8}% 39
\BOOKMARK [2][-]{subsection.C.9}{\376\377\000C\000.\0009\000\040\000P\000r\000o\000o\000f\000s\000\040\000f\000o\000r\000\040\000l\000e\000m\000:\0003\000m\000-\000G\0002\000,\000\040\000l\000e\000m\000:\000t\000r\000i\000,\000\040\000a\000n\000d\000\040\000l\000e\000m\000:\000l\000i\000n\000-\000s\000y\000s}{appendix.C}% 40
\BOOKMARK [3][-]{subsubsection.C.9.1}{\376\377\000C\000.\0009\000.\0001\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000L\000e\000m\000m\000a\000\040\000C\000.\0006}{subsection.C.9}% 41
\BOOKMARK [3][-]{subsubsection.C.9.2}{\376\377\000C\000.\0009\000.\0002\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000l\000e\000m\000:\000t\000r\000i}{subsection.C.9}% 42
\BOOKMARK [3][-]{subsubsection.C.9.3}{\376\377\000C\000.\0009\000.\0003\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000l\000e\000m\000:\000l\000i\000n\000-\000s\000y\000s}{subsection.C.9}% 43
\BOOKMARK [2][-]{subsection.C.10}{\376\377\000C\000.\0001\0000\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000t\000h\000:\000s\000i\000n\000g\000l\000e\000-\000p}{appendix.C}% 44
\BOOKMARK [2][-]{subsection.C.11}{\376\377\000C\000.\0001\0001\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000t\000h\000:\000s\000i\000n\000g\000l\000e\000-\000p\000-\000h\000a\000r\000d}{appendix.C}% 45
\BOOKMARK [1][-]{appendix.D}{\376\377\000D\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000D\000e\000t\000a\000i\000l\000s\000\040\000f\000r\000o\000m\000\040\000S\000e\000c\000t\000i\000o\000n\000\040\0004}{}% 46
\BOOKMARK [2][-]{subsection.D.1}{\376\377\000D\000.\0001\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\000D\000.\0005}{appendix.D}% 47
\BOOKMARK [2][-]{subsection.D.2}{\376\377\000D\000.\0002\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\000D\000.\0008}{appendix.D}% 48
\BOOKMARK [2][-]{subsection.D.3}{\376\377\000D\000.\0003\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000c\000o\000r\000:\000a\000p\000p\000r\000o\000x\000-\000a\000l\000g\000o\000-\000c\000o\000n\000s\000t\000-\000p}{appendix.D}% 49
\BOOKMARK [2][-]{subsection.D.4}{\376\377\000D\000.\0004\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000l\000e\000m\000:\000c\000t\000i\000d\000b\000-\000g\000a\000m\000m\000a}{appendix.D}% 50
\BOOKMARK [2][-]{subsection.D.5}{\376\377\000D\000.\0005\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000l\000e\000m\000:\000v\000a\000l\000-\000u\000b}{appendix.D}% 51
\BOOKMARK [2][-]{subsection.D.6}{\376\377\000D\000.\0006\000\040\000O\000n\000e\000P\000a\000s\000s\000\040\000R\000e\000m\000a\000r\000k\000s}{appendix.D}% 52
\BOOKMARK [2][-]{subsection.D.7}{\376\377\000D\000.\0007\000\040\000O\000n\000e\000P\000a\000s\000s\000\040\000E\000x\000a\000m\000p\000l\000e}{appendix.D}% 53
\BOOKMARK [2][-]{subsection.D.8}{\376\377\000D\000.\0008\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000O\000n\000e\000P\000a\000s\000s\000\040\000\050\000l\000e\000m\000:\000o\000n\000e\000-\000p\000a\000s\000s\000\051}{appendix.D}% 54
\BOOKMARK [2][-]{subsection.D.9}{\376\377\000D\000.\0009\000\040\000S\000a\000m\000p\000l\000e\000M\000o\000n\000o\000m\000i\000a\000l\000\040\000R\000e\000m\000a\000r\000k\000s}{appendix.D}% 55
\BOOKMARK [2][-]{subsection.D.10}{\376\377\000D\000.\0001\0000\000\040\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000S\000a\000m\000p\000l\000e\000M\000o\000n\000o\000m\000i\000a\000l\000\040\000\050\000l\000e\000m\000:\000s\000a\000m\000p\000l\000e\000\051}{appendix.D}% 56
\BOOKMARK [2][-]{subsection.D.11}{\376\377\000D\000.\0001\0001\000\040\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000R\000e\000s\000u\000l\000t\000s}{appendix.D}% 57
\BOOKMARK [1][-]{appendix.E}{\376\377\000E\000\040\000C\000i\000r\000c\000u\000i\000t\000s}{}% 58
\BOOKMARK [2][-]{subsection.E.1}{\376\377\000E\000.\0001\000\040\000R\000e\000p\000r\000e\000s\000e\000n\000t\000i\000n\000g\000\040\000P\000o\000l\000y\000n\000o\000m\000i\000a\000l\000s\000\040\000w\000i\000t\000h\000\040\000C\000i\000r\000c\000u\000i\000t\000s}{appendix.E}% 59
\BOOKMARK [3][-]{subsubsection.E.1.1}{\376\377\000E\000.\0001\000.\0001\000\040\000C\000i\000r\000c\000u\000i\000t\000s\000\040\000f\000o\000r\000\040\000q\000u\000e\000r\000y\000\040\000p\000l\000a\000n\000s}{subsection.E.1}% 60
\BOOKMARK [2][-]{subsection.E.2}{\376\377\000E\000.\0002\000\040\000M\000o\000d\000e\000l\000i\000n\000g\000\040\000C\000i\000r\000c\000u\000i\000t\000\040\000C\000o\000n\000s\000t\000r\000u\000c\000t\000i\000o\000n}{appendix.E}% 61
\BOOKMARK [3][-]{subsubsection.E.2.1}{\376\377\000E\000.\0002\000.\0001\000\040\000B\000o\000u\000n\000d\000i\000n\000g\000\040\000c\000i\000r\000c\000u\000i\000t\000\040\000d\000e\000p\000t\000h}{subsection.E.2}% 62
\BOOKMARK [3][-]{subsubsection.E.2.2}{\376\377\000E\000.\0002\000.\0002\000\040\000C\000i\000r\000c\000u\000i\000t\000\040\000s\000i\000z\000e\000\040\000v\000s\000.\000\040\000r\000u\000n\000t\000i\000m\000e}{subsection.E.2}% 63
\BOOKMARK [3][-]{subsubsection.E.2.3}{\376\377\000E\000.\0002\000.\0003\000\040\000R\000u\000n\000t\000i\000m\000e\000\040\000o\000f\000\040\000L\000C}{subsection.E.2}% 64
\BOOKMARK [1][-]{appendix.F}{\376\377\000F\000\040\000H\000i\000g\000h\000e\000r\000\040\000M\000o\000m\000e\000n\000t\000s}{}% 65
\BOOKMARK [1][-]{appendix.G}{\376\377\000G\000\040\000T\000h\000e\000\040\000K\000a\000r\000p\000-\000L\000u\000b\000y\000\040\000E\000s\000t\000i\000m\000a\000t\000o\000r}{}% 66
\BOOKMARK [1][-]{appendix.H}{\376\377\000H\000\040\000P\000a\000r\000a\000m\000e\000t\000e\000r\000i\000z\000e\000d\000\040\000C\000o\000m\000p\000l\000e\000x\000i\000t\000y}{}% 67

BIN
arXiv/main.pdf Normal file

Binary file not shown.

BIN
arXiv/main.synctex.gz Normal file

Binary file not shown.

144
arXiv/main.tex Normal file
View File

@ -0,0 +1,144 @@
\documentclass[a4paper]{lipics-v2021}
\usepackage[table]{xcolor}%for rebuttal document, in particular \rowcolor
\usepackage{caption}%caption for table
\usepackage{cellspace}%padding of tabular cells
\usepackage{bm}%for math mode bold font
\usepackage{relsize}%\mathlarger
\usepackage{algpseudocode}
\usepackage{algorithm}
\usepackage{tikz}
\usepackage{tikz-qtree}
\usepackage{comment}
\usepackage{amsmath}
\let\endproof\relax
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{etoolbox}
\usepackage{xstring} %for conditionals in \newcommand
\usepackage{stmaryrd}
\usepackage[normalem]{ulem}
\usepackage{subcaption}
\usepackage{booktabs}
\usepackage{todonotes}
\usepackage{graphicx}
\usepackage{listings}
%%%%%%%%%% SQL + provenance listing settings
\usepackage{mdframed}
\lstdefinestyle{psql}
{
tabsize=2,
basicstyle=\small\upshape\ttfamily,
language=SQL,
morekeywords={PROVENANCE,BASERELATION,INFLUENCE,COPY,ON,TRANSPROV,TRANSSQL,TRANSXML,CONTRIBUTION,COMPLETE,TRANSITIVE,NONTRANSITIVE,EXPLAIN,SQLTEXT,GRAPH,IS,ANNOT,THIS,XSLT,MAPPROV,cxpath,OF,TRANSACTION,SERIALIZABLE,COMMITTED,INSERT,INTO,WITH,SCN,UPDATED,LENS,SCHEMA_MATCHING,string,WINDOW,max,OVER,PARTITION,FIRST_VALUE,WITH},
extendedchars=false,
keywordstyle=\bfseries,
mathescape=true,
escapechar=@,
sensitive=true
}
\lstset{style=psql}
%%%%%%%%%%%%%%%%%%
\usepackage{wrapfig}
\usepackage{fancyvrb}
\usepackage{braket}
\usepackage[inline]{enumitem}
\usepackage{xspace}
\usepackage{hyperref}
\usepackage{url}
\usepackage{cleveref}
\usepackage{color}
\graphicspath{ {figures/} }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{outlines}%For outline capabilities
\usepackage{enumitem}%used in tandem with outlines package
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\input{macros}
% reference names
\crefname{example}{ex.}{ex.}
\Crefname{example}{Ex.}{Ex.}
\Crefname{figure}{Fig.}{Fig.}
\Crefname{section}{Sec.}{Sec.}
\Crefname{definition}{Def.}{Def.}
\Crefname{theorem}{Thm.}{Thm.}
\Crefname{lemma}{Lem.}{Lem.}
\crefname{equation}{eq.}{eq.}
\Crefname{equation}{Eq.}{Eq.}
%%%%%%%%%%%%%%%%%%%%
\title{Computing expected multiplicities for bag-TIDBs with bounded multiplicities}
\titlerunning{Bag PDB Queries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\author{Su Feng}{Illinois Institute of Technology, Chicago, USA}{sfeng14@hawk.iit.edu}{}{}{}
\author{Boris Glavic}{Illinois Institute of Technology, USA}{bglavic@iit.edu}{}{}{}
\author{Aaron Huber}{University at Buffalo, USA}{ahuber@buffalo.edu}{}{}{}
\author{Oliver Kennedy}{University at Buffalo, USA}{okennedy@buffalo.edu}{}{}{}
\author{Atri Rudra}{University at Buffalo, USA}{atri@buffalo.edu}{}{}{}
\authorrunning{S. Feng, B. Glavic, A. Huber, O. Kennedy, A. Rudra}
\Copyright{Aaron Huber, Oliver Kennedy, Atri Rudra, Su Feng, Boris Glavic}
\ccsdesc{Information systems~Incomplete data}
\keywords{PDB, bags, polynomial, boolean formula, etc.}
\EventEditors{John Q. Open and Joan R. Access}
\EventNoEds{2}
\EventLongTitle{42nd Conference on Very Important Topics (CVIT 2016)}
\EventShortTitle{CVIT 2016}
\EventAcronym{CVIT}
\EventYear{2016}
\EventDate{December 24--27, 2016}
\EventLocation{Little Whinging, United Kingdom}
\EventLogo{}
\SeriesVolume{42}
\ArticleNo{23}
\begin{document}
\lstset{language=sql}
\maketitle
\input{abstract}
\input{introduction}
\input{binarybidb}
\input{pwsem}
\input{prob-def}
\input{mult_distinct_p}
\input{single_p}
\input{approx_alg}
\input{related-work}
\input{conclusions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\bibliographystyle{plainurl}
\bibliography{main}
\input{acknowledgements}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\clearpage
\appendix
\normalsize
\input{appendix}
\input{related-work-extra}
\end{document}

1
arXiv/main.vtc Normal file
View File

@ -0,0 +1 @@
\contitem\title{Computing expected multiplicities for bag-TIDBs with bounded multiplicities}\author{Su Feng, Boris Glavic, Aaron Huber, Oliver Kennedy, and Atri Rudra}\page{23:1--23:54}

87
arXiv/mult_distinct_p.tex Normal file
View File

@ -0,0 +1,87 @@
%root:main.tex
%!TEX root=./main.tex
\section{Hardness of Exact Computation}
\label{sec:hard}
In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific family of hard instances $(\qhard,\pdb)$ for \Cref{prob:bag-pdb-poly-expected}, where $\pdb$ is a $1$-\abbrTIDB.
Note that this implies hardness for \abbrCTIDB\xplural $\inparen{\bound\geq1}$, showing \Cref{prob:bag-pdb-poly-expected} cannot be done in $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$ runtime. The results also apply to \abbrOneBIDB and other more general \abbrPDB\xplural.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Preliminaries}\label{sec:hard:sub:pre}
Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We can think of $H$ as being of constant size and $G$ as growing.
In particular, we will consider the problems of computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). We use $\kmatchtime$ to denote the optimal runtime of computing $\numocc{G}{\kmatch}$ exactly. Our hardness results in \Cref{sec:multiple-p} are based on the following hardness results/conjectures:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}[\cite{k-match}]
\label{thm:k-match-hard}
Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$ with no self-loops or parallel edges, $\kmatchtime\ge \littleomega{f(k)\cdot |\edgeSet|^c}$ for any function $f$ and any constant $c$ independent of $\abs{E}$ and $k$ (assuming $\sharpwzero\ne\sharpwone$).
\end{Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{hypo}\label{conj:known-algo-kmatch}
There exists an absolute constant $c_0>0$ such that for every $G=(\vset,\edgeSet)$, we have $\kmatchtime \ge \Omega\inparen{|E|^{c_0\cdot k}}$ for large enough $k$.
\end{hypo}
We note that the above conjecture is somewhat non-standard. In particular, the best known algorithm to compute $\numocc{G}{\kmatch}$ takes time $\Omega\inparen{|V|^{k/2}}$ (i.e. if this is the best algorithm then $c_0=\frac 14$)~\cite{k-match}. What the above conjecture is saying is that one can only hope for a polynomial improvement over the state of the art algorithm to compute $\numocc{G}{\kmatch}$.
%
Our hardness result in Section~\ref{sec:single-p} is based on the following conjectured hardness result:
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{hypo}
\label{conj:graph}
There exists a constant $\eps_0>0$ such that given an undirected graph $G=(\vset,\edgeSet)$, computing $\numocc{G}{\tri}$ exactly cannot be done in time $o\inparen{|\edgeSet|^{1+\eps_0}}$.
\end{hypo}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
The so-called {\em Triangle detection hypothesis} (cf.~\cite{triang-hard}), which states that detecting the presence of triangles in $G$ takes time $\Omega\inparen{|\edgeSet|^{4/3}}$, implies that in Conjecture~\ref{conj:graph} we can take $\eps_0\ge \frac 13$.
All of our hardness results rely on a simple lineage polynomial encoding of the edges of a graph.
To prove our hardness result, consider a graph $G=(\vset, \edgeSet)$, where $|\edgeSet| = m$, $\vset = [\numvar]$. Our lineage polynomial has a variable $X_i$ for every $i$ in $[\numvar]$.
Consider the polynomial
$\poly_{G}(\vct{X}) = \sum\limits_{(i, j) \in \edgeSet} X_i \cdot X_j.$
The hard polynomial for our problem will be a suitable power $k\ge 3$ of the polynomial above:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}\label{def:qk}
For any graph $G=(V,\edgeSet)$ and $\kElem\ge 1$, define
\[\poly_{G}^\kElem(X_1,\dots,X_n) = \left(\sum\limits_{(i, j) \in \edgeSet} X_i \cdot X_j\right)^\kElem.\]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent Returning to \Cref{fig:two-step}, it can be seen that $\poly_{G}^\kElem(\vct{X})$ is the lineage polynomial from query $\qhard^k$, which we define next ($\query_2$ from~\Cref{sec:intro} is the same query with $k=2$). Let us alias
\begin{lstlisting}
SELECT DISTINCT 1 FROM T $t_1$, R r, T $t_2$
WHERE $t_1$.Point = r.Point$_1$ AND $t_2$.Point = r.Point$_2$
\end{lstlisting}
as $R$. The query $\qhard^k$ then becomes
\mdfdefinestyle{underbrace}{topline=false, rightline=false, bottomline=false, leftline=false, backgroundcolor=black!15!white, innerbottommargin=0pt}
\begin{mdframed}[style=underbrace]
\begin{lstlisting}
SELECT COUNT(*) FROM $\underbrace{R\text{ JOIN }R\text{ JOIN}\cdots\text{JOIN }R}_{k\rm\ times}$
\end{lstlisting}
\end{mdframed}
\noindent Consider again the \abbrCTIDB instance $\pdb$ of~\Cref{fig:two-step} and, for our hard instance, let $\bound = 1$. $\pdb$ generalizes to one compatible with~\Cref{def:qk} as follows. Relation $T$ has $n$ tuples corresponding to each vertex for $i$ in $[n]$, each with probability $\prob$, and $R$ has tuples corresponding to the edges $\edgeSet$ (each with probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $R$ as well, but since they are always present with probability $1$, we drop those. Our argument also works when all the tuples in $R$ are also present with probability $\prob$, but to simplify notation we assign probability $1$ to edges.}
In other words, this instance $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$).
Note that this implies that $\poly_{G}^\kElem$ is indeed a $1$-\abbrTIDB lineage polynomial.
Next, we note that the runtime for answering $\qhard^k$ on deterministic database $\tupset$, as defined above, is $O_k\inparen{\numedge}$ (i.e. deterministic query processing is `easy' for this query):
\begin{Lemma}\label{lem:tdet-om}
Let $\qhard^k$ and $\tupset$ be as defined above. Then
$\qruntimenoopt{\qhard^k, \tupset}$ is $O_k\inparen{\numedge}$.
\end{Lemma}
\subsection{Multiple Distinct $\prob$ Values}
\label{sec:multiple-p}
We are now ready to present our main hardness result.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Theorem}\label{thm:mult-p-hard-result}
Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$. Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (over all $i\in \inset{0,\ldots,2k}$) for arbitrary $G=(\vset,\edgeSet)$
needs time $\bigOmega{\kmatchtime}$, assuming $\kmatchtime\ge \omega\inparen{\abs{\edgeSet}}$.
\end{Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
Note that the second row of \Cref{tab:lbs} follows from \Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{thm:k-match-hard} while the third row is proved by \Cref{prop:expection-of-polynom}, \Cref{thm:mult-p-hard-result}, \Cref{lem:tdet-om}, and \Cref{conj:known-algo-kmatch}. Since \Cref{conj:known-algo-kmatch} is non-standard, the latter hardness result should be interpreted as follows. Any substantial polynomial improvement for \Cref{prob:bag-pdb-poly-expected} (over the trivial algorithm that converts $\poly$ into SMB and then uses \Cref{cor:expct-sop} for \abbrStepTwo) would lead to an improvement over the state of the art {\em upper} bounds on $\kmatchtime$. Finally, note that \Cref{thm:mult-p-hard-result} needs one to be able to compute the expected multiplicities over $(2k+1)$ distinct values of $\prob_i$, each of which corresponds to a distinct $\bpd$ (for the same $\tupset$), which explains the `Multiple' entry in the second column in the second and third rows of \Cref{tab:lbs}. Next, we argue how to get rid of this latter requirement.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

BIN
arXiv/orcid.pdf Normal file

Binary file not shown.

104
arXiv/prob-def.tex Normal file
View File

@ -0,0 +1,104 @@
%root: main.tex
%!TEX root=./main.tex
\subsection{Formalizing \Cref{prob:intro-stmt}}\label{sec:expression-trees}
We focus on the problem of computing $\expct_{\worldvec\sim\pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}$ from now on, assume implicit $\query, \tupset, \tup$, and drop them from $\apolyqdt$ (i.e., $\poly\inparen{\vct{X}}$ will denote a polynomial).
\Cref{prob:intro-stmt} asks if there exists a linear time approximation algorithm in the size of a given circuit \circuit which encodes $\poly\inparen{\vct{X}}$. Recall that in this work we
represent lineage polynomials via {\em arithmetic circuits}~\cite{arith-complexity}, a standard way to represent polynomials over fields (particularly in the field of algebraic complexity) that we use for polynomials over $\mathbb N$ in the obvious way. Since we are specifically using circuits to model lineage polynomials, we can refer to these circuits as lineage circuits. However, when the meaning is clear, we will drop the term lineage and only refer to them as circuits.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Circuit]\label{def:circuit}
A circuit $\circuit$ is a Directed Acyclic Graph (DAG) whose source gates (in-degree of $0$) consist of elements in either $\domN$ or $\vct{X} = \inparen{X_1,\ldots,X_\numvar}$. For each result tuple there exists one sink gate. The internal gates have binary input and are either sum ($\circplus$) or product ($\circmult$) gates.
%
Each gate has the following members: \type, \vari{input}, \val, \vpartial, \degval, \vari{Lweight}, and \vari{Rweight}, where \type is the value type $\{\circplus, \circmult, \var, \tnum\}$ and \vari{input} the list of inputs. Source gates have an extra member \val storing the value. $\circuit_\linput$ ($\circuit_\rinput$) denotes the left (right) input of \circuit.
\end{Definition}
When the underlying DAG is a tree (with edges pointing towards the root), the structure is an expression tree \etree. In such a case, the root of \etree is analogous to the sink of \circuit. The fields \vari{partial}, \degval, \vari{Lweight}, and \vari{Rweight} are used in the proofs of \Cref{sec:proofs-approx-alg}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The circuits in \Cref{fig:two-step} encode their respective polynomials in column $\poly$.
Note that the circuit \circuit representing $AX$ and the circuit \circuit' representing $B\inparen{Y+Z}$ each encode a tree, with edges pointing towards the root.
\begin{wrapfigure}{L}{0.45\linewidth}
\centering
\begin{tikzpicture}[thick]
\node[tree_node] (a1) at (0, 0) {$\boldsymbol{X}$};
\node[tree_node] (b1) at (1.5, 0) {$\boldsymbol{2}$};
\node[tree_node] (c1) at (3, 0) {$\boldsymbol{Y}$};
\node[tree_node] (d1) at (4.5, 0) {$\boldsymbol{-1}$};
\node[tree_node] (a2) at (0.75, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (b2) at (2.25, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (c2) at (3.75, 0.75) {$\boldsymbol{\circmult}$};
\node[tree_node] (a3) at (0.55, 1.5) {$\boldsymbol{\circplus}$};
\node[tree_node] (b3) at (3.75, 1.5) {$\boldsymbol{\circplus}$};
\node[tree_node] (a4) at (2.25, 2.25) {$\boldsymbol{\circmult}$};
\draw[->] (a1) -- (a2);
\draw[->] (a1) -- (a3);
\draw[->] (b1) -- (a2);
\draw[->] (b1) -- (b2);
\draw[->] (c1) -- (c2);
\draw[->] (c1) -- (b2);
\draw[->] (d1) -- (c2);
\draw[->] (a2) -- (b3);
\draw[->] (b2) -- (a3);
\draw[->] (c2) -- (b3);
\draw[->] (a3) -- (a4);
\draw[->] (b3) -- (a4);
\draw[->] (a4) -- (2.25, 2.75);
\end{tikzpicture}
\caption{Circuit encoding of $(X + 2Y)(2X - Y)$}
\label{fig:circuit}
\end{wrapfigure}
We next formally define the relationship of circuits with polynomials. While the definition assumes one sink for notational convenience, it easily generalizes to the multiple sinks case.
\begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func}
$\polyf(\circuit)$ maps the sink of circuit $\circuit$ to its corresponding polynomial (in \abbrSMB). $\polyf(\cdot)$ is recursively defined on $\circuit$ as follows, with addition and multiplication following the standard interpretation for polynomials:
\begin{equation*}
\polyf(\circuit) = \begin{cases}
\polyf(\circuit_\lchild) + \polyf(\circuit_\rchild) &\text{ if \circuit.\type } = \circplus\\
\polyf(\circuit_\lchild) \cdot \polyf(\circuit_\rchild) &\text{ if \circuit.\type } = \circmult\\
\circuit.\val &\text{ if \circuit.\type } = \var \text{ OR } \tnum.
\end{cases}
\end{equation*}
\end{Definition}
$\circuit$ need not encode $\poly\inparen{\vct{X}}$ in the same, default \abbrSMB representation. For instance, $\circuit$ could encode the factorized representation $(X + 2Y)(2X - Y)$ of $\poly\inparen{\vct{X}} = 2X^2+3XY-2Y^2$, as shown in \Cref{fig:circuit}, while $\polyf(\circuit) = \poly\inparen{\vct{X}}$ is always the equivalent \abbrSMB representation.
\begin{Definition}[Circuit Set]\label{def:circuit-set}
$\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $\polyf(\circuit) = \polyX$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$. One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\medskip
\noindent We are now ready to formally state the final version of \Cref{prob:intro-stmt}.%our \textbf{main problem}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\pdb'$ be an arbitrary \abbrCTIDB and $\vct{X}$ be the set of variables annotating tuples in $\tupset'$. Fix an $\raPlus$ query $\query$ and a result tuple $\tup$.
The \expectProblem is defined as follows:\\[-7mm]
\begin{center}
\textbf{Input}: $\circuit \in \circuitset{\polyX}$ for $\poly'\inparen{\vct{X}} = \poly'\pbox{\query,\tupset',\tup}$
\hspace*{2mm}
\textbf{Output}: $\expct_{\vct{W} \sim \bpd}\pbox{\poly'\pbox{\query, \tupset', \tup}\inparen{\vct{W}}}$
\end{center}
\end{Definition}
\input{circuits-model-runtime}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

56
arXiv/pwsem.tex Normal file
View File

@ -0,0 +1,56 @@
%root: main.tex
%!TEX root = ./main.tex
\iffalse
\begin{Example}\label{example:qtilde}
Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks. The expanded derivation for $\rpoly(X, Y)$ is
\begin{align*}
(&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\
= ~&X + 2XY + Y^2 \mod Y^2 - Y\\
= ~& X + 2XY + Y
\end{align*}
\end{Example}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $\abs{\poly}$ be the number of operators in $\poly$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Corollary}\label{cor:expct-sop}
If $\poly$ is a $1$-\abbrBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $\bigO{\abs{\poly}}$ time.
\end{Corollary}
\subsubsection{Possible World Semantics}\label{subsub:possible-world-sem}
In this section, we show how the traditional possible worlds semantics corresponds to our setup. Readers can safely skip this part without missing anything vital to the results of this paper.
Queries over probabilistic databases are traditionally viewed as being evaluated using the so-called possible world semantics. A general bag-\abbrPDB can be defined as the pair $\pdb = \inparen{\Omega, \bpd}$ where $\Omega$ is the set of possible worlds represented by $\pdb$ and $\bpd$ the probability distribution over $\Omega$. Under the possible world semantics, the result of a query $\query$ over an incomplete database $\Omega$ is the set of query answers produced by evaluating $\query$ over each possible world $\omega\in\Omega$: $\inset{\query\inparen{\omega}: \omega\in\Omega}$.
The result of a query is the pair $\inparen{\query\inparen{\Omega}, \bpd'}$ where $\bpd'$ is a probability distribution that assigns to each possible query result the sum of the probabilities of the worlds that produce this answer: $\probOf\pbox{\omega\in\Omega} = \sum_{\substack{\omega'\in\Omega,\\ \query\inparen{\omega'}=\query\inparen{\omega}}}\probOf\pbox{\omega'}$.
Suppose that $\pdb'$ is a reduced \abbrOneBIDB from \abbrCTIDB $\pdb$ as defined by~\Cref{def:ctidb-reduct}. Instead of looking only at the possible worlds of $\pdb'$, one can consider the set of all worlds, including those that cannot exist due to, e.g., disjointness. Since $\abs{\tupset} = \numvar$, the all-worlds set can be modeled by $\worldvec\in \{0, 1\}^{\numvar\bound}$, such that $\worldvec_{\tup, j} \in \worldvec$ represents whether or not the multiplicity of $\tup$ is $j$ (\emph{here and later, especially in \Cref{sec:algo}, we will rename the variables as $X_1,\dots,X_{\numvar'}$, where $\numvar'=\sum_{\tup\in\tupset}\abs{\block_\tup}$}).%
\footnote{%
In this example, $\abs{\block_\tup} = \bound$ for all $\tup$.%
}
We can denote a probability distribution over all $\worldvec \in \{0, 1\}^{\numvar\bound}$ as $\bpd'$. When $\bpd'$ is the one induced from each $\prob_{\tup, j}$ while assigning $\probOf\pbox{\worldvec} = 0$ for any $\worldvec$ with $\worldvec_{\tup, j}, \worldvec_{\tup, j'} \neq 0$ for $j\neq j'$, we end up with a bijective mapping from $\bpd$ to $\bpd'$, such that each mapping is equivalent, implying that the distributions, and thus the query results, are equivalent.
\Cref{subsec:supp-mat-ti-bi-def} has more details. \medskip
We now make a meaningful connection between possible world semantics and world assignments on the lineage polynomial.
\begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}
Given a \abbrBPDB $\pdb = (\Omega,\bpd)$, $\raPlus$ query $\query$, and lineage polynomial $\apolyqdt$ for arbitrary result tuple $\tup$,
we have (denoting $\randDB$ as the random variable over $\Omega$):
$ \expct_{\randDB \sim \bpd}[\query(\randDB)(t)] = \expct_{\vct{\randWorld}\sim \pdassign}\pbox{\apolyqdt\inparen{\vct{\randWorld}}}. $
\end{Proposition}
\noindent A formal proof of \Cref{prop:expection-of-polynom} is given in \Cref{subsec:expectation-of-polynom-proof}.\footnote{Although \Cref{prop:expection-of-polynom} follows, e.g., as an obvious consequence of~\cite{IL84a}'s Theorem 7.1, we are unaware of any formal proof for bag-probabilistic databases.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

View File

@ -0,0 +1,5 @@
%!TEX root=./main.tex
\section{Parameterized Complexity}\label{sec:param-compl}
In \Cref{sec:hard}, we utilized common conjectures from fine-grained complexity theory. The notion of $\sharpwonehard$ is a standard notion in {\em parameterized complexity}, which by now is a standard complexity tool in providing data complexity bounds on query processing results~\cite{param-comp}. E.g., the fact that $k$-matching is $\sharpwonehard$ implies that we cannot have an $f(k)\cdot n^{O(1)}$ runtime for any function $f$. However, these results do not carefully track the exponent in the hardness result. E.g., the general $k$-matching problem being $\sharpwonehard$ does not imply anything specific for the $3$-matching problem. Similar questions have led to intense research into the new sub-field of {\em fine-grained complexity} (see~\cite{virgi-survey}), where we care about the exponent in our hardness assumptions as well---e.g., \Cref{conj:graph} is based on the popular {\em Triangle detection hypothesis} in this area (cf.~\cite{triang-hard}).

34
arXiv/related-work.tex Normal file
View File

@ -0,0 +1,34 @@
%!TEX root=./main.tex
\section{Related Work}\label{sec:related-work}
\textbf{Probabilistic Databases} (PDBs) have been studied predominantly for set semantics.
Approaches for probabilistic query processing (i.e., computing marginal probabilities of tuples), fall into two broad categories.
\emph{Intensional} (or \emph{grounded}) query evaluation computes the \emph{lineage} of a tuple
and then the probability of the lineage formula.
It has been shown that computing the marginal probability of a tuple is \sharpphard~\cite{valiant-79-cenrp} (by reduction from weighted model counting).
The second category, \emph{extensional} query evaluation,
is in \ptime, but is limited to certain classes of queries.
Dalvi et al.~\cite{DS12} and Olteanu et al.~\cite{FO16} proved dichotomies for UCQs and two classes of queries with negation, respectively.
Amarilli et al. investigated tractable classes of databases for more complex queries~\cite{AB15}.
Another line of work studies which structural properties of lineage formulas lead to tractable cases~\cite{kenig-13-nclexpdc,roy-11-f,sen-10-ronfqevpd}.
In this paper we focus on intensional query evaluation with polynomials.
Many data models have been proposed for encoding PDBs more compactly than as sets of possible worlds.
These include tuple-independent databases~\cite{VS17} (\tis), block-independent databases (\bis)~\cite{RS07}, and \emph{PC-tables}~\cite{GT06}.
%
Fink et al.~\cite{FH12} study aggregate queries over a probabilistic version of the extension of K-relations for aggregate queries proposed in~\cite{AD11d} (\emph{pvc-tables}) that supports bags, and has runtime complexity linear in the size of the lineage.
However, this lineage is encoded as a tree; the size (and thus the runtime) is still superlinear in $\qruntime{\query, \tupset, \bound}$.
The runtime bound is also limited to a specific class of (hierarchical) queries, suggesting the possibility of a generalization of \cite{DS12}'s dichotomy result to \abbrBPDB\xplural.
Several techniques for approximating tuple probabilities have been proposed in related work~\cite{FH13,heuvel-19-anappdsd,DBLP:conf/icde/OlteanuHK10,DS07}, relying on Monte Carlo sampling, e.g.,~\cite{DS07}, or a branch-and-bound paradigm~\cite{DBLP:conf/icde/OlteanuHK10}.
Our approximation algorithm is also based on sampling.
\noindent \textbf{Compressed Encodings} are used for Boolean formulas (e.g., various types of circuits including OBDDs~\cite{jha-12-pdwm}) and polynomials (e.g., factorizations~\cite{factorized-db}) some of which have been utilized for probabilistic query processing, e.g.,~\cite{jha-12-pdwm}.
Compact representations for which probabilities can be computed in linear time include OBDDs, SDDs, d-DNNF, and FBDD.
\cite{DM14c} studies circuits for absorptive semirings while~\cite{S18a} studies circuits that include negation (expressed as the monus operation). Algebraic Decision Diagrams~\cite{bahar-93-al} (ADDs) generalize BDDs to variables with more than two values. Chen et al.~\cite{chen-10-cswssr} introduced the generalized disjunctive normal form.
\Cref{sec:param-compl} covers more related work on fine-grained complexity.
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

20
arXiv/single_p.tex Normal file
View File

@ -0,0 +1,20 @@
%root: main.tex
%!TEX root=./main.tex
\subsection{Single $\prob$ value}
\label{sec:single-p}
While \Cref{thm:mult-p-hard-result} shows that computing $\rpoly(\prob,\dots,\prob)$ for multiple values of $\prob$ in general is hard, it does not rule out the possibility that one can compute this value exactly for a {\em fixed} value of $\prob$. Indeed, it is easy to check that one can compute $\rpoly(\prob,\dots,\prob)$ exactly in linear time for $\prob\in \inset{0,1}$. Next we show that these two are the only possibilities:
\begin{Theorem}\label{th:single-p-hard}
Fix $\prob\in (0,1)$. Then assuming \Cref{conj:graph} is true, any algorithm that computes $\rpoly_{G}^3(\prob,\dots,\prob)$ for arbitrary $G = (\vset, \edgeSet)$ exactly has to run in time $\Omega\inparen{\abs{\edgeSet}^{1+\eps_0}}$, where $\eps_0$ is as defined in \Cref{conj:graph}.
\end{Theorem}
Note that \Cref{prop:expection-of-polynom} and \Cref{th:single-p-hard} above imply the hardness result in the first row of \Cref{tab:lbs}.
We note that \Cref{thm:k-match-hard} and \Cref{conj:known-algo-kmatch} (and the lower bounds in the second and third rows of Table~\ref{tab:lbs}) need $k$ to be large enough (in particular, we need a family of hard queries). But the above \Cref{th:single-p-hard} (and the lower bound in the first row of Table~\ref{tab:lbs}) holds for $k=3$ (and hence for a fixed query).
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:

120
arXiv/two-step-model.tex Normal file
View File

@ -0,0 +1,120 @@
%root: main.tex
\usetikzlibrary{shapes.geometric}%for cylinder
\usetikzlibrary{shapes.arrows}%for arrow shape
\usetikzlibrary{shapes.misc}
%rid of vertical spacing for booktabs rules
\renewcommand{\aboverulesep}{0pt}
\renewcommand{\belowrulesep}{0pt}
\begin{figure}[t!]
\centering
\resizebox{\textwidth}{5.2cm}{%
\begin{tikzpicture}
%pdb cylinder
\node[cylinder, text width=0.28\textwidth, align=center, draw=black, text=black, cylinder uses custom fill, cylinder body fill=blue!10, aspect=0.12, minimum height=5cm, minimum width=2.5cm, cylinder end fill=blue!50, shape border rotate=90] (cylinder) at (0, 0) {
\tabcolsep=0.1cm
\begin{tabular}{>{\small}c | >{\small}c | >{\small}c}
\multicolumn{2}{c}{$\boldsymbol{T}$}\\
Point & $\Phi$ \\
\midrule
$e_1$ & $A$ \\
$e_2$ & $B$ \\
$e_3$ & $C$ \\
$e_4$ & $E$ \\
\end{tabular}\\
\tabcolsep=0.05cm
\begin{tabular}{>{\footnotesize}c | >{\footnotesize}c | >{\footnotesize}c | >{\footnotesize}c}
\multicolumn{3}{c}{$\boldsymbol{R}$}\\
$\text{Point}_1$ & $\text{Point}_2$ & $\Phi$\\
\midrule
$e_1$ & $e_2$ & $X$\\
$e_2$ & $e_4$ & $Y$\\
$e_2$ & $e_3$ & $Z$\\
\end{tabular}};
%label below cylinder
\node[below=0.2 cm of cylinder]{{\LARGE$ \pdb$}};
%First arrow
\node[single arrow, right=0.25 of cylinder, draw=black, fill=black!65, text=white, minimum height=0.75cm, minimum width=0.25cm](arrow1) {\textbf{\abbrStepOne}};
\node[above=of arrow1](arrow1Label) {$\query_2$};
\usetikzlibrary{arrows.meta}%for the following arrow configurations
\draw[line width=0.5mm, dashed, arrows = -{Latex[length=3mm, open]}] (arrow1Label)->(arrow1);
%Query output (output of step 1)
\node[rectangle, right=0.175 of arrow1, draw=black, text=black, fill=purple!10, minimum height=4.5cm, minimum width=2cm](rect) {
\tabcolsep=0.075cm
\begin{tabular}{>{\normalsize}c | >{\centering\arraybackslash\normalsize}m{1.95cm} | >{\centering\arraybackslash\small}m{1.95cm}}
Point & $\Phi$ & Circuit\\
\midrule
$e_1$ & $AX$ &\resizebox{!}{10mm}{
\begin{tikzpicture}[thick]
\node[gen_tree_node](sink) at (0.5, 0.8){$\boldsymbol{\circmult}$};
\node[gen_tree_node](source1) at (0, 0){$A$};
\node[gen_tree_node](source2) at (1, 0){$X$};
\draw[->](source1)--(sink);
\draw[->] (source2)--(sink);
\end{tikzpicture}
}\\
$e_2$ & $B(Y + Z)$\newline \text{Or}\newline $BY+ BZ$&
\resizebox{!}{16mm} {
\begin{tikzpicture}[thick]
\node[gen_tree_node] (a1) at (1, 0){$Y$};
\node[gen_tree_node] (b1) at (2, 0){$Z$};
%level 1
\node[gen_tree_node] (a2) at (0.75, 0.8){$B$};
\node[gen_tree_node] (b2) at (1.5, 0.8){$\boldsymbol{\circplus}$};
%level 0
\node[gen_tree_node] (a3) at (1.1, 1.6){$\boldsymbol{\circmult}$};
%edges
\draw[->] (a1) -- (b2);
\draw[->] (b1) -- (b2);
\draw[->] (a2) -- (a3);
\draw[->] (b2) -- (a3);
\end{tikzpicture}
}\newline\text{Or}\newline
%%%%%%%%%%%
%Non factorized circuit%
%%%%%%%%%%%
\resizebox{!}{16mm} {
\begin{tikzpicture}[thick]
\node[gen_tree_node] (a2) at (0, 0){$Y$};
\node[gen_tree_node] (b2) at (1, 0){$B$};
\node[gen_tree_node] (c2) at (2, 0){$Z$};
%level 1
\node[gen_tree_node] (a1) at (0.5, 0.8){$\boldsymbol{\circmult}$};
\node[gen_tree_node] (b1) at (1.5, 0.8){$\boldsymbol{\circmult}$};
%level 0
\node[gen_tree_node] (a0) at (1.0, 1.6){$\boldsymbol{\circplus}$};
%edges
\draw[->] (a2) -- (a1);
\draw[->] (b2) -- (a1);
\draw[->] (b2) -- (b1);
\draw[->] (c2) -- (b1);
\draw[->] (a1) -- (a0);
\draw[->] (b1) -- (a0);
\end{tikzpicture}
}\\
\end{tabular}
};
%label below rectangle
\node[below=0.2cm of rect]{{\LARGE $\query_2(\pdb)\inparen{\tup}\equiv \poly\inparen{\vct{X}}$}};
%Second arrow
\node[single arrow, right=0.25 of rect, draw=black, fill=black!65, text=white, minimum height=0.75cm, minimum width=0.25cm](arrow2) {\textbf{\abbrStepTwo}};
%Expectation computation; (output of step 2)
\node[rectangle, right=0.25 of arrow2, rounded corners, draw=black, fill=red!10, text=black, minimum height=4.5cm, minimum width=2cm](rrect) {
\tabcolsep=0.09cm
%\captionof{table}{Q}
\begin{tabular}{>{\small}c | >{\arraybackslash\normalsize}c}
Point & $\mathbb{E}[\poly(\vct{X})]$\\
\midrule
$e_1$ & $\inparen{\prob_{A, 1} +\prob_{A, 2}}\cdot\left(\prob_{X, 1} + 2\prob_{X, 2}\right)$\\
$e_2$ & $\inparen{\prob_{B, 1} + \prob_{B, 2}}\inparen{\prob_{Y, 1}+2\prob_{Y, 2} + \prob_{Z, 1} + 2\prob_{Z, 2}}$\\
\end{tabular}
};
%label of rounded rectangle
\node[below=0.2cm of rrect]{{\LARGE $\expct\pbox{\poly(\vct{X})}$}};
\end{tikzpicture}
}
\caption{Intensional Query Evaluation Model ($\query_2 = \project_{\text{Point}}$ $\inparen{T\join_{\text{Point} = \text{Point}_1}R}$ where, for table $R$, $\bound = 2$, while for $T$, $\bound = 1$).}
\label{fig:two-step}
\end{figure}

1
comment.cut Normal file
View File

@ -0,0 +1 @@
\input{acknowledgements}

0
experiments.log Normal file
View File

0
intro-rewrite-070921.log Normal file
View File

0
intro-rewrite2.log Normal file
View File

0
lin_sys.log Normal file
View File

0
macros.log Normal file
View File

View File

@ -208,7 +208,7 @@
%consider replacing \pbrace with what is below
\newcommand{\inparen}[1]{\left({#1}\right)}
\newcommand{\inset}[1]{\left\{{#1}\right\}}%we already have this as \pbrace; need to pick one
\newcommand{\intup}[1]{\left\langle{#1}\right\rangle}
\newcommand{\intuple}[1]{\left\langle{#1}\right\rangle}

Some files were not shown because too many files have changed in this diff Show More