Importing prior paper

master
Oliver Kennedy 2022-03-13 13:09:20 -04:00
parent 39b8164cf2
commit ae6dd61298
Signed by: okennedy
GPG Key ID: 3E5F9B3ABD3FDB60
17 changed files with 12999 additions and 5 deletions

819
ACM-Reference-Format.bbx Normal file
View File

@ -0,0 +1,819 @@
\ProvidesFile{ACM-Reference-Format.bbx}[2017-09-27 v0.1 biblatex bibliography style]
% Inherit a default style
\RequireBibliographyStyle{trad-plain}
%%% Localisation strings for ACM
% Replaces the American-English strings for thesis types and for the
% editor/edition markers with the abbreviated forms defined below.
\DefineBibliographyStrings{american}{%
  mathesis = {Master's thesis},
  phdthesis = {Ph\adddot{}D\adddotspace Dissertation},
  editor = {(Ed\adddot)},
  editors = {(Eds\adddot)},
  edition = {ed\adddot},
}
%%% Formatting for fields
% The commented-out declaration below is disabled and kept for reference only.
%\DeclareFieldFormat
% [article,inbook,incollection,inproceedings,patent,thesis,unpublished]
% {title}{#1}
% pages, number and key are printed verbatim, without prefix or suffix.
\DeclareFieldFormat{pages}{#1}
% numpages gets a " pages" suffix, articleno an "Article " prefix.
\DeclareFieldFormat{numpages}{#1 pages}
\DeclareFieldFormat{number}{#1}
\DeclareFieldFormat{articleno}{Article #1}
\DeclareFieldFormat{key}{#1}
% The access date is wrapped as "Retrieved <date> from" so that it reads
% naturally when the URL is printed right after it.
\DeclareFieldFormat{urldate}{Retrieved\space{}#1\space{}from}
% lastaccessed shares the urldate format.
\DeclareFieldAlias{lastaccessed}{urldate}
\DeclareFieldFormat{url}{\url{#1}}
% Edition, wrapped in parentheses. Integer values go through
% \mkbibordedition; anything else is printed as given. Both are followed
% by the localised "edition" string.
\DeclareFieldFormat{edition}{%
  \printtext[parens]{\ifinteger{#1}
    {\mkbibordedition{#1}~\bibstring{edition}}
    {#1\isdot~\bibstring{edition}}}}
% Handle urls field containing 'and' separated list of URLs
% https://github.com/plk/biblatex/issues/229
% Every list item is typeset with \url; all items but the last are
% followed by a comma and a space.
\DeclareListFormat{urls}{%
  \url{#1}%
  \ifthenelse{\value{listcount}<\value{liststop}}
    {\addcomma\space}
    {}}
% Print the single url field when it is set, otherwise the urls list.
\renewbibmacro*{url}{%
  \iffieldundef{url}
    {\printlist{urls}}
    {\printfield{url}}}
%%% Bibmacro definitions
% Translators in heading position. When translators are disabled or the
% translator list is empty, the key field is printed instead.
\renewbibmacro*{translator+others}{%
  \ifboolexpr{
    test \ifusetranslator
    and
    not test {\ifnameundef{translator}}
  }
    {\printnames{translator}%
     \setunit{\addcomma\space}%
     \usebibmacro{translator+othersstrg}%
     \clearname{translator}}
    {\printfield{key}}}
% Publication year; entries without a year print the placeholder "[n. d.]".
\newbibmacro*{year}{%
  \iffieldundef{year}%
    {\printtext{[n.\ d.]}}%
    {\printfield{year}}%
}
% Full date, wrapped in parentheses.
\renewbibmacro*{date}{%
  \printtext[parens]{\printdate}}
% Access date (only when urlyear is set) followed by the URL, so the
% urldate format's text leads directly into the address.
\renewbibmacro*{url+urldate}{%
  \iffieldundef{urlyear}
    {}
    {\usebibmacro{urldate}%
     \setunit*{\addspace}}%
  \usebibmacro{url}%
}
% Journal block for @article: journal name, optional series, then the
% volume/number/date/pages group and the issue information.
\renewbibmacro*{journal+issuetitle}{%
  \usebibmacro{journal}%
  \setunit*{\addcomma\space}%
  \iffieldundef{series}
    {}
    {\newunit%
     \printfield{series}%
     \setunit{\addspace}}%
  \usebibmacro{volume+number+date+pages+eid}%
  \newcommaunit%
  % \setunit{\addspace}%
  \usebibmacro{issue-issue}%
  \setunit*{\addcolon\space}%
  \usebibmacro{issue}%
  \newunit}
% Comma-separated run of volume, number, article number, date (only when
% a month is given), pages -- or numpages when pages is missing -- and eid.
%
% Every line inside the body ends in `%': an unprotected line end after a
% closing brace becomes a space token that is typeset as glue, and would
% only disappear if a later punctuation command happened to remove it.
\newbibmacro*{volume+number+date+pages+eid}{%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \usebibmacro{date-ifmonth}%
  \setunit{\addcomma\space}%
  \iffieldundef{pages}%
    {\printfield{numpages}}%
    {\printfield{pages}}%
  \newcommaunit%
  \printfield{eid}}%
% Chapter followed by the page range; numpages is used when pages is absent.
\renewbibmacro*{chapter+pages}{%
  \printfield{chapter}%
  \setunit{\bibpagespunct}%
  \iffieldundef{pages}%
    {\printfield{numpages}}%
    {\printfield{pages}}%
  \newunit}
% Editors in heading position. When editors are disabled or the editor
% list is empty, the organization list is printed instead (if present).
\renewbibmacro*{editor+others}{%
  \ifboolexpr{
    test \ifuseeditor
    and
    not test {\ifnameundef{editor}}
  }
    {\printnames{editor}%
     \setunit{\addcomma\space}%
     \usebibmacro{editor+othersstrg}%
     \clearname{editor}}
    {\iflistundef{organization}{}{\printlist{organization}}}}
% Issue field followed by the date (the date only when a month is given);
% prints nothing when the entry has no issue.
\newbibmacro*{issue-issue}{%
  \iffieldundef{issue}%
    {}%
    {\printfield{issue}%
     \setunit*{\addcomma\space}%
     \usebibmacro{date-ifmonth}%
    }%
  \newunit}
% Proceedings title block: optional main title with volume/part, then the
% book title, the series in parentheses, the number and the article number.
%
% Line ends inside the body are protected with `%' so that no space token
% is typeset between the units.
\newbibmacro*{maintitle+booktitle+series+number}{%
  \iffieldundef{maintitle}
    {}
    {\usebibmacro{maintitle}%
     \newunit\newblock
     \iffieldundef{volume}
       {}
       {\printfield{volume}%
        \printfield{part}%
        \setunit{\addcolon\space}}}%
  \usebibmacro{booktitle}%
  \setunit*{\addspace}%
  \printfield[parens]{series}%
  \setunit*{\addspace}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \newunit
}
% Book title with optional subtitle, followed by the booktitleaddon field.
% Nothing is printed for the title part when both booktitle and
% booksubtitle are undefined.
\renewbibmacro*{booktitle}{%
  \ifboolexpr{
    test {\iffieldundef{booktitle}}
    and
    test {\iffieldundef{booksubtitle}}
  }
    {}
    {\printtext[booktitle]{%
       \printfield[titlecase]{booktitle}%
       % The `%' after the opening brace matters: without it a space token
       % is typeset between the title and the subtitle punctuation.
       \iffieldundef{booksubtitle}{}{%
         \setunit{\subtitlepunct}%
         \printfield[titlecase]{booksubtitle}}%
     }%
    }%
  \printfield{booktitleaddon}}
% Comma-separated volume, number, article number and eid.
% All line ends are protected with `%' to keep stray space tokens out of
% the typeset entry.
\renewbibmacro*{volume+number+eid}{%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \printfield{eid}}
% Publisher, location and date, comma-separated; the date is printed
% only when the entry carries a month.
\renewbibmacro*{publisher+location+date}{%
  \printlist{publisher}%
  \setunit*{\addcomma\space}%
  \printlist{location}%
  \setunit*{\addcomma\space}%
  \usebibmacro{date-ifmonth}%
  \newunit}
% Print the full date only for entries that specify a month; entries
% without a month print nothing here.
% Defined with the starred form like the other macros in this file, and
% with the line end after \usebibmacro{date} protected so that no space
% token follows the date.
\newbibmacro*{date-ifmonth}{%
  \iffieldundef{month}{}{%
    \usebibmacro{date}%
  }%
}
% School, institution, location and date, comma-separated; the date is
% printed only when the entry carries a month.
\renewbibmacro*{institution+location+date}{%
  \printlist{school}%
  \setunit*{\addcomma\space}%
  \printlist{institution}%
  \setunit*{\addcomma\space}%
  \printlist{location}%
  \setunit*{\addcomma\space}%
  \usebibmacro{date-ifmonth}%
  \newunit}
% Title and subtitle of a periodical (when a title exists), followed by
% the journal macro.
\renewbibmacro*{periodical}{%
  \iffieldundef{title}
    {}
    {\printtext[title]{%
       \printfield[titlecase]{title}%
       \setunit{\subtitlepunct}%
       \printfield[titlecase]{subtitle}}}%
  \newunit%
  \usebibmacro{journal}}
% Date, preceded by the issue field and a space when an issue is given.
\renewbibmacro*{issue+date}{%
  \iffieldundef{issue}
    {\usebibmacro{date}}
    {\printfield{issue}%
     \setunit*{\addspace}%
     \usebibmacro{date}}%
  \newunit}
% Title block for @periodical: periodical title, optional series, then
% volume, number, article number, eid, issue/date and the issue title.
% The line end after the articleno field is protected with `%' so that no
% space token is typeset there.
\renewbibmacro*{title+issuetitle}{%
  \usebibmacro{periodical}%
  \setunit*{\addspace}%
  \iffieldundef{series}
    {}
    {\newunit
     \printfield{series}%
     \setunit{\addspace}}%
  \printfield{volume}%
  \setunit*{\addcomma\space}%
  \printfield{number}%
  \setunit*{\addcomma\space}%
  \printfield{articleno}%
  \setunit{\addcomma\space}%
  \printfield{eid}%
  \setunit{\addspace}%
  \usebibmacro{issue+date}%
  \setunit{\addcolon\space}%
  \usebibmacro{issue}%
  \newunit}
%%% Definitions for drivers (alphabetical)
% Driver for @article entries.
% Order: names, year, title, language, by-lines, version, journal block,
% editors, note, ISSN (behind the bbx:isbn toggle), DOI/eprint/URL,
% addendum/pubstate, back references, related entries.
\DeclareBibliographyDriver{article}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/translator+others}%
  \setunit{\labelnamepunct}\newblock%
  \usebibmacro{year}%
  \newunit%
  \usebibmacro{title}%
  \newunit%
  \printlist{language}%
  \newunit\newblock%
  \usebibmacro{byauthor}%
  \newunit\newblock%
  \usebibmacro{bytranslator+others}%
  \newunit\newblock%
  \printfield{version}%
  \newunit\newblock%
  \usebibmacro{journal+issuetitle}%
  \newunit%
  \usebibmacro{byeditor+others}%
  \newunit%
  \printfield{note}%
  \newunit\newblock%
  \iftoggle{bbx:isbn}
    {\printfield{issn}}
    {}%
  \newunit\newblock%
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock%
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock%
  % Related entries are printed only when the bbx:related toggle is on,
  % and related:init runs first -- the same guard the other drivers in
  % this file use.
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @book entries.
% Order: names, year, title, language, by-lines, edition, series, volume
% and part (only without a main title), volumes, note, publisher block,
% chapter/pages, page total, ISBN, DOI/eprint/URL, addendum/pubstate,
% back references, related entries.
\DeclareBibliographyDriver{book}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/editor+others/translator+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}%
  \newunit%
  \usebibmacro{maintitle+title}%
  \newunit%
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{byeditor+others}%
  \newunit\newblock
  \printfield{edition}%
  \newunit
  \usebibmacro{series+number}%
  \iffieldundef{maintitle}
    {\printfield{volume}%
     \printfield{part}}
    {}%
  \newunit
  \newunit\newblock
  \printfield{volumes}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{publisher+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit
  \printfield{pagetotal}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @inbook entries.
% The heading shows the authors, or the editors when the entry has no
% author; in the latter case the editors are not repeated after the book
% title.
%
% `author' is a name list, so its presence must be tested with
% \ifnameundef. The field test \iffieldundef does not look at name lists,
% so it would always take the "no author" branch: the editors would be
% printed in the heading for every entry and the author never.
\DeclareBibliographyDriver{inbook}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \ifnameundef{author}%
    {\usebibmacro{byeditor+others}}%
    {\usebibmacro{author/translator+others}}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  % \usebibmacro{in:}%
  \usebibmacro{bybookauthor}%
  \newunit\newblock
  \usebibmacro{maintitle+booktitle}%
  \newunit\newblock
  \ifnameundef{author}{}%if undef then we already printed editor
    {\usebibmacro{byeditor+others}}%
  \newunit\newblock
  \printfield{edition}%
  \newunit
  \iffieldundef{maintitle}
    {\printfield{volume}%
     \printfield{part}}
    {}%
  \newunit
  \printfield{volumes}%
  \newunit\newblock
  \usebibmacro{series+number}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{publisher+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @incollection entries.
% Order: names, year, title, language, by-line, "In:" with the collection
% title, series, edition, volume and part (only without a main title),
% volumes, editors, note, publisher block, chapter/pages, ISBN,
% DOI/eprint/URL, addendum/pubstate, back references, related entries.
\DeclareBibliographyDriver{incollection}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/translator+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{in:}%
  \usebibmacro{maintitle+booktitle}%
  \newunit\newblock
  \usebibmacro{series+number}%
  \newunit\newblock
  \printfield{edition}%
  \newunit
  \iffieldundef{maintitle}
    {\printfield{volume}%
     \printfield{part}}
    {}%
  \newunit
  \printfield{volumes}%
  \newunit\newblock
  \usebibmacro{byeditor+others}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{publisher+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @inproceedings entries.
% Order: names, year, title, language, by-line, "In:" with the proceedings
% title block (title, series, number, article number), event data,
% editors, volume and part (only without a main title), volumes, note,
% organization, publisher block, chapter/pages, ISBN, DOI/eprint/URL,
% addendum/pubstate, back references, related entries.
\DeclareBibliographyDriver{inproceedings}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/translator+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{in:}%
  \usebibmacro{maintitle+booktitle+series+number}%
  \newunit\newblock
  \usebibmacro{event+venue+date}%
  \newunit\newblock
  \usebibmacro{byeditor+others}%
  \newunit\newblock
  \iffieldundef{maintitle}
    {\printfield{volume}%
     \printfield{part}}
    {}%
  \newunit
  \printfield{volumes}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \printlist{organization}%
  \newunit
  \usebibmacro{publisher+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @manual entries.
% Order: names, year, title, language, by-lines, edition, series, type,
% version, note, organization, publisher block, chapter/pages, page
% total, ISBN, DOI/eprint/URL, addendum/pubstate, back references,
% related entries.
\DeclareBibliographyDriver{manual}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/editor+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{byeditor}%
  \newunit\newblock
  \printfield{edition}%
  \newunit\newblock
  \usebibmacro{series+number}%
  \newunit\newblock
  \printfield{type}%
  \newunit
  \printfield{version}%
  \newunit
  \printfield{note}%
  \newunit\newblock
  \printlist{organization}%
  \newunit
  \usebibmacro{publisher+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit
  \printfield{pagetotal}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @misc entries.
% Order: names, year, title, language, by-lines, howpublished, type,
% version, note, organization block, DOI/eprint/URL, addendum/pubstate,
% back references, related entries.
\DeclareBibliographyDriver{misc}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/editor+others/translator+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{byeditor+others}%
  \newunit\newblock
  \printfield{howpublished}%
  \newunit\newblock
  \printfield{type}%
  \newunit
  \printfield{version}%
  \newunit
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{organization+location+date}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @online entries.
% Order: names, year, title, language, by-lines, version, note,
% organization, date (only with a month), eprint (behind the bbx:eprint
% toggle), access date and URL, addendum/pubstate, back references,
% related entries.
\DeclareBibliographyDriver{online}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author/editor+others/translator+others}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{byeditor+others}%
  \newunit\newblock
  \printfield{version}%
  \newunit
  \printfield{note}%
  \newunit\newblock
  \printlist{organization}%
  \newunit\newblock
  \usebibmacro{date-ifmonth}%
  \newunit\newblock
  \iftoggle{bbx:eprint}
    {\usebibmacro{eprint}}
    {}%
  \newunit\newblock
  \usebibmacro{url+urldate}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% For @patent entries the number field is prefixed with "Patent No.".
\DeclareFieldFormat[patent]{number}{Patent No.~#1}
% Driver for @patent entries.
% Order: author, year, title, language, by-line, date, type and number,
% location list in parentheses (when present), holder, note,
% DOI/eprint/URL, addendum/pubstate, back references, related entries.
\DeclareBibliographyDriver{patent}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}%
  \newunit
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \usebibmacro{date}%
  \newunit\newblock
  \printfield{type}%
  \setunit*{\addspace}%
  \printfield{number}%
  \iflistundef{location}
    {}
    {\setunit*{\addspace}%
     \printtext[parens]{%
       \printlist[][-\value{listtotal}]{location}}}%
  \newunit\newblock
  \usebibmacro{byholder}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @periodical entries.
% Order: editor, year, title/issue block, language, by-editor, note, ISSN
% (behind the bbx:isbn toggle), DOI/eprint/URL, addendum/pubstate, back
% references, related entries.
\DeclareBibliographyDriver{periodical}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{editor}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit
  \usebibmacro{title+issuetitle}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byeditor}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{issn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @report entries.
% Order: author, year, title, language, by-line, type and number,
% version, note, institution block, chapter/pages, page total, ISRN
% (behind the bbx:isbn toggle), DOI/eprint/URL, addendum/pubstate, back
% references, related entries.
\DeclareBibliographyDriver{report}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \printfield{type}%
  \setunit*{\addspace}%
  \printfield{number}%
  \newunit\newblock
  \printfield{version}%
  \newunit
  \printfield{note}%
  \newunit\newblock
  \usebibmacro{institution+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit
  \printfield{pagetotal}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isrn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}
% Driver for @thesis entries.
% Order: author, year, title, language, by-line, type, institution block,
% chapter/pages, page total, ISBN, DOI/eprint/URL, addendum/pubstate,
% back references, note, related entries.
\DeclareBibliographyDriver{thesis}{%
  \usebibmacro{bibindex}%
  \usebibmacro{begentry}%
  \usebibmacro{author}%
  \setunit{\labelnamepunct}\newblock
  \usebibmacro{year}% `%' keeps a stray space token out of the entry
  \newunit
  \usebibmacro{title}%
  \newunit
  \printlist{language}%
  \newunit\newblock
  \usebibmacro{byauthor}%
  \newunit\newblock
  \printfield{type}%
  \newunit
  \usebibmacro{institution+location+date}%
  \newunit\newblock
  \usebibmacro{chapter+pages}%
  \newunit
  \printfield{pagetotal}%
  \newunit\newblock
  \iftoggle{bbx:isbn}
    {\printfield{isbn}}
    {}%
  \newunit\newblock
  \usebibmacro{doi+eprint+url}%
  \newunit\newblock
  \usebibmacro{addendum+pubstate}%
  \setunit{\bibpagerefpunct}\newblock
  \usebibmacro{pageref}%
  \newunit\newblock
  \printfield{note}%
  \newunit\newblock
  \iftoggle{bbx:related}
    {\usebibmacro{related:init}%
     \usebibmacro{related}}
    {}%
  \usebibmacro{finentry}}

2893
ACM-Reference-Format.bst Normal file

File diff suppressed because it is too large Load Diff

5
ACM-Reference-Format.cbx Normal file
View File

@ -0,0 +1,5 @@
% Citation style companion to the ACM-Reference-Format bibliography style.
\ProvidesFile{ACM-Reference-Format.cbx}[2017-09-27 v0.1]
% Citations reuse biblatex's numeric citation style unchanged.
\RequireCitationStyle{numeric}
\endinput

18
ACM-Reference-Format.dbx Normal file
View File

@ -0,0 +1,18 @@
% Data model extensions: each pair below declares a field or list and
% then makes it available to entries.
% Teach biblatex about numpages field
\DeclareDatamodelFields[type=field, datatype=literal]{numpages}
\DeclareDatamodelEntryfields{numpages}
% Teach biblatex about articleno field
\DeclareDatamodelFields[type=field, datatype=literal]{articleno}
\DeclareDatamodelEntryfields{articleno}
% Teach biblatex about urls field
\DeclareDatamodelFields[type=list, datatype=uri]{urls}
\DeclareDatamodelEntryfields{urls}
% Teach biblatex about school field
\DeclareDatamodelFields[type=list, datatype=literal]{school}
% school is registered for the thesis entry type only.
\DeclareDatamodelEntryfields[thesis]{school}
% Teach biblatex about key field
\DeclareDatamodelFields[type=field, datatype=literal]{key}
\DeclareDatamodelEntryfields{key}

2561
acmart.cls Normal file

File diff suppressed because it is too large Load Diff

6328
acmart.dtx Normal file

File diff suppressed because it is too large Load Diff

30
acmart.ins Normal file
View File

@ -0,0 +1,30 @@
%
% Docstrip file for acmart
% This file is in public domain
% $Id: acmart.ins,v 1.1 2015/11/23 22:42:55 boris Exp $
%
\def\batchfile{acmart.ins}
\input docstrip
\keepsilent
\showprogress
\askforoverwritefalse
% Extract the `class' guard of acmart.dtx into acmart.cls.
\generate{%
\file{acmart.cls}{\from{acmart.dtx}{class}}
}
% Spaces are made significant for the banner that follows.
\obeyspaces
\Msg{*****************************************************}%
\Msg{* Congratulations! You successfully generated the *}%
\Msg{* acmart package. *}%
\Msg{* *}%
\Msg{* Please move the file acmart.cls to where LaTeX *}%
\Msg{* files are stored in your system. The manual is *}%
\Msg{* acmart.pdf. *}%
\Msg{* *}%
\Msg{* The package is released under LPPL *}%
\Msg{* *}%
\Msg{* Happy TeXing! *}%
\Msg{*****************************************************}%

126
main.tex
View File

@ -1,14 +1,130 @@
\documentclass{article}
\usepackage[utf8]{inputenc}
\documentclass[sigconf]{acmart}
%\documentclass{vldb}
\title{Parallelizing Python}
\author{okennedy }
\date{March 2022}
\usepackage[a-1b]{pdfx}
\usepackage{booktabs} % For formal tables
\usepackage{xspace}
\usepackage[utf8]{inputenc}
\usepackage{stmaryrd}
\usepackage{balance} % for \balance command ON LAST PAGE (only there!)
\usepackage{cleveref}
\usepackage{pifont}
\usepackage{todonotes}
\usepackage{setspace}
\usepackage{balance}
\usepackage[noend]{algpseudocode}
\usepackage{algorithm}
\usepackage{multirow}
\usepackage{colortbl}
% Pulls the following material up by 5mm (negative vertical space).
\newcommand{\trimfigurespacing}{\vspace*{-5mm}}
% Per-author review notes: each wraps \todo with its own background colour
% and a bold "<name> says:" prefix in \tiny type.
\newcommand{\OK}[1]{\todo[backgroundcolor=blue!25]{\tiny \textbf{Oliver says:} #1}}
\newcommand{\BG}[1]{\todo[backgroundcolor=red!25]{\tiny \textbf{Boris says:} #1}}
\newcommand{\ND}[1]{\todo[backgroundcolor=green!25]{\tiny \textbf{Nachiket says:} #1}}
% Named colours, given as HTML hex values.
\definecolor{PineGreen}{HTML}{007B62}
\definecolor{Purple} {HTML}{99479B}
\definecolor{NavyBlue} {HTML}{006EB8}
\definecolor{BrickRed} {HTML}{B6321C}
\definecolor{Black} {HTML}{000000}
% \newcommand{\reva}[1]{\textcolor{Black}{#1}}
% \newcommand{\revb}[1]{\textcolor{Black}{#1}}
% \newcommand{\revc}[1]{\textcolor{Black}{#1}}
% \newcommand{\revm}[1]{\textcolor{Black}{#1}}
\input{preamble}
% Theorem-like environments used in the paper body.
\newtheorem{example}{Example}
\newtheorem{definition}{Definition}
% System name; \xspace supplies the following space where needed.
\newcommand{\systemname}{DAGBook\xspace}
% Paper title, kept in a macro so it is defined in one place.
\newcommand{\TheTitle}{\systemname: Parallel Execution with Partial Provenance}
\pagestyle{plain}
% Define the \BibTeX logo at \begin{document} unless something already provides it.
\AtBeginDocument{%
\providecommand\BibTeX{{%
\normalfont B\kern-0.5em{\scshape i\kern-0.25em b}\kern-0.8em\TeX}}}
\begin{document}
\fancyhead{}
\title{\TheTitle}
% Author metadata. E-mail addresses are given with \email{} after each
% \affiliation rather than as bare text inside it, so that acmart treats
% them as addresses instead of as part of the affiliation.
\author{Nachiket Deo}
\affiliation{%
  \institution{University of Connecticut}
}
\email{nachideo.em@gmail.com}
\author{Boris Glavic}
\affiliation{%
  \institution{Illinois Institute of Technology}
}
\email{bglavic@iit.edu}
\author{Oliver Kennedy}
\affiliation{%
  \institution{University at Buffalo}
}
\email{okennedy@buffalo.edu}
% The default list of authors is too long for headers.
% \renewcommand{\shortauthors}{Spoth, Xie et al.}
%
% The code below should be generated by the tool at
% http://dl.acm.org/ccs.cfm
% Please copy and paste the code instead of the example below.
%
% \keywords{JSON Schemas, Independence, Markov Models}
% acmart expects the abstract to be declared before \maketitle.
\begin{abstract}
\input{sections/abstract.tex}
\end{abstract}
\maketitle
% Each section's text lives in its own file under sections/.
\section{Introduction}
\label{sec:introduction}
\input{sections/introduction.tex}
\section{\systemname}
\label{sec:system}
\input{sections/system}
\section{Task Isolation}
\label{sec:isolation}
\input{sections/isolation}
\section{Jupyter Import}
\label{sec:import}
\input{sections/import}
\section{Performance}
\label{sec:experiments}
\input{sections/experiments}
\section{Related Work}
\label{sec:related}
\input{sections/related}
\section{Conclusions}
\label{sec:conclusions}
\input{sections/conclusions}
\paragraph{Acknowledgements}
\label{sec:acknowledgements}
\input{sections/acknowledgements.tex}
% Use the ACM reference style that ships alongside this document
% (ACM-Reference-Format.bst) instead of the generic abbrv style.
\bibliographystyle{ACM-Reference-Format}
\balance
\bibliography{main}
\end{document}

1
sections/abstract.tex Normal file
View File

@ -0,0 +1 @@
ABSTRACT

View File

@ -0,0 +1 @@
ACKS

0
sections/conclusions.tex Normal file
View File

5
sections/experiments.tex Normal file
View File

@ -0,0 +1,5 @@
\begin{itemize}
\item Serial vs Parallel Runtime
\item Prediction accuracy
\item Tradeoffs between pruning and non-pruned (any pruning mispredictions) export lists
\end{itemize}

1
sections/import.tex Normal file
View File

@ -0,0 +1 @@
AST construction goes here

24
sections/introduction.tex Normal file
View File

@ -0,0 +1,24 @@
%!TEX root=../main.tex
Workflow systems \OK{cite several systems: Vistrails, etc...} help users to break complex tasks like ETL processes, model-fitting, and more, into a series of smaller steps.
Users explicitly declare dependencies between steps, permitting parallel execution of mutually independent steps.
A recent trend in industry has been to instead encode such tasks through computational notebooks like Jupyter or Zeppelin.
Notebooks likewise allow users to declare tasks as a series of steps, but do not require the user to explicitly declare dependencies.
Consequently, notebook execution frameworks like \OK{reference a few, e.g., netflix's} simply execute the steps of the workflow sequentially (i.e., without parallelism).
\begin{figure}
FIGURE
\caption{The number of Python cells in notebooks scraped from GitHub~\cite{pimentel} against the number of sequential steps required.}
\label{fig:parallelismSurvey}
\end{figure}
To assess the potential for improvement, we conducted a preliminary survey on an archive of Jupyter notebooks scraped from GitHub by Pimentel et~al.~\cite{pimentel}.
Our survey included only notebooks using a Python kernel and known to execute successfully; a total of 800\OK{fill in the exact number} notebooks met these criteria.
We used the python \texttt{ast} module to construct an inter-cell dataflow graph (e.g., using the methodology of \OK{citations}).
As a proxy measure for potential speedup, we considered the depth of this graph in relation to the total number of python cells in the notebook.
\Cref{fig:parallelismSurvey} relates these measures in a XXX.
Although XXX percent of the notebooks do require sequential execution, as many as XXX percent can XXX.
In this paper, we present \systemname, a workflow system designed to facilitate parallel execution of Jupyter notebooks.

57
sections/isolation.tex Normal file
View File

@ -0,0 +1,57 @@
%!TEX root=../main.tex
A typical notebook like Jupyter maintains a single python interpreter instance (called a kernel).
Code from each cell is executed sequentially in this interpreter.
Python, its libraries, and other dependencies are designed under the assumption of a global interpreter lock (GIL) that only permits a single thread running at a time.
The single-kernel approach of Jupyter is thus unsuitable for use with \systemname.
Instead, \systemname defaults to running each cell's code in a freshly allocated interpreter instance.
However, running each cell in its own interpreter presents a problem for passing state between cells.
Under normal circumstances, data flow between cells in a notebook occurs through the global namespace, a dictionary of key-value pairs.
The namespace is used for everything from variables, to function, class, and module definitions, and for symbols imported from other files.
When cells are run in a single interpreter, the global namespace is preserved between cells.
In \systemname, the interpreter running a given cell must be able to reconstruct the global namespace, or at least the necessary subset of it needed to run the cell.
A naive solution would rely exclusively on object serialization, for example via python's native \texttt{pickle} library.
In this scenario, when a cell finishes executing, all elements of its global namespace are serialized and exported as artifacts into \systemname.
Conversely, when a cell accesses a symbol in the global namespace that is not already present, the corresponding artifact can be retrieved and the object deserialized.\footnote{\systemname implements on-demand deserialization by using placeholder ``proxy'' objects.}
This naive solution presents three primary challenges.
First, the global namespace may contain elements that are only used within a cell; serializing and exporting these is an unnecessary performance hit and waste of space and/or memory.
Second, generic serialization mechanisms (like \texttt{pickle}) can be slow, and may not support every value (e.g., stateful values like \texttt{IO} handles, or function definitions).
Finally, exported values may depend on other values in the global namespace (e.g., an exported function that uses the \texttt{global} keyword).
\paragraph{Limiting Serialization}
\systemname avoids the overhead of serializing the entire global namespace through the translation step.
As we discuss in more detail in \Cref{sec:import}, the import process predicts the list of artifacts that the cell will produce or consume.
By default, only artifacts included in this list are serialized.
However, as before, the possibility of mispredictions should not lead to incorrect outputs.
Likewise as before, over-prediction is never a problem; our focus is on entries in the global namespace that are accessed by a subsequent cell without being exported.
\paragraph{Specialized Migration}
\begin{itemize}
\item Default to pickling
\item Provide a special-case bypass (e.g., dataframes via arrow / parquet)
\item Mutable state (i.e., figuring out which variables to export)
\item Side-effecting variables:
\begin{itemize}
\item Record the characteristics of the endpoint if possible (e.g., a file) and re-open it later?
\item This is not something that I'd expect to see pipelined through multiple cells... maybe mark the value as an unpredicted read/write dependency?
\item Might be a workaround to allow checkpointed state
\end{itemize}
\item Functions and Classes
\begin{itemize}
\item Here, the code needs to be imported explicitly
\item (cloudpickle allows serializing functions... can we do that here as well)
\item Note that the exported function may introduce chained dependencies that also need to be registered in cells that use the function. These dependencies may be to cells that appear after (which means we need to extend the misprediction cases).
\end{itemize}
\end{itemize}
\paragraph{Chained Dependencies}
TODO
\paragraph{Interpreter Re-Use}
\begin{itemize}
\item Cells that re-use the same state already have a mutual dependency. We may be able to re-use the same interpreter.
\item We might also be able to re-use python state with some sort of fork/join trickery.
\item Q: is this something we can pull off here, or is this strictly future work?
\end{itemize}

5
sections/related.tex Normal file
View File

@ -0,0 +1,5 @@
\begin{itemize}
\item Provenance for jupyter or python
\item Dataflow analysis / program slicing
\item Textbook Transactions / Optimistic concurrency control
\end{itemize}

130
sections/system.tex Normal file
View File

@ -0,0 +1,130 @@
%!TEX root=../main.tex
\systemname is a workflow system, and relies on an internal representation of notebooks that differs significantly from that of Jupyter.
We present the internal representation first, and discuss how Jupyter notebooks are translated into this model in \Cref{sec:import}.
A \systemname notebook is an ordered list of atomic units of work called \emph{cells}.
Communication between cells is mediated by a \emph{scope}, a partial mapping from identifiers (e.g., variable names) to \emph{artifacts} (e.g., python literals).
We define the baseline semantics of the notebook by an in-order execution of the cells.
Each cell\ldots
(i) reads zero or more identifiers from the scope emitted by the preceding cell, and
(ii) writes zero or more identifier/artifact mappings, extending the preceding cell's scope.
We refer to the set of identifiers read (resp., written) by a cell as the cell's read- (resp., write-)set.
The scope emitted by a cell consists of all mappings written by the cell and the mappings from the preceding scope not in the cell's write set.
\begin{example}
...
\end{example}
\paragraph{Approximate Dependencies}
A full dependency analysis of python is intractable\OK{Cite}.
Even a bounded analysis is impractical; for example libraries with global state can introduce hidden dependencies.
In short, obtaining exact read- and write-sets is impossible without actually running the cell's code.
Needless to say, this is not desirable.
As a result, \systemname computes both best-effort \emph{predicted} read- and write-sets for each cell through static analysis (see \Cref{sec:import}), as well as \emph{actual} read- and write-sets once the cell is actually executed.
\begin{algorithm}
\caption{\texttt{build\_dag}$(\mathcal N)$}
\label{alg:buildDag}
\begin{algorithmic}[1]
\Require{$\mathcal N$: A sequence of cells}
\Ensure{$\mathcal G$: An edge-list for the cell dependency DAG}
\State{$\mathcal S \leftarrow \{\}$; $\mathcal G \leftarrow \{\}$}
\For{$c \in \mathcal N$}
\State{$\mathcal G \leftarrow \mathcal G \cup \{\; (c, \mathcal S[r])\;|\;r \in c.\texttt{reads}\;\}$}
% \For{$r \in c.\texttt{reads}$}
% \State{$\mathcal G \leftarrow \mathcal G \cup \{\; (c, \mathcal S[r])\;\}$}
% \EndFor
\For{$w \in c.\texttt{writes}$}
\State{$\mathcal S \leftarrow \mathcal S \cup \{\;w \mapsto c\;\}$}
\EndFor
\EndFor
\end{algorithmic}
\end{algorithm}
\paragraph{Scheduling}
Scheduling the workflow requires converting the provided sequence of cells into a partial order.
\Cref{alg:buildDag} accomplishes this by simulating sequential execution of the workflow to create a dependency DAG.
The algorithm maintains a virtual scope ($\mathcal S$), a mapping from artifact identifiers to the most recent cell to write the artifact (lines 4--5).
Each artifact read creates an edge from the reading cell to the cell that wrote the artifact read.
Once the dependency DAG is available, scheduling proceeds as expected.
All cells with no out-edges in the DAG are scheduled initially.
When a cell $c$ finishes running, it is marked as done and all cells with an out-edge to $c$ are inspected to see if they are runnable.
If all of a cell's out-edges are to cells marked done, the cell is marked as runnable and scheduled.
This process continues until all cells are marked done.
\paragraph{Prediction Error}
When the workflow is first loaded, \systemname relies on predicted read- and write-sets for each cell.
As we show, our best-effort dependency predictor has high accuracy.
However, when it fails, we need to be able to recover the workflow to a sane state.
We assume that cells are idempotent, as is typically the case for our target workloads of ETL and model-fitting\footnote{
Users can still explicitly mark cells as being non-idempotent (e.g., for a cell that deploys a model into production).
Such cells are conservatively executed serially by blocking on all preceding cells.
}.
There are four possibilities for prediction inaccuracies:
Cell execution completes without reading an artifact that was in the predicted read-set (overprediction),
the cell reads an artifact not in the predicted read-set (underprediction),
and the same two possibilities for the write-set.
Overprediction, whether for reads or writes, may limit opportunities for parallel execution.
However, it does not pose a correctness problem, and by the time it is discovered the cell will have already finished running.
The primary challenge is thus coping with reads and writes that are not in the predicted read- or write-set, respectively.
In either case, the change may add or remove edges to/from the dependency DAG, requiring re-execution of running or even already completed cells.
To streamline updates to the dependency DAG, \systemname caches the virtual input scope ($\mathcal S$) computed for each cell by \Cref{alg:buildDag} (denote by $\mathcal S_c$ the scope emitted by the cell prior to $c$).
When cell $c$ performs an underpredicted read for identifier $r$, an edge $(c, \mathcal S_c[r])$ is added to the dependency DAG.
There are now two possibilities: either cell $\mathcal S_c[r]$ is marked done or it is not.
In the former case, the dependency has already been satisfied and execution may proceed unimpeded.
In the latter case, execution of cell $c$ is immediately paused until $\mathcal S_c[r]$ has completed.
\begin{algorithm}
\caption{\texttt{update\_dag}$(\mathcal N, c_0, w)$}
\label{alg:updateDag}
\begin{algorithmic}[1]
\Require{$\mathcal N$: A sequence of cells}
\Require{$c_0 \in \mathcal N$: A cell performing an unpredicted write}
\Require{$w$: The identifier of the unpredicted write}
\Ensure{$\forall c \in \mathcal N : \mathcal S_c$: Cached input scopes}
\Ensure{$\mathcal G$: An edge-list for the cell dependency DAG}
\For{$c \in \mathcal N$ s.t. $c > c_0$}
\State{$\mathcal S_c \leftarrow \mathcal S_c - \{\;w \rightarrow *\;\} \cup \{\;w \rightarrow c_0\;\}$}
\If{$w \in c.\texttt{reads}$}
\If{$c.\texttt{done} \vee c.\texttt{running}$}
\State{\texttt{abort}$(\mathcal N, c)$}
\EndIf
\State{$\mathcal G \leftarrow \mathcal G - \{\;c \rightarrow *\;\} \cup \{\; (c, \mathcal S_c[r])\;|\;r \in c.\texttt{reads}\;\}$}
\EndIf
\If{$w \in c.\texttt{writes}$}
\Return
\EndIf
\EndFor
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{\texttt{abort}$(\mathcal N, c_0)$}
\label{alg:abort}
\begin{algorithmic}[1]
\Require{$\mathcal N$: A sequence of cells}
\Require{$c_0 \in \mathcal N$: A cell to abort}
\If{$c_0$.\texttt{running}}
\State{Abort $c_0$'s execution}
\ElsIf{$c_0$.\texttt{done}}
\State{Clear $c_0$'s results}
\For{$c$ s.t. $(c, c_0) \in \mathcal G$}
\State{\texttt{abort}$(\mathcal N, c)$}
\EndFor
\EndIf
\end{algorithmic}
\end{algorithm}
An unpredicted write from cell $c_0$ on identifier $w$ may redirect edges in the dependency DAG from another cell to $c_0$.
The process is summarized in \Cref{alg:updateDag}, which iterates through every cell following $c_0$ in order.
The iteration stops at the first cell (if one exists) to overwrite $w$ (line 7).
The cached scope for each cell is updated (line 2), removing prior writes to $w$ and adding $c_0$'s write.
If another cell reads the unpredicted $w$, the dependency DAG must be updated.
All out-edges from the cell are removed and recomputed (line 6).
If the cell was already running or has already completed (lines 4-5), its execution is stopped, results are cleared, and any running dependencies are recursively aborted (\Cref{alg:abort}).