model
parent
cd1a2329d8
commit
39c98f7204
2
main.tex
2
main.tex
|
@ -4,6 +4,7 @@
|
|||
\usepackage{todonotes}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{xspace}
|
||||
\usepackage{colortbl}
|
||||
|
||||
\input{macros}
|
||||
|
||||
|
@ -157,6 +158,7 @@
|
|||
\maketitle
|
||||
|
||||
\input{sections/introduction}
|
||||
\input{sections/model}
|
||||
\input{sections/overview}
|
||||
\input{sections/formalism}
|
||||
\input{sections/system}
|
||||
|
|
|
@ -2,32 +2,32 @@
|
|||
\section{Formalism}
|
||||
\label{sec:formalism}
|
||||
|
||||
\newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
|
||||
% \newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
|
||||
|
||||
\newcommand{\spreadsheet}{\mathbb S}
|
||||
\newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
|
||||
\newcommand{\columnDomain}{\mathcal C}
|
||||
\newcommand{\columnRange}{C}
|
||||
\newcommand{\column}{c}
|
||||
\newcommand{\rowDomain}{\mathcal R}
|
||||
\newcommand{\rowRange}{R}
|
||||
\newcommand{\row}{r}
|
||||
\newcommand{\rowOffset}{\delta}
|
||||
\newcommand{\cell}{a}
|
||||
\newcommand{\exprDomain}{\mathcal E}
|
||||
\newcommand{\expr}{e}
|
||||
\newcommand{\patternDomain}{\mathcal P}
|
||||
\newcommand{\pattern}{P}
|
||||
\newcommand{\valueDomain}{\mathcal V}
|
||||
\newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
|
||||
\newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
|
||||
\newcommand{\cellRef}[2]{(#1\texttt{:}#2)}
|
||||
\newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
|
||||
\newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
|
||||
\newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
|
||||
\newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
|
||||
\newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
|
||||
\newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
|
||||
% \newcommand{\spreadsheet}{\mathbb S}
|
||||
% \newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
|
||||
% \newcommand{\columnDomain}{\mathcal C}
|
||||
% \newcommand{\columnRange}{C}
|
||||
% \newcommand{\column}{c}
|
||||
% \newcommand{\rowDomain}{\mathcal R}
|
||||
% \newcommand{\rowRange}{R}
|
||||
% \newcommand{\row}{r}
|
||||
% \newcommand{\rowOffset}{\delta}
|
||||
% \newcommand{\cell}{a}
|
||||
% \newcommand{\exprDomain}{\mathcal E}
|
||||
% \newcommand{\expr}{e}
|
||||
% \newcommand{\patternDomain}{\mathcal P}
|
||||
% \newcommand{\pattern}{P}
|
||||
% \newcommand{\valueDomain}{\mathcal V}
|
||||
% \newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
|
||||
% \newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
|
||||
% \newcommand{\cellRef}[2]{(#1\texttt{:}#2)}
|
||||
% \newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
|
||||
% \newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
|
||||
% \newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
|
||||
% \newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
|
||||
% \newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
|
||||
% \newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
|
||||
|
||||
Let $\columnDomain$ and $\rowDomain$ denote a domain of column and row (respectively) labels, and let $\exprDomain$ and $\valueDomain$ denote a domain of expressions and values; We will define $\exprDomain$ in greater detail below.
|
||||
We define a \emph{spreadsheet} $\spreadsheet : (\columnDomain \times \rowDomain) \rightarrow \exprDomain$ as a mapping from \emph{cells} ($(\column, \row) \in (\columnDomain \times \rowDomain)$) to expressions.
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Spreadsheet Datamodel}
|
||||
\label{sec:spre-datam}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Colors
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\definecolor{tabexprcolor}{rgb}{0,0.5,0}
|
||||
|
||||
|
||||
\newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
|
||||
|
||||
\newcommand{\ds}{D}
|
||||
\newcommand{\spreadsheet}{\mathbb S}
|
||||
\newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
|
||||
\newcommand{\columnDomain}{\mathcal C}
|
||||
\newcommand{\columnRange}{C}
|
||||
\newcommand{\column}{c}
|
||||
\newcommand{\rowDomain}{\mathcal R}
|
||||
\newcommand{\rowRange}{R}
|
||||
\newcommand{\row}{r}
|
||||
\newcommand{\rowOffset}{\delta}
|
||||
\newcommand{\cell}{a}
|
||||
\newcommand{\exprDomain}{\mathcal E}
|
||||
\newcommand{\expr}{e}
|
||||
\newcommand{\patternDomain}{\mathcal P}
|
||||
\newcommand{\pattern}{P}
|
||||
\newcommand{\valueDomain}{\mathcal V}
|
||||
\newcommand{\valat}[3]{#1[#2,#3]}
|
||||
\newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
|
||||
\newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
|
||||
\newcommand{\cellRef}[2]{(#1,#2)}
|
||||
\newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
|
||||
\newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
|
||||
\newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
|
||||
\newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
|
||||
\newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
|
||||
\newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
|
||||
\newcommand{\errorval}{\ensuremath{\bot}\xspace}
|
||||
\newcommand{\rframe}{\ensuremath{\mathcal{F}}\xspace}
|
||||
\newcommand{\se}[1]{{\textcolor{tabexprcolor}{#1}}}
|
||||
\newcommand{\uv}[1]{{\textcolor{red}{#1}}}
|
||||
\newcommand{\upd}{U}
|
||||
\newcommand{\cu}[3]{#1#2 = #3}
|
||||
\newcommand{\cellupdate}{u}
|
||||
\newcommand{\dom}{\textsc{Dom}}
|
||||
\newcommand{\acu}{\cu{\column}{\row}{\expr}}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Spreadsheets}
|
||||
\label{sec:spreadsheets}
|
||||
|
||||
Let $\columnDomain$ denote a domain of column labels and $\rowDomain \subset \mathbb{Z}$ be a set of row positions, and let $\exprDomain$ and $\valueDomain$ denote a domain of expressions and values; We will define $\exprDomain$ in greater detail below.
|
||||
We define a \emph{spreadsheet} $\spreadsheet : (\columnDomain \times \rowDomain) \rightarrow \exprDomain$ as a partial mapping from \emph{cells} ($(\column, \row) \in (\columnDomain \times \mathbb{Z})$) to expressions. We use $\valat{\spreadsheet}{\column}{\row}$ to denote $\spreadsheet((\column, \row))$ and set $\valat{\spreadsheet}{\column}{\row} = \errorval$ if $(\column,\row)$ is not in the domain of $\spreadsheet$ where $\errorval$ is a distinguished error value from $\valueDomain$. For convenience, we will use $\column\row$ as a shorthand for $(\column, \row)$.
|
||||
|
||||
An expression $\expr \in \exprDomain$ is a formula defined over literals from $\valueDomain$, the standard arithmetic operators, and references to other cells in the spreadsheet ($\cellRef{\column}{\row}$).
|
||||
The expression $\expr$ may be evaluated in the context of a spreadsheet ($\evalOf{\spreadsheet}{\cdot} : \exprDomain \rightarrow \valueDomain$) as follows:
|
||||
(i) Literals evaluate to themselves, (ii) Arithmetic formulas are evaluated in the usual way, and (iii) References to the spreadsheet are evaluated recursively:
|
||||
$$\evalOf{\spreadsheet}{\cellRef{\column}{\row}} \equiv \evalOf{\spreadsheet}{\spreadsheet(\column, \row)}$$
|
||||
By convention, cyclic references evaluate to the distinguished error value $\errorval$ in $\valueDomain$.
|
||||
|
||||
We define the dependencies of an expression ($\depsOf{\expr}$) to be the set of cells referenced by $\expr$.
|
||||
Expression dependencies induce a graph $\DG{\spreadsheet}\tuple{V, E}$ over the spreadsheet, where each cell is a node (i.e., $V = \columnDomain \times \rowDomain$), and each dependency is a (directed) edge:
|
||||
$$E = \bigcup_{\cell \in \columnDomain \times \rowDomain}
|
||||
\{\;\cell \rightarrow \cell'\;|\;\cell' \in \depsOf{\spreadsheet(\cell)}\;\} $$
|
||||
We use $\TDG{\spreadsheet}$ to denote the graph $\tuple{V,E^*}$ where $E^*$ is the transitive closure of $E$, i.e., $\TDG{\spreadsheet}$ stores both direct and indirect dependencies among the cells in the spreadsheet.
|
||||
|
||||
We define a \emph{range} $\rangeOf{\columnRange}{\rowRange}$ as the Cartesian product of a set of columns ($\columnRange \subseteq \columnDomain$) and range of row positions ($R = [l,h] \in \mathbb{Z} \times \mathbb{Z}$).
|
||||
$$\rangeOf{\columnRange}{\rowRange} \equiv \{\;(\column, \row)\;|\;\column\in \columnRange, \row \in \rowRange\;\}$$
|
||||
|
||||
Note that if all cells expressions of a spreadsheet are constants (a spreadsheet without formulas), then $\evalOf{\spreadsheet}{\cellRef{\column}{\row}} = \valat{\spreadsheet}{\column}{\row}$.
|
||||
|
||||
As an example, consider the spreadsheet shown on the top of \Cref{fig:example-spreadsheet-and-a}. The expressions in columns \emph{A} and \emph{B} are constant expressions while the expressions in column \emph{C} are arithmetic expressions that references cells from columns \emph{A} and \emph{B}. The result of evaluating the spreadsheet to assign each cell a concrete value is shown on the top right of this figure, e.g., \emph{C1} evaluates to $\evalOf{\spreadsheet}{A1 + B1} = \evalOf{\spreadsheet}{A1} + \evalOf{\spreadsheet}{B1} = 15 + 50 = 65$.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{figure}[t]
|
||||
\centering
|
||||
\newcommand{\tabhead}[1]{\cellcolor{black}{\textcolor{white}{#1}}}
|
||||
|
||||
\begin{minipage}{0.48\linewidth}
|
||||
\centering
|
||||
\textbf{Spreadsheet $\spreadsheet$}\\
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{tabular}{c|c|c|c|}
|
||||
\cline{2-4}
|
||||
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
|
||||
\tabhead{1} &\se{15} &\se{50} &\se{A1 + B1}\\ \hline
|
||||
\tabhead{2} &\se{20} &\se{60} &\se{A2 + B2} \\ \hline
|
||||
\tabhead{3} &\se{25} &\se{100} &\se{A3 + B3}\\ \hline
|
||||
\tabhead{4} &\se{50} &\se{0} &\se{A4 + B4}\\ \hline
|
||||
\end{tabular}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\end{minipage} %
|
||||
%
|
||||
\begin{minipage}{0.49\linewidth}
|
||||
\centering
|
||||
\textbf{Evaluated Spreadsheet $\evalOf{\spreadsheet}{\cdot}$}\\
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{tabular}{c|c|c|c|}
|
||||
\cline{2-4}
|
||||
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
|
||||
\tabhead{1} & 15 & 50 & 65\\ \hline
|
||||
\tabhead{2} & 20 & 60 & 80 \\ \hline
|
||||
\tabhead{3} & 25 & 100 & 125\\ \hline
|
||||
\tabhead{4} & 50 & 0 & 50\\ \hline
|
||||
\end{tabular}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\end{minipage}
|
||||
$,$\\ \vspace{5mm}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{minipage}{1 \linewidth}
|
||||
\centering
|
||||
\textbf{Update} $\upd = \{ \cu{A}{1}{20}, \cu{C}{3}{2 \cdot A3 + B3} \}$
|
||||
\end{minipage}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
$,$\\ %\vspace{5mm}
|
||||
\begin{minipage}{0.48\linewidth}
|
||||
\centering
|
||||
\textbf{Updated Spreadsheet $\upd(\spreadsheet)$}\\
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{tabular}{c|c|c|c|}
|
||||
\cline{2-4}
|
||||
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
|
||||
\tabhead{1} &\uv{20} &\se{50} &\se{A1 + B1}\\ \hline
|
||||
\tabhead{2} &\se{20} &\se{60} &\se{A2 + B2} \\ \hline
|
||||
\tabhead{3} &\se{25} &\se{100} &\uv{2 $\cdot$ A3 + B3}\\ \hline
|
||||
\tabhead{4} &\se{50} &\se{0} &\se{A4 + B4}\\ \hline
|
||||
\end{tabular}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\end{minipage} %
|
||||
%
|
||||
\begin{minipage}{0.49\linewidth}
|
||||
\centering
|
||||
\textbf{Evaluated Spreadsheet $\evalOf{\upd(\spreadsheet)}{\cdot}$}\\
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{tabular}{c|c|c|c|}
|
||||
\cline{2-4}
|
||||
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
|
||||
\tabhead{1} & \uv{20} & 50 & \uv{70}\\ \hline
|
||||
\tabhead{2} & 20 & 60 & 80 \\ \hline
|
||||
\tabhead{3} & 25 & 100 & \uv{150}\\ \hline
|
||||
\tabhead{4} & 50 & 0 & 50\\ \hline
|
||||
\end{tabular}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\end{minipage}
|
||||
|
||||
\caption{Example spreadsheet (expressions are shown in \textcolor{tabexprcolor}{dark green} to distinguish them from values), result of evaluating the spreadsheet, and an update applied to the spreadsheet (updated expressions and values are shown in \uv{red}).}\label{fig:example-spreadsheet-and-a}
|
||||
\end{figure}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Updates}
|
||||
\label{sec:updates}
|
||||
|
||||
An update $\upd$ to a spreadsheet is a set of cell updates $\cellupdate$ of the form $\acu$ which assigns to cell $\column\row$ the expression $\expr$. For an update $\upd$, $\dom(\upd)$ called the domain of the updates contains all cells $\column\row$ such that there exists $\cellupdate \in \upd$ such that $\cellupdate = (\acu)$, i.e., the update modified the expression of cell $\column\row$. Applying an update $\upd$ to a spreadsheet $\spreadsheet$ return an updated spreadsheet $\upd(\spreadsheet)$ defined as:
|
||||
|
||||
\[
|
||||
\valat{\upd(\spreadsheet)}{\column}{\row} =
|
||||
\begin{cases}
|
||||
\upd({\column}{\row}) &\text{\textbf{if}}\,\, \column\row \in \dom(\upd)\\
|
||||
\valat{\spreadsheet}{\column}{\row} &\text{\textbf{otherwise}}\\
|
||||
\end{cases}
|
||||
\]
|
||||
|
||||
Note that this way to represent updates can be quite verbose, e.g., if inserting a row at some position in the spreadsheet, the expressions of all following rows have to be updated as they are moved down by one row.
|
||||
We will discuss the more compact representation of updates as overlays used by our approach in \BG{section}
|
||||
For example, consider the update shown in \Cref{fig:example-spreadsheet-and-a}. This update changes the constant expression in cell \emph{A1} and the arithmetic expression in cell \emph{C3}. When evaluating the resulting spreadsheet $\upd(\spreadsheet)$, the values of three cells (shown in red), the values of three cells are changed.
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Spreadsheet Access to Datasets}
|
||||
\label{sec:spre-access-datas}
|
||||
|
||||
To uniformly model spreadsheet access to relational data as well as to data already represented as spreadsheets, we assume an input dataset $\ds$ and let $\columnDomain$ and $\rowDomain$ denote a domain of column and row (respectively) labels for $\ds$.
|
||||
For a relational table, $\columnDomain$ are the columns of the table and $\rowDomain$ could be the values of a list of key attributes or be $\mathbb{N}$ representing the order of a row on disk. If $\ds$ is a spreadsheet input dataset, then $\rowDomain$ is $\mathbb{N}$ and encodes the position of the row. For a dataset $\ds$ we use $\valat{\ds}{\row}{\column}$ to denote that value of column $\column$ of row $\row$ in $\ds$.
|
||||
|
||||
A \emph{spreadsheet overlay} for a dataset $\ds$ is a pair $(\ds, \rframe)$ where $\rframe: \rowDomain \to \mathbb{Z}$ determines the positions of rows from $\ds$
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: latex
|
||||
%%% TeX-master: "../main"
|
||||
%%% End:
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
\BG{define the spreadsheet model first?}
|
||||
|
||||
\newcommand{\errorval}{\ensuremath{\bot}\xspace}
|
||||
|
||||
A spreadsheet is a regular grid of cells, which are defined by formulas.
|
||||
A cell's formula may be a literal value, or an expression defining a computation that may be based on the value of other cells.
|
||||
|
|
Loading…
Reference in New Issue