Merge branch 'main' of git.odin.cse.buffalo.edu:VizierDB/paper-Vizier-SpreadsheetOverlay

main
Oliver Kennedy 2023-03-20 10:32:33 -04:00
commit 772ba3aaa7
Signed by: okennedy
GPG Key ID: 3E5F9B3ABD3FDB60
5 changed files with 324 additions and 26 deletions

View File

@ -2,6 +2,7 @@
\newcommand{\tuple}[1]{\left<#1\right>}
\newcommand{\partitle}[1]{\medskip\noindent\textbf{#1.}}
\usepackage{todonotes}
% \usepackage[disable]{todonotes}

View File

@ -4,6 +4,7 @@
\usepackage{todonotes}
\usepackage{amsmath}
\usepackage{xspace}
\usepackage{colortbl}
\usepackage{listings}
\usepackage{algorithm}
\usepackage{algpseudocode}
@ -160,6 +161,7 @@
\maketitle
\input{sections/introduction}
\input{sections/model}
\input{sections/overview}
\input{sections/formalism}
\input{sections/system}

View File

@ -2,32 +2,32 @@
\section{Formalism}
\label{sec:formalism}
\newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
% \newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
\newcommand{\spreadsheet}{\mathbb S}
\newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
\newcommand{\columnDomain}{\mathcal C}
\newcommand{\columnRange}{C}
\newcommand{\column}{c}
\newcommand{\rowDomain}{\mathcal R}
\newcommand{\rowRange}{R}
\newcommand{\row}{r}
\newcommand{\rowOffset}{\delta}
\newcommand{\cell}{a}
\newcommand{\exprDomain}{\mathcal E}
\newcommand{\expr}{e}
\newcommand{\patternDomain}{\mathcal P}
\newcommand{\pattern}{P}
\newcommand{\valueDomain}{\mathcal V}
\newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
\newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
\newcommand{\cellRef}[2]{(#1\texttt{:}#2)}
\newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
\newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
\newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
\newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
\newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
\newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
% \newcommand{\spreadsheet}{\mathbb S}
% \newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
% \newcommand{\columnDomain}{\mathcal C}
% \newcommand{\columnRange}{C}
% \newcommand{\column}{c}
% \newcommand{\rowDomain}{\mathcal R}
% \newcommand{\rowRange}{R}
% \newcommand{\row}{r}
% \newcommand{\rowOffset}{\delta}
% \newcommand{\cell}{a}
% \newcommand{\exprDomain}{\mathcal E}
% \newcommand{\expr}{e}
% \newcommand{\patternDomain}{\mathcal P}
% \newcommand{\pattern}{P}
% \newcommand{\valueDomain}{\mathcal V}
% \newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
% \newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
% \newcommand{\cellRef}[2]{(#1\texttt{:}#2)}
% \newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
% \newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
% \newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
% \newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
% \newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
% \newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
Let $\columnDomain$ and $\rowDomain$ denote a domain of column and row (respectively) labels, and let $\exprDomain$ and $\valueDomain$ denote a domain of expressions and values; We will define $\exprDomain$ in greater detail below.
We define a \emph{spreadsheet} $\spreadsheet : (\columnDomain \times \rowDomain) \rightarrow \exprDomain$ as a mapping from \emph{cells} ($(\column, \row) \in (\columnDomain \times \rowDomain)$) to expressions.

296
sections/model.tex Normal file
View File

@ -0,0 +1,296 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Spreadsheet Datamodel}
\label{sec:spre-datam}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Colors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\definecolor{tabexprcolor}{rgb}{0,0.5,0}
\newcommand{\comprehension}[2]{\left\{\;#1\;|\;#2\;\right\}}
\newcommand{\ds}{D}
\newcommand{\spreadsheet}{\mathbb S}
\newcommand{\encodedSpreadsheet}{\hat \spreadsheet}
\newcommand{\columnDomain}{\mathcal C}
\newcommand{\columnRange}{C}
\newcommand{\column}{c}
\newcommand{\rowDomain}{\mathcal R}
\newcommand{\rowRange}{R}
\newcommand{\row}{r}
\newcommand{\rowOffset}{\delta}
\newcommand{\cell}{a}
\newcommand{\exprDomain}{\mathcal E}
\newcommand{\expr}{e}
\newcommand{\patternDomain}{\mathcal P}
\newcommand{\pattern}{P}
\newcommand{\valueDomain}{\mathcal V}
\newcommand{\valat}[3]{#1[#2,#3]}
\newcommand{\rangeOf}[2]{(#1\texttt{:}#2)}
\newcommand{\range}{\rangeOf{\columnRange}{\rowRange}}
\newcommand{\cellRef}[2]{(#1,#2)}
\newcommand{\evalOf}[2]{\left\llbracket\; #2 \;\right\rrbracket_{#1}}
\newcommand{\patternOf}[2]{\left\llbracket\; #1 \;\right\rrbracket_{#2}}
\newcommand{\depsOf}[1]{\textbf{deps}\left(#1\right)}
\newcommand{\tdepsOf}[1]{\ensuremath{\text{\textbf{deps}\ast\left(#1\right)}\xspace}}
\newcommand{\DG}[1]{\ensuremath{G_{#1}}\xspace}
\newcommand{\TDG}[1]{\ensuremath{G_{#1}^*}\xspace}
\newcommand{\errorval}{\ensuremath{\bot}\xspace}
\newcommand{\se}[1]{{\textcolor{tabexprcolor}{#1}}}
\newcommand{\uv}[1]{{\textcolor{red}{#1}}}
\newcommand{\upd}{U}
\newcommand{\cu}[3]{#1#2 = #3}
\newcommand{\cellupdate}{u}
\newcommand{\dom}{\textsc{Dom}}
\newcommand{\acu}{\cu{\column}{\row}{\expr}}
\newcommand{\rframe}{\ensuremath{\mathcal{F}}\xspace}
\newcommand{\rtrans}{\ensuremath{\mathcal{T}}\xspace}
\newcommand{\overlay}{\mathcal{O}}
\newcommand{\ol}[2]{\tuple{#1,#2}}
\newcommand{\oins}{\mathcal{I}}
\newcommand{\oup}{\mathcal{U}}
\newcommand{\aol}{\ol{\rtrans}{\oup}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Spreadsheets}
\label{sec:spreadsheets}
Let $\columnDomain$ denote a domain of column labels and $\rowDomain \subset \mathbb{Z}$ be a set of row positions, and let $\exprDomain$ and $\valueDomain$ denote a domain of expressions and values; We will define $\exprDomain$ in greater detail below.
We define a \emph{spreadsheet} $\spreadsheet : (\columnDomain \times \rowDomain) \rightarrow \exprDomain$ as a partial mapping from \emph{cells} ($(\column, \row) \in (\columnDomain \times \mathbb{Z})$) to expressions. We use $\valat{\spreadsheet}{\column}{\row}$ to denote $\spreadsheet((\column, \row))$ and set $\valat{\spreadsheet}{\column}{\row} = \errorval$ if $(\column,\row)$ is not in the domain of $\spreadsheet$ where $\errorval$ is a distinguished error value from $\valueDomain$. For convenience, we will use $\column\row$ as a shorthand for $(\column, \row)$.
An expression $\expr \in \exprDomain$ is a formula defined over literals from $\valueDomain$, the standard arithmetic operators, and references to other cells in the spreadsheet ($\cellRef{\column}{\row}$).
The expression $\expr$ may be evaluated in the context of a spreadsheet ($\evalOf{\spreadsheet}{\cdot} : \exprDomain \rightarrow \valueDomain$) as follows:
(i) Literals evaluate to themselves, (ii) Arithmetic formulas are evaluated in the usual way, and (iii) References to the spreadsheet are evaluated recursively:
$$\evalOf{\spreadsheet}{\cellRef{\column}{\row}} \equiv \evalOf{\spreadsheet}{\spreadsheet(\column, \row)}$$
By convention, cyclic references evaluate to the distinguished error value $\errorval$ in $\valueDomain$.
We define the dependencies of an expression ($\depsOf{\expr}$) to be the set of cells referenced by $\expr$.
Expression dependencies induce a graph $\DG{\spreadsheet}\tuple{V, E}$ over the spreadsheet, where each cell is a node (i.e., $V = \columnDomain \times \rowDomain$), and each dependency is a (directed) edge:
$$E = \bigcup_{\cell \in \columnDomain \times \rowDomain}
\{\;\cell \rightarrow \cell'\;|\;\cell' \in \depsOf{\spreadsheet(\cell)}\;\} $$
We use $\TDG{\spreadsheet}$ to denote the graph $\tuple{V,E^*}$ where $E^*$ is the transitive closure of $E$, i.e., $\TDG{\spreadsheet}$ stores both direct and indirect dependencies among the cells in the spreadsheet.
Note that if all cells expressions of a spreadsheet are constants (a spreadsheet without formulas), then $\evalOf{\spreadsheet}{\cellRef{\column}{\row}} = \valat{\spreadsheet}{\column}{\row}$.
As an example, consider the spreadsheet shown on the top of \Cref{fig:example-spreadsheet-and-a}. The expressions in columns \emph{A} and \emph{B} are constant expressions while the expressions in column \emph{C} are arithmetic expressions that references cells from columns \emph{A} and \emph{B}. The result of evaluating the spreadsheet to assign each cell a concrete value is shown on the top right of this figure, e.g., \emph{C1} evaluates to $\evalOf{\spreadsheet}{A1 + B1} = \evalOf{\spreadsheet}{A1} + \evalOf{\spreadsheet}{B1} = 15 + 50 = 65$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure}[t]
\centering
\newcommand{\tabhead}[1]{\cellcolor{black}{\textcolor{white}{#1}}}
\begin{minipage}{0.48\linewidth}
\centering
\textbf{Spreadsheet $\spreadsheet$}\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
\tabhead{1} &\se{15} &\se{50} &\se{A1 + B1}\\ \hline
\tabhead{2} &\se{20} &\se{60} &\se{A2 + B2} \\ \hline
\tabhead{3} &\se{25} &\se{100} &\se{A3 + B3}\\ \hline
\tabhead{4} &\se{50} &\se{0} &\se{A4 + B4}\\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage} %
%
\begin{minipage}{0.49\linewidth}
\centering
\textbf{Evaluated Spreadsheet $\evalOf{\spreadsheet}{\cdot}$}\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
\tabhead{1} & 15 & 50 & 65\\ \hline
\tabhead{2} & 20 & 60 & 80 \\ \hline
\tabhead{3} & 25 & 100 & 125\\ \hline
\tabhead{4} & 50 & 0 & 50\\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage}
$,$\\ \vspace{5mm}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{minipage}{1 \linewidth}
\centering
\textbf{Update} $\upd = \{ \cu{A}{1}{20}, \cu{C}{3}{2 \cdot A3 + B3} \}$
\end{minipage}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$,$\\ %\vspace{5mm}
\begin{minipage}{0.48\linewidth}
\centering
\textbf{Updated Spreadsheet $\upd(\spreadsheet)$}\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
\tabhead{1} &\uv{20} &\se{50} &\se{A1 + B1}\\ \hline
\tabhead{2} &\se{20} &\se{60} &\se{A2 + B2} \\ \hline
\tabhead{3} &\se{25} &\se{100} &\uv{2 $\cdot$ A3 + B3}\\ \hline
\tabhead{4} &\se{50} &\se{0} &\se{A4 + B4}\\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage} %
%
\begin{minipage}{0.49\linewidth}
\centering
\textbf{Evaluated Spreadsheet $\evalOf{\upd(\spreadsheet)}{\cdot}$}\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C}\\ \hline
\tabhead{1} & \uv{20} & 50 & \uv{70}\\ \hline
\tabhead{2} & 20 & 60 & 80 \\ \hline
\tabhead{3} & 25 & 100 & \uv{150}\\ \hline
\tabhead{4} & 50 & 0 & 50\\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage}
\caption{Example spreadsheet (expressions are shown in \textcolor{tabexprcolor}{dark green} to distinguish them from values), result of evaluating the spreadsheet, and an update applied to the spreadsheet (updated expressions and values are shown in \uv{red}).}\label{fig:example-spreadsheet-and-a}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Updates}
\label{sec:updates}
An update $\upd$ to a spreadsheet is a set of cell updates $\cellupdate$ of the form $\acu$ which assigns to cell $\column\row$ the expression $\expr$. For an update $\upd$, $\dom(\upd)$ called the domain of the updates contains all cells $\column\row$ such that there exists $\cellupdate \in \upd$ such that $\cellupdate = (\acu)$, i.e., the update modified the expression of cell $\column\row$. Applying an update $\upd$ to a spreadsheet $\spreadsheet$ return an updated spreadsheet $\upd(\spreadsheet)$ defined as:
\[
\valat{\upd(\spreadsheet)}{\column}{\row} =
\begin{cases}
\upd({\column}{\row}) &\text{\textbf{if}}\,\, \column\row \in \dom(\upd)\\
\valat{\spreadsheet}{\column}{\row} &\text{\textbf{otherwise}}\\
\end{cases}
\]
Note that this way to represent updates can be quite verbose, e.g., if inserting a row at some position in the spreadsheet, the expressions of all following rows have to be updated as they are moved down by one row.
We will discuss the more compact representation of updates as overlays used by our approach in \BG{section}
For example, consider the update shown in \Cref{fig:example-spreadsheet-and-a}. This update changes the constant expression in cell \emph{A1} and the arithmetic expression in cell \emph{C3}. When evaluating the resulting spreadsheet $\upd(\spreadsheet)$, the values of three cells (shown in red), the values of three cells are changed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Spreadsheet Access to Datasets}
\label{sec:spre-access-datas}
To uniformly model spreadsheet access to relational data as well as to data already represented as spreadsheets, we assume an input dataset $\ds$ and let $\columnDomain$ and $\rowDomain$ denote a domain of column and row (respectively) labels for $\ds$.
For a relational table, $\columnDomain$ are the columns of the table and $\rowDomain$ could be the values of a list of key attributes or be $\mathbb{N}$ representing the order of a row on disk. If $\ds$ is a spreadsheet input dataset, then $\rowDomain$ is $\mathbb{N}$ and encodes the position of the row. For a dataset $\ds$ we use $\valat{\ds}{\row}{\column}$ to denote that value of column $\column$ of row $\row$ in $\ds$.
A \emph{spreadsheet overlay} for a dataset $\ds$ is a pair $(\ds, \rframe)$ where $\rframe: \rowDomain \to \mathbb{Z}$ called a reference frame is an injective map that determines the positions of rows from $\ds$ in the spreadsheet. Such an overlay defines a spreadsheet $\spreadsheet_{\ds, \rframe}$:
\[
\valat{\spreadsheet_{\ds, \rframe}}{\column}{\row} = \valat{\ds}{\column}{\rframe^{-1}(\row)}
\]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Overlay Updates}
\label{sec:overlay-updates}
We now introduce how to represent updates compactly as \emph{overlays}. An overlay update $\overlay = \aol$ encodes a update $\rtrans$ to the reference frame $\rframe$ of an overlay spreadsheet and a pattern update $\oup$ that compactly encodes updates to the formulas in a range of cells as a templates formula where cell references are expressed as offsets (and is also used to express insertions). We next discuss these components in more detail and then define the result of applying an overlay update $\overlay$ to a spreadsheet overlay $(\rframe, \ds)$. In overlays we make use of ranges to compactly represent insertions and updates.
A \emph{range} $\rangeOf{\columnRange}{\rowRange}$ is the Cartesian product of a set of columns ($\columnRange \subseteq \columnDomain$) and range of row positions ($R = [l,h] \in \mathbb{Z} \times \mathbb{Z}$):
$$\rangeOf{\columnRange}{\rowRange} \equiv \{\;(\column, \row)\;|\;\column\in \columnRange, \row \in \rowRange\;\}$$
%Some times we use only row ranges $R = [l,h]$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\partitle{Reference Frame Transformations}
The reference frame update $\rtrans$ of an overlay $\overlay$ is an injective mapping $\mathbb{Z} \to \mathbb{Z} \cup \errorval$ that updates positions of rows in the result of the update. The value $\errorval$ indicates deletions of rows (rows that do not have a position after the update).
The new reference frame for the spreadsheet overlay after applying $\overlay$ is $\rframe' = \rtrans \circ \mathcal F$, where $\circ$ denotes function composition. As an example, consider inserting a new row between rows 2 and 3 in the spreadsheet from \Cref{fig:example-spreadsheet-and-a}. The positions of rows $3$ and $4$ is increased by one, while rows $1$ and $2$ retrain their position:
$$\rtrans(x) = \begin{cases}
x & \textbf{if } x \leq 2\\
x+1 & \textbf{otherwise}
\end{cases}$$
When a row is deleted then its position is mapped to $\errorval$. For instance, deleting the first row in \Cref{fig:example-spreadsheet-and-a} results in the following reference frame update:
$$\rtrans(x) = \begin{cases}
\errorval & \textbf{if } x =1\\
x-1 & \textbf{otherwise}
\end{cases}$$
Note that the common operations of inserting and deleting rows can be expressed as reference frame updates of constant size independent of the size of the spreadsheet.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\partitle{Pattern Updates}
%
The pattern update $\oup$ of an overlay update $\overlay$ is a set of pairs $\{ (\rangeOf{C_i}{R_i}, \pattern_i) \}$ where $\rangeOf{C_i}{R_i}$ is a range and $\pattern_i$ is a \emph{pattern formula}, i.e., a formula which may contain both absolute cell references (as in regular formulas) and references where rows are relative offsets (written as $+i$ or $-i$). We required that the ranges $\rangeOf{C_i}{R_i}$ are pairwise disjoint. The semantics of $(\rangeOf{C_i}{R_i}, \pattern_i)$ is that every cell $(\column, \row)$ in $\rangeOf{C_i}{R_i}$ is assigned a formula that is generated by replacing any relative references of the form $(\column, \delta)$ in $\pattern_i$ with $(\column, \row + \delta)$. We use $\pattern_i(\cell)$ to denote the instantiation of pattern $\pattern_i$ for cell $\cell$.
For instance, to store a running sum of the values in column \emph{C} as the cell values in column \emph{D} for the spreadsheet from \Cref{fig:example-spreadsheet-and-a}, we can use the following pattern update:
\[
\oup_{running} = (\rangeOf{D1}{D1}, (C,1)), (\rangeOf{D2}{D4}, (C,+0) + (D,-1))
\]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure}[t]
\centering
\newcommand{\tabhead}[1]{\cellcolor{black}{\textcolor{white}{#1}}}
\begin{minipage}{1\linewidth}
\centering
\textbf{Spreadsheet $\spreadsheet$} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C} & \tabhead{D} \\ \hline
\tabhead{1} & \se{15} & \se{50} & \se{A1 + B1} & \uv{C1} \\ \hline
\tabhead{2} & \se{20} & \se{60} & \se{A2 + B2} & \uv{C2 + D1} \\ \hline
\tabhead{3} & \se{25} & \se{100} & \se{A3 + B3} & \uv{C3 + D2} \\ \hline
\tabhead{4} & \se{50} & \se{0} & \se{A4 + B4} & \uv{C4 + D3} \\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage} %
$\,$\\
\begin{minipage}{1\linewidth}
\centering
\textbf{Evaluated Spreadsheet $\evalOf{\spreadsheet}{\cdot}$} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tabular}{c|c|c|c|c|}
\cline{2-4}
& \tabhead{A} & \tabhead{B} & \tabhead{C} & \tabhead{D} \\ \hline
\tabhead{1} & 15 & 50 & 65 & \uv{65} \\ \hline
\tabhead{2} & 20 & 60 & 80 & \uv{145} \\ \hline
\tabhead{3} & 25 & 100 & 125 & \uv{270} \\ \hline
\tabhead{4} & 50 & 0 & 50 & \uv{320} \\ \hline
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{minipage}
\caption{Example overlay update and result (updated expressions and values are shown in \uv{red}).}\label{fig:example-overlay-update}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\partitle{Semantics for Overlay Updates}
%
Applying an overlay update $\overlay$ to a spreadsheet $\spreadsheet$ results in an updated spreadsheet $\overlay(\spreadsheet$ computed by applying the reference frame update and then applying all pattern updates:
\begin{align*}
\valat{\overlay(\spreadsheet)}{\column}{\row} &=
\begin{cases}
\valat{\spreadsheet}{\column}{\rtrans^{-1}(\row)} & \text{\textbf{if}} \exists \row': \rtrans(\row') = \row\\
\pattern_i((\column,\row)) & \text{\textbf{if}} \exists i: (\column,\row) \in \rangeOf{C_i}{R_i} \\
\errorval &\text{\textbf{otherwise}}\\
\end{cases}
\end{align*}
For example, the result of applying the overlay update $\overlay_{running} = (\rtrans_{id},\oup_{running})$ where $\rframe_{id}(x) = x$ to our running example spreadsheet is shown in \Cref{fig:example-overlay-update}. The column \emph{D} is filled with new formulas that compute the hollowing sum.
Several remarks are in order. First, note that overlays can be used to encode common spreadsheet update operations in constant space, including . Second, \cite{tang-23-efcsfg} uses similar ideas to compress the dependencies in a spreadsheet using ranges and patterns. However, that work does not compress the updates themselves, but instead generates a compact representation of dependencies in a given spreadsheet.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Updating Datasets}
\label{sec:updating-datasets}
In addition to the advantage that versions of a spreadsheet can be tracked as sequences of overlay updates, another major advantage of modeling spreadsheets as overlays over datasets and updates as overlays on top of these overlays, is that it enables the updates on top of a spreadsheet overlay for a dataset $\ds$ to be reapplied on top of an updated version $\ds'$ as long as we can consistently identify row labels in $\ds'$ (e.g., if $\rowDomain$ is a semantic key for the dataset this will be possible).
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% reftex-default-bibliography: ("../main.bib")
%%% End:

View File

@ -4,7 +4,6 @@
\BG{define the spreadsheet model first?}
\newcommand{\errorval}{\ensuremath{\bot}\xspace}
A spreadsheet is a regular grid of cells, which are defined by formulas.
A cell's formula may be a literal value, or an expression defining a computation that may be based on the value of other cells.