Started adjusting figures and equations in the Introduction.

master
Aaron Huber 2021-03-08 12:48:22 -05:00
parent 4a55032435
commit 0530ffc5cf
2 changed files with 37 additions and 27 deletions

View File

@ -21,14 +21,6 @@ We now introduce useful definitions and notation related to polynomials. We use
\end{Definition}
\noindent For example, the monomial $XY$ has $\var(XY)=\inset{X,Y}$.
%\begin{Definition}[Expression Tree]\label{def:express-tree}
%An expression tree $\circuit$ is a binary %an ADT logically viewed as an n-ary
%tree, whose internal nodes are from the set $\{+, \times\}$, with leaf nodes being either from the set $\mathbb{R}$ $(\tnum)$ or from the set of monomials $(\var)$. The members of $\circuit$ are \type, \val, \vari{partial}, \vari{children}, and \vari{weight}, where \type is the type of value stored in the node $\circuit$ (i.e. one of $\{+, \times, \var, \tnum\}$, \val is the value stored, and \vari{children} is the list of $\circuit$'s children where $\circuit_\lchild$ is the left child and $\circuit_\rchild$ the right child. Remaining fields hold values whose semantics we will fix later. When $\circuit$ is used as input of ~\Cref{alg:mon-sam} and ~\Cref{alg:one-pass}, the values of \vari{partial} and \vari{weight} will not be set. %SEMANTICS FOR \circuit: \vari{partial} is the sum of $\circuit$'s coefficients , n, and \vari{weight} is the probability of $\circuit$ being sampled.
%\end{Definition}
%Note that $\circuit$ need not encode an expression in the standard monomial basis. For instance, $\circuit$ could represent a compressed form of the polynomial in~\Cref{eq:poly-eg}, such as $(x + 2y)(2x - y)$.
\revision{
\begin{Definition}[Pure Expansion]
The pure expansion of a polynomial $\poly$ is formed by computing all products of sums occurring in $\poly$, without combining like monomials. The pure expansion of $\poly$ generalizes~\Cref{def:smb} by allowing monomials $m_i = m_j$ for $i \neq j$.
@ -41,17 +33,15 @@ The pure expansion of a polynomial $\poly$ is formed by computing all product of
%\revision{$\expansion{\circuit}$} is the reduced pure expansion of $\revision{\circuit}$.
The logical view of \revision{$\expansion{\circuit}$} is a list of tuples $(\monom, \coef)$, where $\monom$ is a set of variables and $\coef$ is in $\reals$.
\revision{$\expansion{\circuit}$} has the following recursive definition ($\circ$ is list concatenation).
{\small
\begin{multline*}
\expansion{\circuit} =
$\expansion{\circuit} =
\begin{cases}
\revision{\expansion{\circuit_\linput} \circ \expansion{\circuit_\rinput}} &\textbf{ if }\revision{\circuit.\type = \circplus}\\
\left\{(\monom_\linput \cup \monom_\rinput, \coef_\linput \cdot \coef_\rinput) ~|~\right.&\\ \quad \left.(\monom_\linput, \coef_\linput) \in \revision{\expansion{\circuit_\linput}}, (\monom_\rinput, \coef_\rinput) \in \revision{\expansion{\circuit_\rinput}}\right\} &\textbf{ if }\revision{\circuit.\type = \circmult}\\
\expansion{\circuit_\linput} \circ \expansion{\circuit_\rinput} &\textbf{ if }\revision{\circuit.\type = \circplus}\\
\left\{(\monom_\linput \cup \monom_\rinput, \coef_\linput \cdot \coef_\rinput) ~|~(\monom_\linput, \coef_\linput) \in \expansion{\circuit_\linput}, (\monom_\rinput, \coef_\rinput) \in \expansion{\circuit_\rinput}\right\} &\textbf{ if }\revision{\circuit.\type = \circmult}\\
\elist{(\emptyset, \revision{\circuit.\val})} &\textbf{ if }\revision{\circuit}.\type = \tnum\\
\elist{(\{\revision{\circuit}.\val\}, 1)} &\textbf{ if }\revision{\circuit}.\type = \var.\\
\end{cases}
\end{multline*}
}
\end{cases}
$
\end{Definition}
\revision{

View File

@ -120,8 +120,9 @@ The corresponding variable assignment is $\{\;W_a \mapsto \top, W_b \mapsto \top
\end{Example}
\begin{figure}[t]
\begin{subfigure}{0.2\textwidth}
\begin{subfigure}{0.45\textwidth}
\centering
\resizebox{!}{8mm}{
\begin{tabular}{ c | c c c}
$\rel$ & A & $\Phi_{set}$ & $\Phi_{bag}$\\
\hline
@ -129,11 +130,12 @@ The corresponding variable assignment is $\{\;W_a \mapsto \top, W_b \mapsto \top
& b & $W_b$ & $W_b$\\
& c & $W_c$ & $W_c$\\
\end{tabular}
%\caption{Atom 1 of query $\poly$ in ~\Cref{intro:ex}}
} %\caption{Atom 1 of query $\poly$ in ~\Cref{intro:ex}}
\label{subfig:ex-atom1}
\end{subfigure}
\begin{subfigure}{0.24\textwidth}
\begin{subfigure}{0.45\textwidth}
\centering
\resizebox{!}{8mm}{
\begin{tabular}{ c | c c c c}
$E$ & A & B & $\Phi_{set}$ & $\Phi_{bag}$ \\
\hline
@ -141,6 +143,7 @@ The corresponding variable assignment is $\{\;W_a \mapsto \top, W_b \mapsto \top
& b & c & $\top$ & $1$\\
& c & a & $\top$ & $1$\\
\end{tabular}
}
%\caption{Atom 3 of query $\poly$ in ~\Cref{intro:ex}}
\label{subfig:ex-atom3}
\end{subfigure}
@ -162,6 +165,7 @@ The corresponding variable assignment is $\{\;W_a \mapsto \top, W_b \mapsto \top
\trimfigurespacing
\end{figure}
Following prior efforts~\cite{feng:2019:sigmod:uncertainty,DBLP:conf/pods/GreenKT07,GL16}, we generalize this model of Set-PDBs to bags using $\semN$-valued random variables (i.e., $Dom(W_i) \subseteq \mathbb N$) and constants (annotation $\Phi_{bag}$ in the example).
Without loss of generality, we assume that input relations are sets (i.e., $Dom(W_i) = \{0, 1\}$), while query evaluation follows bag semantics.
@ -170,16 +174,32 @@ Continuing the prior example, we are given the following Boolean (resp,. count)
$$\poly() :- R(A), E(A, B), R(B)$$
The lineage of the result in a Set-PDB (resp., Bag-PDB) is a Boolean (resp., polynomial) formula over random variables annotating the input relations (i.e., $W_a$, $W_b$, $W_c$).
Because the query result is a nullary relation, we write $Q(\cdot)$ to denote the function that evaluates the lineage over one specific assignment of values to the variables (i.e., the value of the lineage in the corresponding possible world):
\begin{align*}
\poly_{set}(W_a, W_b, W_c) &= W_aW_b \vee W_bW_c \vee W_cW_a\\
\poly_{bag}(W_a, W_b, W_c) &= W_aW_b + W_bW_c + W_cW_a
\end{align*}
$$
\begin{tabular}{c c}
\begin{minipage}[b]{0.45\linewidth}
$\poly_{set}(W_a, W_b, W_c) = W_aW_b \vee W_bW_c \vee W_cW_a$
\end{minipage}\hspace*{5mm}
&
\begin{minipage}[b]{0.45\linewidth}
$\poly_{bag}(W_a, W_b, W_c) = W_aW_b + W_bW_c + W_cW_a$
\end{minipage}\\
\end{tabular}
$$
These functions compute the existence (resp., count) of the nullary tuple resulting from applying $\poly$ on the PDB of \Cref{fig:intro-ex}.
For the same possible world as in the prior example:
\begin{align*}
&\poly_{set}(\top, \top, \bot) = \top\top \vee \top\bot \vee \bot\top = \top\\
&\poly_{bag}(1, 1, 0) = 1 \cdot 1 + 1\cdot 0 + 0 \cdot 1 = 1
\end{align*}
$$
\begin{tabular}{c c}
\begin{minipage}[b]{0.45\linewidth}
$\poly_{set}(\top, \top, \bot) = \top\top \vee \top\bot \vee \bot\top = \top$
\end{minipage}
&
\begin{minipage}[b]{0.45\linewidth}
$\poly_{bag}(1, 1, 0) = 1 \cdot 1 + 1\cdot 0 + 0 \cdot 1 = 1$
\end{minipage}\\
\end{tabular}
$$
The Set-PDB query is satisfied in this possible world and the Bag-PDB result tuple has a multiplicity of 1.
The marginal probability (resp., expected count) of this query is computed over all possible worlds:
% \AR{What is $\mu$ below?}