% paper-BagRelationalPDBsAreHard/poly-form.tex

% 172 lines
% 8.4 KiB
% TeX

%root: main.tex
%!TEX root = ./main.tex
%\onecolumn
\subsection{Reduced Polynomials and Equivalences}
We now introduce some terminology % for polynomials
and develop a reduced form (a closed form of the polynomial's expectation) for polynomials over probability distributions derived from a \bi or \ti.
%We will use $(X + Y)^2$ as a running example.
Note that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ is formally defined as (with $c_{\vct{d}} \in \domN$):
\AH{My attempt to clear up any confusion in the ambiguity of $c_{\vct{i}}$. We may want to say that $\domain\inparen{c_\vct{i}} = \domR$ instead?}
\begin{equation}
\label{eq:sop-form}
\poly\inparen{X_1,\dots,X_n}=\sum_{\vct{d}=(d_1,\dots,d_n)\in \semN^n} c_{\vct{d}}\cdot \prod_{i=1}^n X_i^{d_i}.
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
From above, the term $\prod_{i=1}^n X_i^{d_i}$ is a \emph{monomial}. A polynomial $\poly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation.
When it is unclear, we use $\smbOf{\poly}$ to denote the \abbrSMB form of a polynomial $\poly$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Degree]\label{def:degree}
The degree of polynomial $\poly(\vct{X})$ is the largest $\sum_{i=1}^n d_i$ such that $c_{(d_1,\dots,d_n)}\ne 0$. % maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The degree of the polynomial $X^2+2XY+Y^2$ is $2$.
Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins in any clause of the UCQ query that created it.
In this paper we consider only finite degree polynomials.
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply \emph{lineage polynomial}), if there exists a \AH{Which formalism? UCQ?}$\raPlus$ query $\query$, \bi $\pxdb$ (resp., \ti $\pxdb$, or $\semNX$-PDB $\pxdb$), and tuple $\tup$ such that $\poly\inparen{\vct{X}} = \query(\pxdb)(\tup)$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Modding with a set]\label{def:mod-set}
Let $S$ be a {\em set} of polynomials over $\vct{X}$. Then $\poly(\vct{X})\mod{S}$ is the polynomial obtained by taking the mod of $\poly(\vct{X})$ over {\em all} polynomials in $S$ (order does not matter).
\end{Definition}
For example, for a set of polynomials $S=\inset{X^2-X, Y^2-Y}$, taking the polynomial $2X^2 + 3XY - 2Y^2\mod S$ yields $2X+3XY-2Y$.
%
\begin{Definition}[$\mathcal B$, $\mathcal T$]\label{def:mod-set-polys}
Given the set of BIDB variables $\inset{X_{i,j}}$, define
\setlength\parindent{0pt}
\vspace*{-3mm}
{\small
\begin{tabular}{@{}l l}
\begin{minipage}[b]{0.45\linewidth}
\centering
\begin{equation*}
\mathcal{B}=\comprehension{X_{i,j}\cdot X_{i,j'}}{i \in [\ell], j\neq j' \in [~\abs{\block_i}~]}
\end{equation*}
\end{minipage}%
\hspace{13mm}
&
\begin{minipage}[b]{0.45\linewidth}
\centering
\begin{equation*}
\mathcal{T}=\comprehension{X_{i,j}^2-X_{i,j}}{i \in [\ell], j \in [~\abs{\block_i}~]}
\end{equation*}
\end{minipage}
\\
\end{tabular}
}
\end{Definition}
%
\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is $\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}$.
% \begin{equation*}
% \rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
% \end{equation*}
%for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
Taking $\smbOf{\poly(\vct{X})}$ modulo $\mathcal{T}$ reduces every exponent $e > 1$ to $1$. Taking the result modulo $\mathcal{B}$ then enforces the disjointness condition of a \bi by removing every monomial that contains lineage variables of two distinct tuples from the same block.
%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
%
For the special case of \tis, the second step is not necessary since every block contains a single tuple.
%Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
% Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
% $\rpoly(X_1,\ldots, X_\numvar) = $
% \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
% \end{Definition}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%Removing this example to save space
\iffalse
\begin{Example}\label{example:qtilde}
Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks. The expanded derivation for $\rpoly(X, Y)$ is
\begin{align*}
(&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\
= ~&X + 2XY + Y^2 \mod Y^2 - Y\\
= ~& X + 2XY + Y
\end{align*}
\end{Example}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Intuitively, $\rpoly(\textbf{X})$ is the \abbrSMB form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
%
%When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
%
%\noindent The usefulness of this will reduction become clear in \Cref{lem:exp-poly-rpoly}.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Valid Worlds]
For probability distribution $\pd$, % and its corresponding probability mass function $\probOf$,
the set of valid worlds $\valworlds$ consists of all the worlds with probability value greater than $0$; i.e., for random world variable vector $\vct{W}$
\[
\valworlds = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}.
\]
\end{Definition}
%We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in \Cref{app:subsec-pre-poly-rpoly} and \Cref{app:subsec-prop-q-qtilde}.
\noindent Next, we show why the reduced form is useful for our purposes:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Define all variables $X_i$ in $\poly$ to be independent.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:exp-poly-rpoly}
Let $\pxdb$ be a \bi over variables $\vct{X} = (X_1, \ldots, X_\numvar)$ and with probability distribution $\pd$ induced by the tuple probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$ over all $\vct{w}$ in $\valworlds$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and query $\query$ we have:
% The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
\begin{equation*}
\expct_{\vct{W}\sim \pd}\pbox{\poly(\vct{W})} = \rpoly(\probAllTup).
\end{equation*}
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Note that in the preceding lemma, we have assigned $\probAllTup$
%(introduced in \Cref{subsec:def-data})
to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Corollary}\label{cor:expct-sop}
If $\poly$ is a \bi-lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$, can be computed in $\bigO{\size\inparen{\poly}}$ time, where $\size\inparen{\poly}$ (\Cref{def:size}) is proportional to the total number of multiplication/addition operators in $\poly$.
\end{Corollary}
%\AH{What if $\poly$ is not in \abbrSMB form?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: