paper-BagRelationalPDBsAreHard/poly-form.tex

172 lines
8.4 KiB
TeX
Raw Normal View History

%root: main.tex
2020-06-26 17:27:52 -04:00
%!TEX root = ./main.tex
2020-07-14 11:45:57 -04:00
%\onecolumn
2020-12-14 23:34:12 -05:00
\subsection{Reduced Polynomials and Equivalences}
2021-04-08 22:30:03 -04:00
We now introduce some terminology % for polynomials
2021-04-10 13:20:30 -04:00
and develop a reduced form (a closed form of the polynomial's expectation) for polynomials over probability distributions derived from a \bi or \ti.
2021-04-07 23:27:51 -04:00
%We will use $(X + Y)^2$ as a running example.
2021-06-09 12:42:26 -04:00
Note that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ is formally defined as (with $c_{\vct{d}} \in \domN$):
2021-06-11 11:22:58 -04:00
\AH{My attempt to clear up any confusion in the ambiguity of $c_{\vct{d}}$. We may want to say that $\domain\inparen{c_{\vct{d}}} = \domR$ instead?}
2021-04-08 22:30:03 -04:00
\begin{equation}
\label{eq:sop-form}
2021-06-09 12:42:26 -04:00
\poly\inparen{X_1,\dots,X_n}=\sum_{\vct{d}=(d_1,\dots,d_n)\in \semN^n} c_{\vct{d}}\cdot \prod_{i=1}^n X_i^{d_i}.
2021-04-08 22:30:03 -04:00
\end{equation}
2021-04-08 22:30:03 -04:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Standard Monomial Basis]\label{def:smb}
2021-06-09 12:42:26 -04:00
From above, the term $\prod_{i=1}^n X_i^{d_i}$ is a {\em monomial}. A polynomial $\poly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
\end{Definition}
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2021-06-11 11:22:58 -04:00
Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation.
When it is unclear, we use $\smbOf{\poly}$ to denote the \abbrSMB form of a polynomial $\poly$.
2021-04-07 23:27:51 -04:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-12-14 23:34:12 -05:00
\begin{Definition}[Degree]\label{def:degree}
The degree of polynomial $\poly(\vct{X})$ is the largest $\sum_{i=1}^n d_i$ such that $c_{(d_1,\dots,d_n)}\ne 0$. % maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$.
2020-12-14 23:34:12 -05:00
\end{Definition}
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2021-04-08 22:30:03 -04:00
The degree of the polynomial $X^2+2XY+Y^2$ is $2$.
2021-04-10 09:48:26 -04:00
Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins in any clause of the UCQ query that created it.
2020-12-20 17:13:52 -05:00
In this paper we consider only finite degree polynomials.
2021-06-09 12:42:26 -04:00
We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if there exists a \AH{Which formalism? UCQ?}$\raPlus$ query $\query$, \bi $\pxdb$ (\ti $\pxdb$, or $\semNX$-PDB $\pxdb$), and tuple $\tup$ such that $\poly\inparen{\vct{X}} = \query(\pxdb)(\tup)$.
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2021-03-06 20:34:18 -05:00
\begin{Definition}[Modding with a set]\label{def:mod-set}
Let $S$ be a {\em set} of polynomials over $\vct{X}$. Then $\poly(\vct{X})\mod{S}$ is the polynomial obtained by taking the mod of $\poly(\vct{X})$ over {\em all} polynomials in $S$ (order does not matter).
2021-03-06 20:34:18 -05:00
\end{Definition}
For example, for a set of polynomials $S=\inset{X^2-X, Y^2-Y}$, taking the polynomial $2X^2 + 3XY - 2Y^2\mod S$ yields $2X+3XY-2Y$.
%
2021-04-10 00:19:16 -04:00
\begin{Definition}[$\mathcal B$, $\mathcal T$]\label{def:mod-set-polys}
Given the set of BIDB variables $\inset{X_{i,j}}$, define
\setlength\parindent{0pt}
\vspace*{-3mm}
{\small
\begin{tabular}{@{}l l}
\begin{minipage}[b]{0.45\linewidth}
\centering
\begin{equation*}
\mathcal{B}=\comprehension{X_{i,j}\cdot X_{i,j'}}{i \in [\ell], j\neq j' \in [~\abs{\block_i}~]}
\end{equation*}
\end{minipage}%
\hspace{13mm}
&
\begin{minipage}[b]{0.45\linewidth}
\centering
\begin{equation*}
\mathcal{T}=\comprehension{X_{i,j}^2-X_{i,j}}{i \in [\ell], j \in [~\abs{\block_i}~]}
\end{equation*}
\end{minipage}
\\
\end{tabular}
}
\end{Definition}
%
2020-12-14 23:34:12 -05:00
\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
2021-04-08 22:30:03 -04:00
The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is: $\rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}$.
% \begin{equation*}
% \rpoly(\vct{X}) = \poly(\vct{X}) \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
% \end{equation*}
%for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$.
\end{Definition}
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
2020-12-20 18:29:52 -05:00
2021-04-10 00:19:16 -04:00
All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$. Additionally taking the result modulo $\mathcal{B}$ enforces the disjointness condition of a \bi, removing every monomial that contains two distinct lineage variables from the same block.
%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored).
2021-04-07 23:27:51 -04:00
%
For the special case of \tis, the second step is not necessary since every block contains a single tuple.
2020-12-20 00:13:58 -05:00
%Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
%
2020-12-14 23:34:12 -05:00
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
% Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
% $\rpoly(X_1,\ldots, X_\numvar) = $
2020-07-08 16:48:37 -04:00
2020-12-14 23:34:12 -05:00
% \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
% \end{Definition}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2021-04-07 23:27:51 -04:00
%%Removing this example to save space
\iffalse
\begin{Example}\label{example:qtilde}
Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks. The expanded derivation for $\rpoly(X, Y)$ is
\begin{align*}
2020-12-16 12:38:21 -05:00
(&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\
= ~&X + 2XY + Y^2 \mod Y^2 - Y\\
= ~& X + 2XY + Y
\end{align*}
\end{Example}
2021-04-07 23:27:51 -04:00
\fi
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
2020-12-14 23:34:12 -05:00
% Intuitively, $\rpoly(\textbf{X})$ is the \abbrSMB form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
%
2020-12-14 23:34:12 -05:00
%When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
%
2020-12-20 00:13:58 -05:00
%\noindent The usefulness of this reduction will become clear in \Cref{lem:exp-poly-rpoly}.
%
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[Valid Worlds]
2021-06-09 13:12:37 -04:00
For probability distribution $\pd$, % and its corresponding probability mass function $\probOf$,
the set of valid worlds $\valworlds$ consists of all the worlds with probability value greater than $0$; i.e., for random world variable vector $\vct{W}$
\[
2021-06-09 13:12:37 -04:00
\valworlds = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0}.
\]
\end{Definition}
2020-07-08 16:48:37 -04:00
2021-04-10 09:48:26 -04:00
%We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in \Cref{app:subsec-pre-poly-rpoly} and \Cref{app:subsec-prop-q-qtilde}.
\noindent Next, we show why the reduced form is useful for our purposes:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-12-14 13:58:56 -05:00
2020-12-17 17:08:48 -05:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-12-17 17:08:48 -05:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-07-08 16:48:37 -04:00
2020-12-14 23:34:12 -05:00
%Define all variables $X_i$ in $\poly$ to be independent.
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:exp-poly-rpoly}
2021-06-09 13:12:37 -04:00
Let $\pxdb$ be a \bi over variables $\vct{X} = (X_1, \ldots, X_\numvar)$ and with probability distribution $\pd$ induced by the tuple probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$ over all $\vct{w}$ in $\valworlds$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and query $\query$ we have:
2020-12-14 23:34:12 -05:00
% The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
\begin{equation*}
2021-06-09 13:12:37 -04:00
\expct_{\vct{W}\sim \pd}\pbox{\poly(\vct{W})} = \rpoly(\probAllTup).
\end{equation*}
\end{Lemma}
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2021-04-08 22:30:03 -04:00
Note that in the preceding lemma, we have assigned $\probAllTup$
%(introduced in \Cref{subsec:def-data})
2020-12-16 12:38:21 -05:00
to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.
2020-12-17 17:08:48 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-06-26 17:27:52 -04:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-06-15 18:38:10 -04:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Corollary}\label{cor:expct-sop}
If $\poly$ is a \bi-lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$, can be computed in $\bigO{\size\inparen{\poly}}$ time, where $\size\inparen{\poly}$ (\Cref{def:size}) is proportional to the total number of multiplication/addition operators in $\poly$.
2020-06-17 10:58:02 -04:00
\end{Corollary}
%\AH{What if $\poly$ is not in \abbrSMB form?}
2020-12-17 17:08:48 -05:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2020-12-17 17:08:48 -05:00
2020-12-14 13:58:56 -05:00
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: