%root: main.tex %!TEX root = ./main.tex %\onecolumn \subsection{Reduced Polynomials and Equivalences} We now introduce some terminology for polynomials and develop a reduced form for polynomials --- a closed form of the polynomial's expectation over probability distributions derived from a \bi or \ti. We will use $(X + Y)^2$ as a running example. \begin{Definition}[Standard Monomial Basis]\label{def:smb} A monomial is a product of variable terms, each raised to a non-negative integer power. A polynomial in \termSMB (\abbrSMB) has the form: $\sum_{i=1}^n c_i \cdot m_i$ for each of its $n$ terms, where each $c_i \neq 0$ is an integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$. We use $\smbOf{\poly}$ to denote the \abbrSMB of $\poly$. \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% In this paper we consider the default representation of a polynomial to be in \abbrSMB. The \abbrSMB for the running example is $X^2 +2XY + Y^2$. Note that the example's SOP expansion $X^2 + XY + XY + Y^2$ is not $\smbOf{(X+Y)^2}$ since $XY$ appears twice. % \BG{Maybe inline degree?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Degree]\label{def:degree} The degree of polynomial $\poly(\vct{X})$ is the maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$. \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% The degree of the running example polynomial is $2$. Product terms in lineage arise only as a consequence of join operations, so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins in any clause of the UCQ query that created it. In this paper we consider only finite degree polynomials. % % Throughout this paper, we also make the following \textit{assumption}. 
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % \begin{Assumption}\label{assump:poly-smb} % All polynomials considered are in standard monomial basis, i.e., $\poly(\vct{X}) = \sum\limits_{\vct{d} \in \mathbb{N}^\numvar}q_d \cdot \prod\limits_{i = 1, d_i \geq 1}^{\numvar}X_i^{d_i}$, where $q_d$ is the coefficient for the monomial encoded in $\vct{d}$ and $d_i$ is the $i^{th}$ element of $\vct{d}$. % \end{Assumption} % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % We call a polynomial $\query(\vct{X})$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if %\AH{Why is it required for the tuple to be n-ary? I think this slightly confuses me since we have n tuples.} % OK: agreed w/ AH, this can be treated as implicit there exists a $\raPlus$ query $\query$, \bi $\pxdb$ (\ti $\pxdb$, or $\semNX$-PDB $\pxdb$), and tuple $\tup$ such that $\query(\vct{X}) = \query(\pxdb)(\tup)$. % Before proceeding, note that the following is assume that polynomials are \bis (which subsume \tis as a special case). As a special case of \bis, the following applies to \tis as well. Recall that in a \bi $\pxdb$, tuples are partitioned into $\ell$ blocks $\block_1, \ldots, \block_\ell$ where tuple $t_{i,j} \in \block_i$ is associated with a probability $\prob_{\tup_{i,j}} = \pd[X_{i,j} = 1]$, and is annotated with a unique variable $X_{i,j}$.\footnote{ Although only a single independent, $[\abs{\block_i}+1]$-valued variable is customarily used per block, we decompose it into $\abs{\block_i}$ correlated $\{0,1\}$-valued variables per block that can be used directly in polynomials (without an indicator function). For $t_{i,j} \in \block_i$, the event $(X_{i,j} = 1)$ corresponds to the event $(X_i = j)$ in the customary annotation scheme. } Because blocks are independent and tuples from the same block are disjoint, the probabilities $\prob_{\tup_{i,j}}$ and the blocks induce the probability distribution $\pd$ of $\pxdb$. 
We will write a \bi-lineage polynomial $\poly(\vct{X})$ for a \bi with $\ell$ blocks as $\poly(\vct{X}) = \poly(X_{1, 1},\ldots, X_{1, \abs{\block_1}},$ $\ldots, X_{\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$.\footnote{Later on in the paper, especially in~\Cref{sec:algo}, we will overload notation and rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{\block_i}$.} %\SF{Where is $\block_{i, j}$ used? Is it $X_{\block_{1, 1}}$ or $X_{\block_1, 1}$ ?} % and the probability distribution of $\pxdb$ is uniquely determined based on a probability vector $\vct{p}$ that associates each tuple a probability % variables are independent of each other (or disjoint if they are from the same block) and each variable $X$ is associated with a probability $\vct{p}(X) = \pd[X = 1]$. Thus, we are dealing with polynomials $\poly(\vct{X})$ that are annotations of a tuple in the result of a query $\query$ over a BIDB $\pxdb$ where $\vct{X}$ is the set of variables that occur in annotations of tuples of $\pxdb$. % While the definition of polynomial $\poly(\vct{X})$ over a $\bi$ input doesn't change, we introduce an alternative notation which will come in handy. Given $\ell$ blocks, we write $\poly(\vct{X})$ = $\poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$. % The number of tuples in the $\bi$ instance can be (trivially) computed as $\numvar = \sum\limits_{i = 1}^{\ell}\abs{\block_i}$ . %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Modding with a set]\label{def:mod-set} Let $S$ be a {\em set} of polynomials over $\vct{X}$. Then $\poly(\vct{X})\mod{S}$ is the polynomial obtained by taking the mod of $\poly(\vct{X})$ over {\em all} polynomials in $S$ (order does not matter). 
\end{Definition} For example, for a set of polynomials $S=\inset{X^2-X, Y^2-Y}$, taking the polynomial $2X^2 + 3XY - 2Y^2\mod S$ yields $2X+3XY-2Y$. % \begin{Definition}\label{def:mod-set-polys} Given the set of BIDB variables $\inset{X_{i,j}}$, define \setlength\parindent{0pt} \vspace*{-3mm} {\small \begin{tabular}{@{}l l} \begin{minipage}[b]{0.45\linewidth} \centering \begin{equation*} \mathcal{B}=\comprehension{X_{i,j}\cdot X_{i,j'}}{i \in [\ell], j\neq j' \in [~\abs{\block_i}~]} \end{equation*} \end{minipage}% \hspace{13mm} & \begin{minipage}[b]{0.45\linewidth} \centering \begin{equation*} \mathcal{T}=\comprehension{X_{i,j}^2-X_{i,j}}{i \in [\ell], j \in [~\abs{\block_i}~]} \end{equation*} \end{minipage} \\ \end{tabular} } \end{Definition} % \begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly} Let $\poly(\vct{X})$ be a \bi-lineage polynomial. The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is: \begin{equation*} \rpoly(\vct{X}) = \smbOf{\poly(\vct{X})} \mod \inparen{\mathcal{T} \cup \mathcal{B}}%X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u} \end{equation*} %for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{\block_s}]$, $t \neq u$. \end{Definition} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % All exponents $e > 1$ in $\smbOf{\poly(\vct{X})}$ are reduced to $e = 1$ via mod $\mathcal{T}$. Performing the modulus of $\rpoly(\vct{X})$ with $\mathcal{B}$ ensures the disjoint condition of \bi, removing monomials that contain more than one lineage variable from the same block.%, (recall the constraint on tuples from the same block being disjoint in a \bi).% any monomial containing more than one tuple from a block has $0$ probability and can be ignored). For the special case of \tis, the second step is not necessary since every block contains a single tuple. %Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent. 
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde} % Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form % $\rpoly(X_1,\ldots, X_\numvar) = $ % \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\] % \end{Definition} % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Example}\label{example:qtilde} Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blocks. The expanded derivation for $\rpoly(X, Y)$ is \begin{align*} (&X^2 + 2XY + Y^2 \mod X^2 - X) \mod Y^2 - Y\\ = ~&X + 2XY + Y^2 \mod Y^2 - Y\\ = ~& X + 2XY + Y \end{align*} \end{Example} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % Intuitively, $\rpoly(\textbf{X})$ is the \abbrSMB form of $\poly(\textbf{X})$ such that if any $X_j$ term has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$. % %When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$. % %\noindent The usefulness of this will reduction become clear in \Cref{lem:exp-poly-rpoly}. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Definition}[Valid Worlds] For probability distribution $\probDist$, % and its corresponding probability mass function $\probOf$, the set of valid worlds $\eta$ consists of all the worlds with probability value greater than $0$; i.e., for variable vector $\vct{W}$ \[ \eta = \comprehension{\vct{w}}{\probOf[\vct{W} = \vct{w}] > 0} \] \end{Definition} %We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in~\Cref{app:subsec-pre-poly-rpoly} and~\Cref{app:subsec-prop-q-qtilde}. \noindent Next, we show why the reduced form is useful for our purposes: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Define all variables $X_i$ in $\poly$ to be independent. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Lemma}\label{lem:exp-poly-rpoly} Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\probDist$ produced by the tuple probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$ over all $\vct{w}$ in $\eta$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and query $\query$ we have: % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$. \begin{equation*} \expct_{\vct{W}\sim \probDist}\pbox{\poly(\vct{W})} = \rpoly(\probAllTup). \end{equation*} \end{Lemma} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Note that in the preceding lemma, we have assigned $\vct{p}$ %(introduced in \Cref{subsec:def-data}) to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Corollary}\label{cor:expct-sop} If $\poly$ is a \bi-lineage polynomial, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$, can be computed in $O(\size\inparen{\smbOf{\poly}})$ time, where $\size\inparen{\poly}$ denotes the total number of multiplication/addition operators in $\poly$. \end{Corollary} %\AH{What if $\poly$ is not in \abbrSMB form?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: