From ece02d0b1de9e7e748c3d450ac0cad365d34fc72 Mon Sep 17 00:00:00 2001
From: Boris Glavic <lordpretzel@gmail.com>
Date: Mon, 14 Dec 2020 22:34:12 -0600
Subject: [PATCH] poly

---
 macros.tex    |  26 +++++++---
 poly-form.tex | 131 +++++++++++++++++++++++++++++---------------------
 2 files changed, 94 insertions(+), 63 deletions(-)

diff --git a/macros.tex b/macros.tex
index ab3510a..7738598 100644
--- a/macros.tex
+++ b/macros.tex
@@ -91,6 +91,7 @@
 \renewcommand{\algorithmicrequire}{\textbf{Input:}}
 \renewcommand{\algorithmicensure}{\textbf{Output:}}
 \newcommand{\smb}{\poly\left(\vct{X}\right)}%smb for standard monomial basis
+\newcommand{\smbOf}[1]{\textsc{SMB}(#1)}
 \newcommand{\etreeset}[1]{\vari{ET}\left(#1\right)}
 \newcommand{\expandtree}[1]{\vari{E}(#1)}
 \newcommand{\elist}[1]{\vari{List}\pbox{#1}}
@@ -335,18 +336,27 @@
 \newcommand{\sharpwonehard}{\#W[1]-hard\xspace}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%
+% TERMINOLOGY AND ABBREVIATIONS
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \newcommand{\expectProblem}{\textsc{Expected Result Multiplicity Problem}\xspace}
+\newcommand{\termSMB}{standard monomial basis\xspace}
+\newcommand{\abbrSMB}{SMB\xspace}
+\newcommand{\termSOP}{sum of products\xspace}
+\newcommand{\abbrSOP}{SOP}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\newcommand{\eps}{\epsilon}
+\newcommand{\inparen}[1]{\left({#1}\right)}
+\newcommand{\inset}[1]{\left\{{#1}\right\}}
+
+
+%%%Adding stuff below so that long chains of display equations can be split across pages
+\allowdisplaybreaks
+
 
 %%% Local Variables:
 %%% mode: latex
 %%% TeX-master: "main"
 %%% End:
-
-%%%Adding stuff below so that long chain of display equatoons can be split across pages
-\allowdisplaybreaks
-
-\newcommand{\eps}{\epsilon}
-\newcommand{\inparen}[1]{\left({#1}\right)}
-\newcommand{\inset}[1]{\left\{{#1}\right\}}
diff --git a/poly-form.tex b/poly-form.tex
index 13c6a0b..15ca3ee 100644
--- a/poly-form.tex
+++ b/poly-form.tex
@@ -1,64 +1,93 @@
 %root: main.tex
 %!TEX root = ./main.tex
 %\onecolumn
-\subsection{Polynomial Formulation and Equivalences}
+\subsection{Reduced Polynomials and Equivalences}
 
-Since we have shown that computing the expected multiplicity of a result tuple is equivalent to computing the expectation of a polynomial (for that tuple) given a probability distribution over all possible assignments of variables in the polynomial to $\{0,1\}$, we from now on focus on this problem exclusively.
-Before proceeding, note that the following is assuming \bis (which subsume \tis as a special case). Thus, variables are independent of each other and each variable $X$ is associated with a probability $\vct{p}(X) = \pd[X = 1]$.
+Since we have shown that computing the expected multiplicity of a query result tuple is equivalent to computing the expectation of a polynomial (for that tuple) given a probability distribution over all possible assignments of variables in the polynomial to $\{0,1\}$, we from now on focus on this problem exclusively.
+We now introduce some basic terminology for polynomials and then develop a reduced normal form for polynomials that preserves the expectation of a polynomial for probability distributions that stem from \bis or \tis.
 Let us use the expression $(x + y)^2$ as a running example in this section.
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Definition}[Monomial]\label{def:monomial}
-A monomial is a product of a set of variables, each raised to a non-negative integer power.
-\end{Definition}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \begin{Definition}[Monomial]\label{def:monomial}
+% A monomial is a product of a set of variables, each raised to a non-negative integer power.
+% \end{Definition}
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-For instance, the term $2xy$ contains a single monomial $xy$. % \Cref{def:monomial} the monomial is $xy$.
+% For instance, the term $2xy$ contains a single monomial $xy$.
+% \Cref{def:monomial} the monomial is $xy$.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Definition}[Standard Monomial Basis]\label{def:smb}
-  A polynomial is in standard monomial basis when it is of the form:
+A monomial is a product of a set of variables, each raised to a non-negative integer power.
+  A polynomial is in \termSMB (\abbrSMB) when it is of the form:
   \[
     \sum_{i=1}^n c_i \cdot m_i
   \]
-where each $c_i$ is a positive integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$.
+where each $c_i$ is a positive integer and each $m_i$ is a monomial and $m_i \neq m_j$ for $i \neq j$. Given a polynomial $\poly$ we denote its \abbrSMB as $\smbOf{\poly}$.
 %  fully expanded out such that no product of sums exist and where each unique monomial appears exactly once.
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-The standard monomial basis for the running example is $x^2 +2xy + y^2$.  While $x^2 + xy + xy + y^2$ is an expanded form of the expression, it is not the standard monomial basis since $xy$ appears more than once.
-
-Throughout this paper, we also make the following \textit{assumption}.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Assumption}\label{assump:poly-smb}
-All polynomials considered are in standard monomial basis, i.e., $\poly(\vct{X}) = \sum\limits_{\vct{d} \in \mathbb{N}^\numvar}q_d \cdot \prod\limits_{i = 1, d_i \geq 1}^{\numvar}X_i^{d_i}$, where $q_d$ is the coefficient for the monomial encoded in $\vct{d}$ and $d_i$ is the $i^{th}$ element of $\vct{d}$.
-\end{Assumption}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-While the definition of polynomial $\poly(\vct{X})$ over a $\bi$ input doesn't change, we introduce an alternative notation which will come in handy.  Given $\ell$ blocks, we write $\poly(\vct{X})$ = $\poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$.
-The number of tuples in the $\bi$ instance can be (trivially) computed as $\numvar = \sum\limits_{i = 1}^{\ell}\abs{\block_i}$ .
+The \abbrSMB for the running example is $x^2 +2xy + y^2$.  While $x^2 + xy + xy + y^2$ is an expanded form of the expression, it is not the standard monomial basis since $xy$ appears more than once.
 
+\BG{Maybe inline degree?}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Definition}[Degree]\label{def:degree}
-The degree of polynomial $\poly(\vct{X})$ is the maximum sum of the exponents of a monomial, over all monomials when $\poly(\vct{X})$ is in SOP form.
+The degree of polynomial $\poly(\vct{X})$ is the maximum sum of the exponents of a monomial, over all monomials in $\smbOf{\poly(\vct{X})}$.
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-The degree of the running example is $2$.  In this paper we consider only finite degree polynomials.
+The degree of the running example polynomial is $2$. In this paper we consider only finite degree polynomials.
+
+% Throughout this paper, we also make the following \textit{assumption}.
+
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \begin{Assumption}\label{assump:poly-smb}
+% All polynomials considered are in standard monomial basis, i.e., $\poly(\vct{X}) = \sum\limits_{\vct{d} \in \mathbb{N}^\numvar}q_d \cdot \prod\limits_{i = 1, d_i \geq 1}^{\numvar}X_i^{d_i}$, where $q_d$ is the coefficient for the monomial encoded in $\vct{d}$ and $d_i$ is the $i^{th}$ element of $\vct{d}$.
+% \end{Assumption}
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+We call a polynomial $\poly(\vct{X})$ a \emph{\bi-lineage polynomial} (\emph{\ti-lineage polynomial}), if there exists an $n$-ary $\raPlus$ query $\query$, \bi $\pxdb$ (\ti $\pxdb$), and $n$-ary tuple $\tup$ such that $\poly(\vct{X}) = \query(\pxdb)(\tup)$. % Before proceeding, note that the following is assume that polynomials are  \bis (which subsume \tis as a special case).
+Note that \tis are a special case of \bis and, thus, the following applies to \tis as well.
+Recall that in a \bi $\pdbx$ with tuples $t_1, \ldots, t_n$, each input tuple $t_i$ is annotated with a unique variable $X_i$. The tuples of $\pdbx$ are partitioned into $\ell$ blocks $\block_1, \ldots, \block_\ell$ and each tuple $t_i$ is associated with a probability $\vct{p}(\tup_i) = \pd[X_i = 1]$. Together with the assumptions that blocks are independent and that tuples from the same block are disjoint events, $\vct{p}$ and the blocks induce the probability distribution $\pd$ of $\pdbx$.
+We will write a \bi-lineage polynomial $\poly(\vct{X})$ for a \bi with $\ell$ blocks as
+$\poly(\vct{X}) = \poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$.
+% and the probability distribution of $\pdbx$ is  uniquely determined based on a probability vector $\vct{p}$ that associates each tuple a probability
+% variables are independent of each other (or disjoint if they are from the same block) and each variable $X$ is associated with a probability $\vct{p}(X) = \pd[X = 1]$. Thus, we are dealing with polynomials $\poly(\vct{X})$ that are annotations of a tuple in the result of a query $\query$ over a BIDB $\pxdb$ where $\vct{X}$ is the set of variables that occur in annotations of tuples of $\pxdb$.
+
+% While the definition of polynomial $\poly(\vct{X})$ over a $\bi$ input doesn't change, we introduce an alternative notation which will come in handy.  Given $\ell$ blocks, we write $\poly(\vct{X})$ = $\poly(X_{\block_1, 1},\ldots, X_{\block_1, \abs{\block_1}},$ $\ldots, X_{\block_\ell, \abs{\block_\ell}})$, where $\abs{\block_i}$ denotes the size of $\block_i$, and $\block_{i, j}$ denotes tuple $j$ residing in block $i$ for $j$ in $[\abs{\block_i}]$.
+% The number of tuples in the $\bi$ instance can be (trivially) computed as $\numvar = \sum\limits_{i = 1}^{\ell}\abs{\block_i}$ .
+
+
+
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
-Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
-$\rpoly(X_1,\ldots, X_\numvar) = $
-
-\[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
+\begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
+  Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
+  The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is defined as
+\begin{equation*}
+\rpoly(\vct{X}) = \smbOf{\poly(\vct{X})} \mod X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
+\end{equation*}
+for all $i$ in $[\numvar]$, all $s$ in $[\ell]$, and all $t, u$ in $[\abs{\block_s}]$ with $t \neq u$.
 \end{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+Intuitively, in the reduced form all exponents $e > 1$ are reduced to $e = 1$, and all monomials containing more than one variable from the same block $\block$ are dropped. Note that for the special case of \tis, there is no dropping of monomials since every block contains a single tuple.
+Alternatively, one can think of $\rpoly$ as the \abbrSMB of $\poly(\vct{X})$ when the product operator is idempotent.
+
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \begin{Definition}[$\rpoly(\vct{X})$] \label{def:qtilde}
+% Define $\rpoly(X_1,\ldots, X_\numvar)$ as the reduced version of $\poly(X_1,\ldots, X_\numvar)$, of the form
+% $\rpoly(X_1,\ldots, X_\numvar) = $
+
+% \[\poly(X_1,\ldots, X_\numvar) \mod X_1^2-X_1\cdots\mod X_\numvar^2 - X_\numvar.\]
+% \end{Definition}
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Example}\label{example:qtilde}
-Consider when $\poly(x, y) = (x + y)(x + y)$.  Then the expanded derivation for $\rpoly(x, y)$ is
+Consider $\poly(x, y) = (x + y)(x + y)$ where $x$ and $y$ are from different blocks.  Then the expanded derivation for $\rpoly(x, y)$ is
 \begin{align*}
 (&x^2 + 2xy + y^2 \mod x^2 - x) \mod y^2 - y\\
 = ~&x + 2xy + y^2 \mod y^2 - y\\
@@ -67,23 +96,12 @@ Consider when $\poly(x, y) = (x + y)(x + y)$.  Then the expanded derivation for
 \end{Example}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-Intuitively, $\rpoly(\textbf{X})$ is the SOP form of $\poly(\textbf{X})$ such that if any $X_j$ term  has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
-Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting SOP of $\poly(\vct{X})$ with an idemptent product operator.
+% Intuitively, $\rpoly(\textbf{X})$ is the \abbrSMB form of $\poly(\textbf{X})$ such that if any $X_j$ term  has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.
 
-When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Definition}[$\rpoly$ $\bi$ Redefinition]
-A polynomial $\poly(\vct{X})$ over a $\bi$ instance is reduced to $\rpoly(\vct{X})$ with the following criteria.  First, all exponents $e > 1$ are reduced to $e = 1$.  Second, all monomials sharing the same $\block$ are dropped.  Formally this is expressed as
+%When considering $\bi$ input, it becomes necessary to redefine $\rpoly(\vct{X})$.
 
-\begin{equation*}
-\rpoly(\vct{X}) = \poly(\vct{X}) \mod X_i^2 - X_i \mod X_{\block_s, t}X_{\block_s, u}
-\end{equation*}
-for all $i$ in $[\numvar]$ and for all $s$ in $\ell$, such that for all $t, u$ in $[\abs{block_s}]$, $t \neq u$.
-\end{Definition}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-The usefulness of this reduction will be seen in ~\cref{lem:exp-poly-rpoly}.
+The usefulness of this reduction becomes clear in \Cref{lem:exp-poly-rpoly}.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Lemma}\label{lem:pre-poly-rpoly}
@@ -91,37 +109,40 @@ When $\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\n
 \end{Lemma}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{proof}
-Follows by the construction of $\rpoly$ in \cref{def:qtilde}. \qed
+Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}. \qed
 \end{proof}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-
 Note the following fact:
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Proposition}\label{proposition:q-qtilde}
-\[\text{For all } (X_1,\ldots, X_\numvar) \in \{0, 1\}^\numvar, \poly(X_1,\ldots, X_\numvar) = \rpoly(X_1,\ldots, X_\numvar).\]
+\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \{0,1\}^\numvar$ that set at most one variable of each block to $1$,
+  \[
+    \poly(\vct{w}) = \rpoly(\vct{w}).
+    \]
 \end{Proposition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{proof}[Proof for Proposition ~\ref{proposition:q-qtilde}]
-Note that any $\poly$ in factorized form is equivalent to its sum of product expansion.  For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \qed
+Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion.  For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \qed
 \end{proof}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 
-Define all variables $X_i$ in $\poly$ to be independent.
+%Define all variables $X_i$ in $\poly$ to be independent.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Lemma}\label{lem:exp-poly-rpoly}
-The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
+Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$. For any \bi-lineage polynomial $\poly(\vct{X})$ we have
+  % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
 \begin{equation*}
-\expct_{\vct{w}}\pbox{\poly(\vct{w})}  = \rpoly(\prob_1,\ldots, \prob_\numvar).
+\expct_{\vct{X}}\pbox{\poly(\vct{X})}  = \rpoly(\vct{p}).
 \end{equation*}
 \end{Lemma}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-Note that in the preceding lemma, we have assigned $\vct{p}$ (introduced in ~\cref{subsec:def-data}) to the variables $\vct{X}$.
+Note that in the preceding lemma, we have assigned $\vct{p}$ (introduced in \Cref{subsec:def-data}) to the variables $\vct{X}$. Intuitively, \Cref{lem:exp-poly-rpoly} states that when we replace each variable $X_i$ with its probability $\prob_i$ in the reduced form of a \bi-lineage polynomial and evaluate the resulting expression in $\mathbb{R}$, then the result is the expectation of the polynomial.
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{proof}[Proof for Lemma ~\ref{lem:exp-poly-rpoly}]
 %Using the fact above, we need to compute \[\sum_{(\wbit_1,\ldots, \wbit_\numvar) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numvar)\].  We therefore argue that
 %\[\sum_{(\wbit_1,\ldots, \wbit_\numvar) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numvar) = 2^\numvar \cdot \rpoly(\frac{1}{2},\ldots, \frac{1}{2}).\]
@@ -157,7 +178,7 @@ Finally, observe \cref{p1-s5} by construction in \cref{lem:pre-poly-rpoly}, that
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Corollary}\label{cor:expct-sop}
-If $\poly$ is given as a sum of monomials, the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $O(|\poly|)$, where $|\poly|$ denotes the total number of multiplication/addition operators.
+If $\poly$ is given as a sum of monomials, the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$, can be computed in $O(|\poly|)$ time, where $|\poly|$ denotes the total number of multiplication/addition operators in $\poly$.
 \end{Corollary}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{proof}[Proof For Corollary ~\ref{cor:expct-sop}]