Changes/restructuring S 2.

2022-02-07 12:09:43 -05:00 · 2022-02-07 12:09:43 -05:00 · f54b54a7bc
parent 69d98d5947
commit f54b54a7bc
4 changed files with 57 additions and 36 deletions
--- a/macros.tex
+++ b/macros.tex
@ -227,6 +227,8 @@
 \newcommand{\numvar}{n}
 %Vector
 \newcommand{\vct}[1]{{\bf #1}}
+%Norm: wraps #1 in auto-sized double bars (\left\lVert ... \right\rVert); used as $\norm{\vct{d}}_1$ in the degree definition
+\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
 %using \wVec for world bit vector notation<-----Is this still the case?
 %Polynomial
 \newcommand{\poly}{\Phi}
--- a/poly-form.tex
+++ b/poly-form.tex
@ -1,37 +1,10 @@
 %root: main.tex
 %!TEX root = ./main.tex
 %\onecolumn
-\subsection{Reduced Polynomials and Equivalences}
-
-We now introduce some terminology 
-and develop a reduced form of lineage polynomials for a \abbrBIDB or \abbrTIDB.
-Note that a polynomial over $\vct{X}=(X_1,\dots,X_n)$ with individual degree $B <\infty$ 
-is formally defined as (where $c_{\vct{d}}\in \semN$): 
-\begin{equation}
-  \label{eq:sop-form}
-\poly\inparen{X_1,\dots,X_n}=\sum_{\vct{d}\in\{0,\ldots,B\}^n} c_{\vct{d}}\cdot \prod_{i=1}^n X_i^{d_i}.
-\end{equation}
-%where $c_{\vct{d}}\in \semN$.
+%\subsection{Reduced Polynomials and Equivalences}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Definition}[Standard Monomial Basis]\label{def:smb}
-The term $\prod_{i=1}^n X_i^{d_i}$ in \Cref{eq:sop-form} is a {\em monomial}. A polynomial $\poly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
-\end{Definition}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-Unless othewise noted, we consider all polynomials to be in \abbrSMB representation. 
-When it is unclear, we use $\smbOf{\poly}$ to denote the \abbrSMB form of a polynomial $\poly$.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Definition}[Degree]\label{def:degree-of-poly}
-The degree of polynomial $\poly(\vct{X})$ is the largest $\sum_{i=1}^n d_i$ such that $c_{(d_1,\dots,d_n)}\ne 0$. % maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$.
-\end{Definition}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-As an example, the degree of the polynomial $X^2+2XY^2+Y^2$ is $3$.
-Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins needed to produce a result tuple.
-%in any clause of the $\raPlus$ query that created it.
-We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\bi-lineage polynomial} (resp., \emph{\ti-lineage polynomial}, or simply lineage polynomial), if there exists a $\raPlus$ query $\query$, \bi (\ti) $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
+\AH{\Cref{def:reduced-poly} replaces this.}
 \begin{Definition}[Reduced \bi Polynomials]\label{def:reduced-bi-poly}
  Let $\poly(\vct{X})$ be a \bi-lineage polynomial.
  The reduced form $\rpoly(\vct{X})$ of $\poly(\vct{X})$ is the same as \Cref{def:reduced-poly} with the added constraint that all monomials with variables $X_{\block, i}, X_{\block, j}, i\neq j$ from the same block $\block$ are omitted.
@ -55,6 +28,7 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo


 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\AH{Reduction \emph{I think} combined with~\Cref{lem:tidb-reduce-poly} should replace this.  In fact, \Cref{lem:tidb-reduce-poly} also works for \abbrBIDB\xplural, so maybe we should still state this, but note that it follows from~\Cref{lem:tidb-reduce-poly}.}
 \begin{Lemma}\label{lem:exp-poly-rpoly}
 Let $\pdb$ be a \abbrBIDB over $\numvar$ input tuples such that the probability distribution $\pdassign$ over $\{0,1\}^\numvar$ (the all worlds set) is induced by the probability vector $\probAllTup = (\prob_1, \ldots, \prob_\numvar)$.  As in \Cref{lem:tidb-reduce-poly} for \abbrTIDB, any \abbrBIDB-lineage polynomial $\poly(\vct{X})$ based on $\pdb$ and query $\query$ we have:
  % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
--- a/prob-def.tex
+++ b/prob-def.tex
@ -84,10 +84,9 @@ The circuit of \Cref{fig:circuit} is an element of $\circuitset{2X^2+3XY-2Y^2}$.

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \medskip
-\AH{We do not have a formal definition (other than the short sentence in the intro) stating or reminding the reader of what $\dbbase$ is.}
 \noindent We are now ready to formally state the final version of \Cref{prob:intro-stmt}.%our \textbf{main problem}.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\AH{It might be useful/instructive to formally define $\pdassign$.}
+
 \begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
 Let $\pdb$ be an arbitrary \abbrBIDB-PDB and $\vct{X}$ be the set of variables annotating tuples in $\dbbase$.  Fix an $\raPlus$ query $\query$ and a result tuple $\tup$.
  The \expectProblem is defined as follows:\\[-7mm]
--- a/ra-to-poly.tex
+++ b/ra-to-poly.tex
@ -3,17 +3,57 @@
 %\onecolumn
 \section{Background and Notation}\label{sec:background}

+\subsection{Polynomial Definition and Terminology}
+%We now introduce some terminology 
+%and develop a reduced form of lineage polynomials for a \abbrBIDB or \abbrTIDB.
+%Note that 
+\secrev{A }
+ polynomial over $\vct{X}=(X_1,\dots,X_n)$ with individual degree $B <\infty$ 
+is formally defined as (where $c_{\vct{d}}\in \semN$): 
+\begin{equation}
+  \label{eq:sop-form}
+\poly\inparen{X_1,\dots,X_n}=\secrev{\sum_{\vct{d}\in\{0,\ldots,B\}^\tupset} c_{\vct{d}}\cdot \prod_{\tup\in\tupset} X_\tup^{d_\tup}.}
+\end{equation}
+%where $c_{\vct{d}}\in \semN$.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{Definition}[Standard Monomial Basis]\label{def:smb}
+The term $\prod_{\tup\in\tupset} X_\tup^{d_\tup}$ in \Cref{eq:sop-form} is a {\em monomial}. A polynomial $\poly\inparen{\vct{X}}$ is in standard monomial basis (\abbrSMB) when we keep only the terms with $c_{\vct{d}}\ne 0$ from \Cref{eq:sop-form}.
+\end{Definition}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+Unless otherwise noted, we consider all polynomials to be in \abbrSMB representation. 
+When it is unclear, we use $\smbOf{\poly}$ to denote the \abbrSMB form of a polynomial $\poly$.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{Definition}[Degree]\label{def:degree-of-poly}
+The degree of polynomial $\poly(\vct{X})$ is the largest \secrev{$\norm{\vct{d}}_1$} % = \sum_{\tup\in\tupset} d_\tup$ 
+such that $c_{\vct{d}}\ne 0$. % maximum sum of exponents, over all monomials in $\smbOf{\poly(\vct{X})}$.
+\end{Definition}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+As an example, the degree of the polynomial $X^2+2XY^2+Y^2$ is $3$.
+Product terms in lineage arise only from join operations (\Cref{fig:nxDBSemantics}), so intuitively, the degree of a lineage polynomial is analogous to the largest number of joins needed to produce a result tuple.
+%in any clause of the $\raPlus$ query that created it.
+\secrev{
+We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynomial} (%resp., \emph{\ti-lineage polynomial}, 
+or simply lineage polynomial), if there exists a $\raPlus$ query $\query$, \abbrCTIDB $\pdb$, and result tuple $\tup$ such that $\poly\inparen{\vct{X}} = \apolyqdt\inparen{\vct{X}}.$
+}
+
 \subsection{Probabilistic Databases}

 Following the typical representation of bags in production databases, for query inputs, we will use \abbrBPDB\xplural with multiplicities $\{0, 1\}$ (see \Cref{sec:gener-results-beyond} for more on this choice).
+
+\AH{Not sure that we need such a general \abbrPDB description, since we now only deal with \abbrCTIDB\xplural and $1$-\abbrBIDB\xplural.  I \emph{think} the discussion of possible world semantics is necessary for the definition of~\Cref{prop:expection-of-polynom}...unless of course this is rewritten in a way that does not necessitate such discussion.}
 % and a unique tuple-id field to allow duplicate tuples.

 An \textit{incomplete database} $\idb$ is a set of deterministic databases $\db$ called possible worlds.
-A \textit{probabilistic database} $\pdb$ is a pair $(\idb, \pd)$ where $\idb$ is an incomplete database and $\pd$ is a probability distribution over $\idb$. Queries over probabilistic databases are evaluated using the so-called possible world semantics. Under the possible world semantics, the result of a query $\query$ over an incomplete database $\idb$ is the set of query answers produced by evaluating $\query$ over each possible world: $\query(\idb) = \comprehension{\query(\db)}{\db \in \idb}$.
-
-For a probabilistic  database $\pdb = (\idb, \pd)$,  the result of a query is the pair $(\query(\idb), \pd')$ where $\pd'$ is a probability distribution over $\query(\idb)$  that assigns to each possible query result the sum of the probabilities of the worlds that produce this answer:
+\secrev{
+A \abbrCTIDB $\pdb$ is a pair $\inparen{\worlds, \bpd}$ such that $\worlds$ is an incomplete database and $\bpd$ is a probability distribution over $\worlds$.  Queries over probabilistic databases (and thus \abbrCTIDB\xplural) are evaluated using the so-called possible world semantics.  Under the possible world semantics, the result of a query $\query$ over an incomplete database $\worlds$ is the set of query answers produced by evaluating $\query$ over each possible world $\worldvec\in\worlds$: $\query\inparen{\worlds} = \inset{\query\inparen{\worldvec}: \worldvec\in\worlds}$.

+The result of a query is the pair $\inparen{\query\inparen{\worlds}, \bpd'}$ where $\bpd'$ is a probability distribution that assigns to each possible query result the sum of the probabilities of the worlds that produce this answer: $\probOf\pbox{\worldvec\in\worlds} = \sum\limits_{\substack{\worldvec'\in\worlds,\\\query\inparen{\worldvec'}=\query\inparen{\worldvec}}}\probOf\pbox{\worldvec'}$.
+}
+\AH{Move~\Cref{prop:expection-of-polynom} to after/at the end of~\Cref{subsec:tidbs-and-bidbs}.}
 Recall \Cref{fig:nxDBSemantics} which defines the lineage polynomial $\apolyqdt$ for any $\raPlus$ query.  We now make a meaningful connection between possible world semantics and world assignments on the lineage polynomial.
+\AH{Wondering if this proposition should be rewritten and proved in the setting of \abbrCTIDB and $1$-\abbrBIDB.  The proof is (I think) \emph{trivial} for \abbrCTIDB since all worlds are possible worlds and by definition we have a mapping between both.  May need to say something about the base polynomials and semantics of query evaluation on the polynomials.}

 \begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}
 Given a \abbrBPDB $\pdb = (\idb,\pd)$, $\raPlus$ query $\query$, and lineage polynomial $\apolyqdt$ for arbitrary result tuple $\tup$, %$\semNX$-\abbrPDB $\pxdb = (\idb_{\semNX}',\pd')$ where $\rmod(\pxdb) = \pdb$,
@ -35,8 +75,14 @@ the tuples in $\dbbase$ can be partitioned into a set of $\ell$ blocks such that
 }
 Each tuple $\tup_{i, j}$ is annotated with a random variable $\randWorld_{i, j} \in \{0, 1\}$ denoting its presence in a possible world $\db$.  The probability distribution $\pd$ over $\dbbase$ is the one induced from individual tuple probabilities $\prob_{i, j}\in \vct{\prob}=\inparen{\prob_{1, 1},\ldots,\prob_{\abs{\block},\ldots,\abs{\block_{\abs{\block}}}}}$ (where $\forall i$, $\sum_j p_{i,j}\le 1$) and the conditions on the blocks.  A \abbrTIDB is a \abbrBIDB where each block has size exactly $1$.

-Instead of looking only at the possible worlds of $\pdb$, one can consider all worlds, including those that cannot exist due to disjointness.  Then all worlds set can be modeled by $\vct{\randWorld}\in \{0, 1\}^\numvar$,\footnote{Here and later, especially in \Cref{sec:algo}, we will rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{b_i}$.} such that $\randWorld_k \in \vct{\randWorld}$ represents the presence of $\tup_{i, j}$ (where $k = \sum_{\ell = 1}^{i - 1} \abs{b_\ell} + j$).  We denote a probability distribution over all $\vct{\randWorld} \in \{0, 1\}^\numvar$ as $\pdassign$.  When $\pdassign$ is the one induced from each $\prob_{i, j}$ while assigning $\probOf\pbox{\vct{\randWorld}} = 0$ for any $\vct{\randWorld}$ with $\randWorld_{i, j} = \randWorld_{i, k} = 1$ for any block $i$ and $j\neq k$, we end up with a bijective mapping from $\pd$ to $\pdassign$, such that each mapping is equivalent, implying the distributions are equivalent.
-\Cref{subsec:supp-mat-ti-bi-def} has more details. % explains \abbrTIDB\xplural and \abbrBIDB\xplural in greater detail.
+\AH{Need to check whether we should change the notation to $\prob_{\tup, j}\in\vct{\prob}=\inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}$.}
+
+Instead of looking only at the possible worlds of $\pdb$, one can consider all worlds, including those that cannot exist due to disjointness.  Then the all worlds set can be modeled by $\vct{\randWorld}\in \{0, 1\}^\numvar$%
+\AH{We can use the new notation $\vct{W}\in\inset{0, 1}^\tupset$ here.}%
+,\footnote{Here and later, especially in \Cref{sec:algo}, we will rename the variables as $X_1,\dots,X_n$, where $n=\sum_{i=1}^\ell \abs{b_i}$.} such that $\randWorld_k \in \vct{\randWorld}$ represents the presence of $\tup_{i, j}$ (where $k = \sum_{\ell = 1}^{i - 1} \abs{b_\ell} + j$).  We denote a probability distribution over all $\vct{\randWorld} \in \{0, 1\}^\numvar$ as $\pdassign$.  When $\pdassign$ is the one induced from each $\prob_{i, j}$ while assigning $\probOf\pbox{\vct{\randWorld}} = 0$ for any $\vct{\randWorld}$ with $\randWorld_{i, j} = \randWorld_{i, k} = 1$ for any block $i$ and $j\neq k$, we end up with a bijective mapping from $\pd$ to $\pdassign$, such that each mapping is equivalent, implying the distributions are equivalent.
+\Cref{subsec:supp-mat-ti-bi-def} has more details. 
+
+\AH{Above, we need to use new notation of $\bpd$ instead of $\pd$, and we can use $\bpd'$ for the mapping discussion and note that $\bpd\equiv\bpd'$.}% explains \abbrTIDB\xplural and \abbrBIDB\xplural in greater detail.


 %%% Local Variables: