Pulled more S2 material into the appendix

master
Aaron Huber 2020-12-19 15:04:14 -05:00
parent 5263bde6ac
commit f04846020d
5 changed files with 47 additions and 36 deletions

View File

@ -46,8 +46,7 @@ $\semNX$-PDBs are a complete representation system for $\semN$-PDBs that is clos
\end{Proposition}
\subsection{Proof of~\Cref{prop:semnx-pdbs-are-a-}}
\AH{I made small changes to the proof, notably the summation, the variable definition, and the world subscript; I am not sure whether the last of these is the best notation.}
To prove that $\semNX$-PDBs are complete, consider the following construction that for any $\semN$-PDB $\pdb = (\idb, \pd)$ produces an $\semNX$-PDB $\pxdb = (\db, \pd')$ such that $\rmod(\pxdb) = \pdb$. Let $\idb = \{D_1, \ldots, D_{\abs{\idb}}\}$ and let $\max(D_i)$ denote $\max_{\tup} D_i(\tup)$. For each world $D_i$ we create a corresponding variable $X_i$.
%variables $X_{i1}$, \ldots, $X_{im}$ where $m = max(D_i)$.
In $\db$ we assign each tuple $\tup$ the polynomial:
@ -94,10 +93,33 @@ Note that the main difference to the standard definitions of \tis and \bis is th
A well-known result for set semantics PDBs is that while not all finite PDBs can be encoded as \tis, any finite PDB can be encoded using a \ti and a query. An analogous result holds in our case: any finite $\semN$-PDB can be encoded as a bag \ti and a query (WHAT CLASS? ADD PROOF)
}
\subsection{\Cref{lem:pre-poly-rpoly}}\label{app:subsec-pre-poly-rpoly}
\begin{Lemma}\label{lem:pre-poly-rpoly}
If
$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
then
$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$% \;\;\; for some $\eta \subseteq \{0,\ldots, B\}^\numvar$
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[Proof for~\Cref{lem:pre-poly-rpoly}]
Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}. \qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof for Proposition~\ref{proposition:q-qtilde}}
\subsection{Proposition~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
\noindent Note the following fact:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \eta$, it holds that
$% \[
\poly(\vct{w}) = \rpoly(\vct{w}).
$% \]
\end{Proposition}
\begin{proof}[Proof for~\Cref{proposition:q-qtilde}]
Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion. For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$. \qed
\end{proof}
@ -108,10 +130,10 @@ Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numvar$ var
Then, assigning $\vct{w}$ to $\vct{X}$, for expectation we have
\begin{align}
\expct_{\vct{w}}\pbox{\poly(\vct{w})} &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
\expct_{\vct{w}}\pbox{\poly(\vct{w})} &= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5}
\end{align}
@ -221,7 +243,7 @@ When $\eset{1} \equiv \oneint$, the inner edges $(e_i, 1)$ of $\eset{2}$ are all
\item $3$-path ($\threepath$)
\end{itemize}
When $\eset{1} \equiv\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected. This means that the edges of $\eset{2}$ form a $6$-path in the edges of $f_2^{-1}(\eset{1})$, where all edges from $(e_1, 0),\ldots,(e_3, 1)$ are successively connected. For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$. % there must be at least one edge separating edges picked from a sequence.
There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}, \pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}, \pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$\newline $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$ . Thus, there are four possible 3-matchings in $f_2^{-1}(\eset{1})$.
There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}$, and $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$. Thus, there are four possible $3$-matchings in $f_2^{-1}(\eset{1})$.
\begin{itemize}
\item Triangle ($\tri$)
@ -362,8 +384,10 @@ Consider now the random variables $\randvar_1,\dots,\randvar_\numvar$, where eac
\[Y_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{v}} p_i,\]
where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
Then for random variable $\randvar_i$, it is the case that
\[\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{\etree}(1,\dots,1)} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},\]
\begin{align*}
\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expandtree{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{\etree}(1,\dots,1)} \\
&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},
\end{align*}
where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.
Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$. It is also true that
@ -666,8 +690,6 @@ The circuit for $Q$ has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
\intertext{(By definition of $\qruntime{Q}$)}
& \le (k-1)\qruntime{Q}.
\end{align*}
\AH{In the inductive step above, where does $\abs{\poly_1}$ come from? I understand that $b_i$ is part of the inductive hypothesis, but, is it \emph{legal/justifiable} to just throw in \emph{any} constant we so desire?}
\caseheading{Union}
Assume that $Q = Q_1 \cup Q_2$.
The circuit for $Q$ has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.

View File

@ -21,7 +21,7 @@
\usepackage[normalem]{ulem}
\usepackage{subcaption}
\usepackage{booktabs}
\usepackage[disable]{todonotes}
\usepackage{todonotes}
\usepackage{graphicx}
\usepackage{listings}
%%%%%%%%%% SQL + provenance listing settings

View File

@ -122,27 +122,16 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
\noindent The usefulness of this reduction will become clear in \Cref{lem:exp-poly-rpoly}.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:pre-poly-rpoly}
If
$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
then
$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$ \;\;\; for some $\eta \subseteq \{0,\ldots, B\}^\numvar$
\end{Lemma}
\begin{Definition}[Valid Worlds]
For probability distribution $\vct{P}$ and its corresponding PMF $P$, the set of valid worlds $\eta$ consists of all worlds that have probability greater than $0$. Formally, for random variable $\vct{W}$,
\[
\eta = \{\vct{w}\st P[\vct{W} = \vct{w}] > 0\}
\]
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}
Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}. \qed
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in~\Cref{app:subsec-pre-poly-rpoly} and~\Cref{app:subsec-prop-q-qtilde}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent Note the following fact:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \{0,1\}^\numvar$, it holds that
$% \[
\poly(\vct{w}) = \rpoly(\vct{w}).
$% \]
\end{Proposition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -154,10 +143,10 @@ $% \]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Lemma}\label{lem:exp-poly-rpoly}
Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and some query $\query$ we have
Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$ over all $\vct{w}$ in $\eta$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and some query $\query$ we have
% The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
\begin{equation*}
\expct_{\vct{W}}\pbox{\poly(\vct{W})} = \rpoly(\vct{p}).
\expct_{\vct{w}\sim \vct{p}}\pbox{\poly(\vct{w})} = \rpoly(\vct{p}).
\end{equation*}
\end{Lemma}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -134,7 +134,7 @@ We ignore the remaining fields (\vari{partial} and \vari{weight}) until \Cref{se
For our running example, $\etreeset{\smb} = \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$. Note that \cref{def:express-tree-set} implies that $\etree \in \etreeset{poly(\etree)}$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\medskip
%\medskip
\noindent We are now ready to formally state our \textbf{main problem}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -89,7 +89,7 @@ For any $p$, we have:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}%[Proof of \Cref{lem:qE3-exp}]
\begin{proof}[Proof of \Cref{lem:qE3-exp}]
By definition we have that
\[\poly_{G}^3(\vct{X}) = \sum_{\substack{(i_1, j_1), (i_2, j_2), (i_3, j_3) \in E}}~\; \prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}.\]
Hence $\rpoly_{G}^3(\vct{X})$ has degree six. Note that the monomial $\prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}$ will contribute to the coefficient of $p^\nu$ in $\rpoly_{G}^3(\vct{X})$, where $\nu$ is the number of distinct variables in the monomial.