Pulled more S2 material into the appendix

2020-12-19 15:04:14 -05:00 · 2020-12-19 15:04:14 -05:00 · f04846020d
parent 5263bde6ac
commit f04846020d
5 changed files with 47 additions and 36 deletions
--- a/hardness-app.tex
+++ b/hardness-app.tex
@ -46,8 +46,7 @@ $\semNX$-PDBs are a complete representation system for $\semN$-PDBs that is clos
 \end{Proposition}

 \subsection{Proof of~\Cref{prop:semnx-pdbs-are-a-}} 
-\AH{I made small changes to the proof, noteably the summation, the variable definition and the world subscript, the latter of which I am not sure if it is the best notation or not.}
-	
+
  To prove that $\semNX$-PDBs are complete, consider the following construction that for any $\semN$-PDB $\pdb = (\idb, \pd)$ produces an $\semNX$-PDB $\pxdb = (\db, \pd')$  such that $\rmod(\pxdb) = \pdb$. Let $\idb = \{D_1, \ldots, D_{\abs{\idb}}\}$ and let $\max(D_i)$ denote $\max_{\tup} D_i(\tup)$. For each world $D_i$ we create a corresponding variable $X_i$.
 %variables $X_{i1}$, \ldots, $X_{im}$ where $m = max(D_i)$.  
 In $\db$ we assign each tuple $\tup$ the polynomial:
@ -94,10 +93,33 @@ Note that the main difference to the standard definitions of \tis and \bis is th
 A well-known result for set semantics PDBs is that while not all finite PDBs can be encoded as \tis, any finite PDB can be encoded using a \ti and a query. An analogous result holds in our case: any finite $\semN$-PDB can be encoded as a bag \ti and a query (WHAT CLASS? ADD PROOF)
 }

+\subsection{\Cref{lem:pre-poly-rpoly}}\label{app:subsec-pre-poly-rpoly}
+\begin{Lemma}\label{lem:pre-poly-rpoly}
+If
+$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
+then
+$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$% \;\;\;  for some $\eta \subseteq \{0,\ldots, B\}^\numvar$
+\end{Lemma}

+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{proof}[Proof for~\Cref{lem:pre-poly-rpoly}]
+This follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}.
+\end{proof}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-\subsection{Proof for Proposition ~\ref{proposition:q-qtilde}}
+\subsection{Proposition~\ref{proposition:q-qtilde}}\label{app:subsec-prop-q-qtilde}
+\noindent Note the following fact:
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \eta$, it holds that 
+$%  \[
+    \poly(\vct{w}) = \rpoly(\vct{w}).
+$%    \]
+\end{Proposition}
+
+\begin{proof}[Proof for~\Cref{proposition:q-qtilde}]
 Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion.  For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
+\end{proof}



@ -108,10 +130,10 @@ Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numvar$ var

 Then, assigning $\vct{w}$ to $\vct{X}$, for expectation we have
 \begin{align}
-\expct_{\vct{w}}\pbox{\poly(\vct{w})} &= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
-&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\
-&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\
-&= \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
+\expct_{\vct{w}}\pbox{\poly(\vct{w})} &= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
+&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\
+&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\
+&= \sum_{\vct{d} \in \eta}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
 &= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5}
 \end{align}

@ -221,7 +243,7 @@ When $\eset{1} \equiv \oneint$, the inner edges $(e_i, 1)$ of $\eset{2}$ are all
 	\item $3$-path ($\threepath$)
 \end{itemize}
 When $\eset{1} \equiv\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected.  This means that the edges of $\eset{2}$ form a $6$-path in the edges of $f_2^{-1}(\eset{1})$, where all edges from $(e_1, 0),\ldots,(e_3, 1)$ are successively connected.  For a $3$-matching to exist in $f_2^{-1}(\eset{1})$, we cannot pick both $(e_i,0)$ and $(e_i,1)$. % there must be at least one edge separating edges picked from a sequence. 
- There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}, \pbrace{(e_1, 0), (e_2, 0), (e_3, 1)},  \pbrace{(e_1, 0), (e_2, 1), (e_3, 1)},$\newline $\pbrace{(e_1, 1), (e_2, 1),  (e_3, 1)}$ . Thus, there are four possible 3-matchings in $f_2^{-1}(\eset{1})$.
+ There are four such possibilities: $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}$, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 1)}$, $\pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}$, and $\pbrace{(e_1, 1), (e_2, 1), (e_3, 1)}$. Thus, there are four possible 3-matchings in $f_2^{-1}(\eset{1})$.

 \begin{itemize}
 	\item Triangle ($\tri$)
@ -362,8 +384,10 @@ Consider now the random variables $\randvar_1,\dots,\randvar_\numvar$, where eac
 \[Y_i= \onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{v}} p_i,\]
 where the indicator variable handles the check in~\Cref{alg:check-duplicate-block}.
 Then for random variable $\randvar_i$, it is the case that
-
-\[\expct\pbox{\randvar_i} = \sum\limits_{(\monom, \coef) \in \expandtree{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{\etree}(1,\dots,1)} = \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},\]
+\begin{align*}
+\expct\pbox{\randvar_i} &= \sum\limits_{(\monom, \coef) \in \expandtree{\etree} }\frac{\onesymbol\inparen{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot c\cdot\prod_{X_i\in \var\inparen{v}} p_i }{\abs{\etree}(1,\dots,1)} \\
+&= \frac{\rpoly(\prob_1,\ldots, \prob_\numvar)}{\abs{\etree}(1,\ldots, 1)},
+\end{align*}
 where in the first equality we use the fact that $\vari{sgn}_{\vari{i}}\cdot \abs{\coef}=\coef$ and the second equality follows from~\cref{eq:tilde-Q-bi} with $X_i$ substituted by $\prob_i$.

 Let $\empmean = \frac{1}{\samplesize}\sum_{i = 1}^{\samplesize}\randvar_i$.  It is also true that
@ -666,8 +690,6 @@ The circuit for $Q$ has at most $|V_{Q_1}|+|{Q_1}|$ vertices.
 \intertext{(By definition  of $\qruntime{Q}$)}
 & \le (k-1)\qruntime{Q}.
 \end{align*}
-\AH{In the inductive step above, where does $\abs{\poly_1}$ come from?  I understand that $b_i$ is part of the inductive hypothesis, but, is it \emph{legal/justifiable} to just throw in \emph{any} constant we so desire?}
-
 \caseheading{Union}
 Assume that $Q = Q_1 \cup Q_2$.
 The circuit for $Q$ has $|V_{Q_1}|+|V_{Q_2}|+|{Q_1} \cap {Q_2}|$ vertices.
--- a/main.tex
+++ b/main.tex
@ -21,7 +21,7 @@
 \usepackage[normalem]{ulem}
 \usepackage{subcaption}
 \usepackage{booktabs}
-\usepackage[disable]{todonotes}
+\usepackage{todonotes}
 \usepackage{graphicx}
 \usepackage{listings}
 %%%%%%%%%% SQL + provenance listing settings
--- a/poly-form.tex
+++ b/poly-form.tex
@ -122,27 +122,16 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
 \noindent The usefulness of this reduction will become clear in \Cref{lem:exp-poly-rpoly}.
 %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Lemma}\label{lem:pre-poly-rpoly}
-If
-$\poly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \{0,\ldots, B\}^\numvar}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i^{d_i}$
-then
-$\rpoly(X_1,\ldots, X_\numvar) = \sum\limits_{\vct{d} \in \eta} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numvar}X_i$ \;\;\;  for some $\eta \subseteq \{0,\ldots, B\}^\numvar$
-\end{Lemma}
+\begin{Definition}[Valid Worlds]
+For probability distribution $\vct{P}$ and its corresponding PMF $P$, the set of valid worlds $\eta$ consists of all worlds that have a probability value greater than $0$. Formally, for random variable $\vct{W}$,
+\[
+\eta = \{\vct{w}\st P[\vct{W} = \vct{w}] > 0\}.
+\]
+\end{Definition}

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{proof}
-Follows by the construction of $\rpoly$ in \cref{def:reduced-bi-poly}. \qed
-\end{proof}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+We state additional equivalences between $\poly(\vct{X})$ and $\rpoly(\vct{X})$ in~\Cref{app:subsec-pre-poly-rpoly} and~\Cref{app:subsec-prop-q-qtilde}.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-\noindent Note the following fact:
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{Proposition}\label{proposition:q-qtilde} For any \bi-lineage polynomial $\poly(X_1, \ldots, X_\numvar)$ and all $\vct{w} \in \{0,1\}^\numvar$, it holds that 
-$%  \[
-    \poly(\vct{w}) = \rpoly(\vct{w}).
-$%    \]
-\end{Proposition}


 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -154,10 +143,10 @@ $%    \]

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Lemma}\label{lem:exp-poly-rpoly}
-Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and some query $\query$ we have
+Let $\pxdb$ be a \bi over variables $\vct{X} = \{X_1, \ldots, X_\numvar\}$ and with probability distribution $\vct{p} = (\prob_1, \ldots, \prob_\numvar)$ over all $\vct{w}$ in $\eta$. For any \bi-lineage polynomial $\poly(\vct{X})$ based on $\pxdb$ and some query $\query$ we have
  % The expectation over possible worlds in $\poly(\vct{X})$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numvar)$.
 \begin{equation*}
-\expct_{\vct{W}}\pbox{\poly(\vct{W})}  = \rpoly(\vct{p}).
+\expct_{\vct{W}\sim \vct{p}}\pbox{\poly(\vct{W})}  = \rpoly(\vct{p}).
 \end{equation*}
 \end{Lemma}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/ra-to-poly.tex
+++ b/ra-to-poly.tex
@ -134,7 +134,7 @@ We ignore the remaining fields (\vari{partial} and \vari{weight}) until \Cref{se
 For our running example, $\etreeset{\smb} = \{2X^2 + 3XY - 2Y^2, (X + 2Y)(2X - Y), X(2X - Y) + 2Y(2X - Y), 2X(X + 2Y) - Y(X + 2Y)\}$.  Note that \cref{def:express-tree-set} implies that $\etree \in \etreeset{poly(\etree)}$.

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\medskip
+%\medskip
 \noindent We are now ready to formally state our \textbf{main problem}.

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/single_p.tex
+++ b/single_p.tex
@ -89,7 +89,7 @@ For any $p$, we have:
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{proof}%[Proof of \Cref{lem:qE3-exp}]
+\begin{proof}[Proof of \Cref{lem:qE3-exp}]
 By definition we have that
 		\[\poly_{G}^3(\vct{X}) = \sum_{\substack{(i_1, j_1), (i_2, j_2), (i_3, j_3) \in E}}~\; \prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}.\]
 Hence $\rpoly_{G}^3(\vct{X})$ has degree six. Note that the monomial $\prod_{\ell = 1}^{3}X_{i_\ell}X_{j_\ell}$ will contribute to the coefficient of $p^\nu$ in $\rpoly_{G}^3(\vct{X})$, where $\nu$ is the number of distinct variables in the monomial.