From 59d3e72b00f0e1a4acb2e6923bf4eb9ab887e908 Mon Sep 17 00:00:00 2001
From: Aaron Huber <ahuber@buffalo.edu>
Date: Tue, 8 Mar 2022 07:45:19 -0500
Subject: [PATCH] Changes from yesterday 030722.

---
 intro-rewrite-070921.tex | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/intro-rewrite-070921.tex b/intro-rewrite-070921.tex
index 9b66eba..6843556 100644
--- a/intro-rewrite-070921.tex
+++ b/intro-rewrite-070921.tex
@@ -335,34 +335,40 @@ can be stated as the following stronger (since~\Cref{prob:big-o-joint-steps} has
 Given one circuit $\circuit$ that encodes $\apolyqdt$ for all result tuples $\tup$ (one sink per $\tup$) for \abbrCTIDB $\pdb$ and $\raPlus$ query $\query$, does there exist an algorithm that computes a $(1\pm\epsilon)$-approximation of $\expct_{\rvworld\sim\bpd}\pbox{\query\inparen{\rvworld}\inparen{\tup}}$ (for all result tuples $\tup$) in $\bigO{|\circuit|}$ time?
 \end{Problem}
 
-For an upper bound on approximating the expected count, it is easy to check that if all the probabilties are constant then $\poly\left(\prob_1,\dots, \prob_n\right)$ (i.e. evaluating the original lineage polynomial over the probability values) is a constant factor approximation.  For example (with $\bound = 1$ for all variables but $X$ to aid in presentation), using $\query_1^2$ from above and $\prob_A$ to denote $\probOf\pbox{A = 1}$, we can see that
-First, note that we have some cancellations to deal with:
+For an upper bound on approximating the expected count, it is easy to check that if all the probabilities are constant then $\poly\left(\prob_1,\dots, \prob_n\right)$ (i.e. evaluating the original lineage polynomial over the probability values) is a constant factor approximation.  This is illustrated in the following example using $\query_1^2$ from earlier.  To aid in presentation we assume $\bound = 2$ for variable $X$ and $\bound = 1$ for all other variables.  Let $\prob_A$ denote $\probOf\pbox{A = 1}$.
+In computing $\rpoly$, we have some cancellations to deal with:
 
 \begin{footnotesize}
 \begin{align*}
-\smbOf{\refpoly{1, }^2\inparen{\vct{X}}} &= A^2\inparen{X_1^2 + 4X_1X_2 + 4X_2^2}B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AX_1B^2YE \\
+\refpoly{1, }^2\inparen{\vct{X}} &= A^2\inparen{X_1^2 + 4X_1X_2 + 4X_2^2}B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AX_1B^2YE \\
 &\qquad+ 2AX_2B^2YE + 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC\\
-	&= A^2\inparen{X_1^2 + 4X_2^2}B^2 +\ldots+2B^2YEZC\\
-	&= A^2X_1^2B^2 + 4A^2X_2^2B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AX_1B^2YE + 2AX_2B^2YE   \\
-	&\qquad+ 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC.\\
+%	&= A^2\inparen{X_1^2 + 4X_2^2}B^2 +\ldots+2B^2YEZC\\
+%	&= A^2X_1^2B^2 + 4A^2X_2^2B^2 + B^2Y^2E^2 + B^2Z^2C^2 + 2AX_1B^2YE + 2AX_2B^2YE   \\
+%	&\qquad+ 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC.\\
+\end{align*}
+\end{footnotesize}
+This then implies 
+\begin{footnotesize}
+\begin{align*}
+\rpoly\inparen{\vct{X}} &= AX_1B+4AX_2B+BYE+BZC+2AX_1BYE+2AX_2BYE+2AX_1BZC\\
+&\qquad+2AX_2BZC+2BYEZC.
 \end{align*}
 \end{footnotesize}
 Substituting $\vct{\prob}$ for $\vct{X}$,
 \begin{footnotesize}
 \begin{align*}
 \hspace*{-3mm}
-	\smbOf{\refpoly{1, }^2\inparen{\probAllTup}} &= \prob_A^2\prob_{X_1}^2\prob_B^2 + 4\prob_A^2\prob_{X_2}^2\prob_B^2 + \prob_B^2\prob_Y^2\prob_E^2 + \prob_B^2\prob_Z^2\prob_C^2 + 2\prob_A\prob_{X_1}\prob_B^2\prob_Y\prob_E + 2\prob_A\prob_{X_2}\prob_B^2\prob_Y\prob_E\\
+	\refpoly{1, }^2\inparen{\probAllTup} &= \prob_A^2\prob_{X_1}^2\prob_B^2 + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2 + 4\prob_A^2\prob_{X_2}^2\prob_B^2 + \prob_B^2\prob_Y^2\prob_E^2 + \prob_B^2\prob_Z^2\prob_C^2 + 2\prob_A\prob_{X_1}\prob_B^2\prob_Y\prob_E + 2\prob_A\prob_{X_2}\prob_B^2\prob_Y\prob_E\\
 	&\qquad+ 2\prob_A\prob_{X_1}\prob_B^2\prob_Z\prob_C + 2\prob_A\prob_{X_2}\prob_B^2\prob_Z\prob_C+ 2\prob_B^2\prob_Y\prob_E\prob_Z\prob_C\\
-	&\leq\prob_A\prob_{X_1}\prob_B + 4\prob_A\prob_{X_2}\prob_b + \prob_B\prob_Y\prob_E + \prob_B\prob_Z\prob_C + 2\prob_A\prob_{X_1}\prob_B\prob_Y\prob_E+ 2\prob_A\prob_{X_2}\prob_B\prob_Y\prob_E \\
+	&\leq\prob_A\prob_{X_1}\prob_B + 4\prob_A\prob_{X_1}\prob_{X_2}\prob_B + 4\prob_A\prob_{X_2}\prob_B + \prob_B\prob_Y\prob_E + \prob_B\prob_Z\prob_C + 2\prob_A\prob_{X_1}\prob_B\prob_Y\prob_E+ 2\prob_A\prob_{X_2}\prob_B\prob_Y\prob_E \\
 	&\qquad+ 2\prob_A\prob_{X_1}\prob_B\prob_Z\prob_C + 2\prob_A\prob_{X_2}\prob_B\prob_Z\prob_C + 2\prob_B\prob_Y\prob_E\prob_Z\prob_C
-	= \rpoly_1^2\inparen{\vct{p}}.
+	= \rpoly_1^2\inparen{\vct{p}} + 4\prob_A\prob_{X_1}\prob_{X_2}\prob_B.
 	 %\inparen{0.9\cdot 1.0\cdot 1.0 + 0.5\cdot 1.0\cdot 1.0 + 0.5\cdot 1.0\cdot 0.5}^2 = 2.7225 < 3.45 = \rpoly^2\inparen{\probAllTup}
 \end{align*}
 \end{footnotesize}
-If we assume that all seven probability values are at least $p_0>0$,
+If we assume that all probability values are at least $p_0>0$, then given access to $\refpoly{1, }^2\inparen{\vct{\prob}} - 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2$,
 %Choose the least factor that is reduced in $\rpoly^2\inparen{\vct{X}}$, in this case $\prob_A\prob_X\prob_B$, and
-we get that $\poly_1^2\inparen{\vct{\prob}}$ is in the range $\left(\inparen{p_0}^3\cdot\rpoly^2_1\inparen{\vct{\prob}}, \rpoly_1^2\inparen{\vct{\prob}}\right]$, which is \emph{not a tight approximation}.
-%
+we get that $\refpoly{1, }^2\inparen{\vct{\prob}}$ is in the range $\left(\inparen{p_0}^3\cdot\rpoly^2_1\inparen{\vct{\prob}}, \rpoly_1^2\inparen{\vct{\prob}}\right]$.  We can simulate sampling from $\refpoly{1, }\inparen{\vct{X}}$ by sampling monomials from $\refpoly{1, }^2$ while ignoring any samples of the monomial $A^2X_1X_2B^2$.  Note, however, that this is \emph{not a tight approximation}.
 %To get an $(1\pm \epsilon)$-multiplicative approximation we uniformly sample monomials from the \abbrSMB representation of $\poly$ and `adjust' their contribution to $\widetilde{\poly}\left(\cdot\right)$.
 In~\cref{sec:algo} we demonstrate that a $(1\pm\epsilon)$ (multiplicative) approximation with competitive performance is achievable.
 To get an $(1\pm \epsilon)$-multiplicative approximation and solve~\Cref{prob:intro-stmt}, using \circuit we uniformly sample monomials from the equivalent \abbrSMB representation of $\poly$ (without materializing the \abbrSMB representation) and `adjust' their contribution to $\widetilde{\poly}\left(\cdot\right)$.