From 49f4560e812ec06004bbad262a3ba50beb62c1db Mon Sep 17 00:00:00 2001 From: Aaron Huber Date: Sat, 6 Jul 2019 15:07:20 -0400 Subject: [PATCH] Changes per Atri's suggestions: Sec. 2-expectation --- analysis.tex | 81 ++++++++++++++++++++++++++++++++++++++-------------- macros.tex | 1 + 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/analysis.tex b/analysis.tex index 6747ba6..fa7fe92 100644 --- a/analysis.tex +++ b/analysis.tex @@ -1,36 +1,73 @@ % -*- root: main.tex -*- \section{Analysis} \label{sec:analysis} -We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is the probability measure for a given TIPD. Note that +We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is a tuple's probability measure for a given TIPD. Note that \begin{equation} \numWorldsP = \numWorldsSum\label{eq:mu}. \end{equation} -The first step is to show that the expectation of the estimate of a tuple t's membership across all worlds is $\numWorldsSum$. +We begin by making the claim that the expectation of the estimate of a tuple $t$'s membership across all worlds is $\numWorldsSum$, formally +\begin{equation} +\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \sum_{\wVec \in \pw}\kMapParam{\wVec}\label{eq:allWorlds-est}. +\end{equation} +To verify this claim, we argue that the expectation of the estimate of a tuple's appearance in a single world is its annotation, i.e. +\begin{equation} +\expect{\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}} = \kMapParam{\wVec} \label{eq:single-est}. +\end{equation} \AR{While the analysis below is correct, the way it is stated it seems to `come out of the blue.' I would recommend that you re-structure the argument below as follows. First argue that $\expect{\sketch[i][\sketchHash[\wVec]]\cdot s_i[\wVec]}=v_t[\wVec]$. 
From this the claim below just follows by linearity of expectation but this result is a good thing for the reader to realize. Also instead of summing over $j\in [B],\wVec|h_i[\wVec]=j,\wVec'|h_i[\wVec']=j$ it would be better to just write it as sum over all $\wVec,\wVec'\in W\text{ s.t. }h_i[\wVec]=h_i[\wVec']$-- the latter is bit more compact and it is easier to comprehend as well.} -\begin{align} -&\expect{\estimate}\\ -=&\expect{\estExpOne}\\ -=&\expect{\sum_{\substack{j \in [B],\\ - \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\ - \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\ -=&\multLineExpect\big[\sum_{\substack{j \in [B],\\ - \wVec~|~\sketchHashParam{\wVec}= j,\\ - \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\ - \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\ -&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\ - \wVec~|~\sketchHashParam{\wVec} = j,\\ - \wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\ -=&\expect{\sum_{\substack{j \in [B],\\ - \wVec~|~\sketchHashParam{\wVec}= j,\\ - \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\ - \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\ -&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\ -=& \estExp \label{eq:estExpect} -\end{align} +\AH{Proof changed as suggested above. 
I erred on the verbose side for the sake of clarity.} +For a given $\wVec \in \pw$, substituting definitions we have +\begin{align*} +&\expect{\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \nonumber\\ +&\phantom{{}\sketchJParam{\sketchHashParam{\wVec}}}\expect{\big(\sum_{\substack{\wVecPrime \in \pw \st \\ + \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime}\big) \cdot \sketchPolarParam{\wVec} }. +\end{align*} +Since $\wVec \in \pw$, we know that for some $j \in [|\pw|], \exists \wVecPrime_j \st \wVecPrime_j = \wVec$. This yields +\begin{multline*} +\mathbb{E}\big[\kMapParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVec} + \cdots \\ ++\kMapParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}+ \cdots \\ ++ \kMapParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVec}\big] +\end{multline*} +Due to the uniformity of $\sketchPolar$, we have +\begin{equation*} += \kMapParam{\wVec}, +\end{equation*} +thus verifying \eqref{eq:single-est}. + +We can now take \eqref{eq:single-est}, substitute it in for \eqref{eq:allWorlds-est} and show by linearity of expectation that \eqref{eq:allWorlds-est} holds. +\begin{align*} +&\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} \\ +&= \expect{\sum_{\wVecPrime \in \pw}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime} \cdot \sum_{\substack{\wVec \in \pw \st \\ +\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\sketchPolarParam{\wVec}}\\ +&= \sum_{\wVec \in \pw} \expect{\left( \sum_{\substack{\wVecPrime \in \pw \st \\ +\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime}\cdot\sketchPolarParam{\wVecPrime}\right) \cdot \sketchPolarParam{\wVec}}\\ +&= \sum_{\wVec \in \pw}\kMapParam{\wVec}. 
+\end{align*} + +%\begin{align} +%&\expect{\estimate}\\ +%=&\expect{\estExpOne}\\ +%=&\expect{\sum_{\substack{j \in [B],\\ +% \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\ +% \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\ +%=&\multLineExpect\big[\sum_{\substack{j \in [B],\\ +% \wVec~|~\sketchHashParam{\wVec}= j,\\ +% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\ +% \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\ +%&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\ +% \wVec~|~\sketchHashParam{\wVec} = j,\\ +% \wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\ +%=&\expect{\sum_{\substack{j \in [B],\\ +% \wVec~|~\sketchHashParam{\wVec}= j,\\ +% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\ +% \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\ +%&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\ +%=& \estExp \label{eq:estExpect} +%\end{align} \AR{A general comment: The last display equation should have a period at the end. The idea is that display equations are considered part of a sentence and every sentence should end with a period.} +\AH{Thank you for clarifying this, as I have always wondered what the convention was for display equations. 
Hopefully, I haven't missed any end display equations, and have them all fixed properly.} For the next step, we show that the variance of an estimate is small.$$\varParam{\estimate}$$ diff --git a/macros.tex b/macros.tex index bbcc646..409187c 100644 --- a/macros.tex +++ b/macros.tex @@ -7,6 +7,7 @@ % \newcommand{\sketch}{\mathcal{S}} \newcommand{\sketchIj}{\sketch[i][j]} +\newcommand{\sketchJParam}[1]{\sketch\paramBox{i}\paramBox{#1}} \newcommand{\sketchCols}{B} \newcommand{\sketchRows}{M} \newcommand{\sketchHash}[1][i]{h_{#1}}