Changes per Atri's suggestions: Sec. 2-expectation

This commit is contained in:
Aaron Huber 2019-07-06 15:07:20 -04:00
parent 67888e23f7
commit 49f4560e81
2 changed files with 60 additions and 22 deletions

View file

@ -1,36 +1,73 @@
% -*- root: main.tex -*-
\section{Analysis}
\label{sec:analysis}
We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is the probability measure for a given TIPD. Note that
We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is a tuple's probability measure for a given TIPD. Note that
\begin{equation}
\numWorldsP = \numWorldsSum\label{eq:mu}.
\end{equation}
The first step is to show that the expectation of the estimate of a tuple t's membership across all worlds is $\numWorldsSum$.
We begin by claiming that the expectation of the estimate of a tuple $t$'s membership across all worlds is $\numWorldsSum$; formally,
\begin{equation}
\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \sum_{\wVec \in \pw}\kMapParam{\wVec}\label{eq:allWorlds-est}.
\end{equation}
To verify this claim, we argue that the expectation of the estimate of a tuple's appearance in a single world is its annotation, i.e.,
\begin{equation}
\expect{\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}} = \kMapParam{\wVec} \label{eq:single-est}.
\end{equation}
\AR{While the analysis below is correct, the way it is stated it seems to `come out of the blue.' I would recommend that you re-structure the argument below as follows. First argue that $\expect{\sketch[i][\sketchHash[\wVec]]\cdot s_i[\wVec]}=v_t[\wVec]$. From this the claim below just follows by linearity of expectation but this result is a good thing for the reader to realize. Also instead of summing over $j\in [B],\wVec|h_i[\wVec]=j,\wVec'|h_i[\wVec']=j$ it would be better to just write it as sum over all $\wVec,\wVec'\in W\text{ s.t. }h_i[\wVec]=h_i[\wVec']$-- the latter is bit more compact and it is easier to comprehend as well.}
\begin{align}
&\expect{\estimate}\\
=&\expect{\estExpOne}\\
=&\expect{\sum_{\substack{j \in [B],\\
\wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
\wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
\wVec~|~\sketchHashParam{\wVec}= j,\\
\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\
&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\
\wVec~|~\sketchHashParam{\wVec} = j,\\
\wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
=&\expect{\sum_{\substack{j \in [B],\\
\wVec~|~\sketchHashParam{\wVec}= j,\\
\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
=& \estExp \label{eq:estExpect}
\end{align}
\AH{Proof changed as suggested above. I erred on the verbose side for the sake of clarity.}
For a given $\wVec \in \pw$, substituting definitions we have
\begin{align*}
&\expect{\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \nonumber\\
&\phantom{{}\sketchJParam{\sketchHashParam{\wVec}}}\expect{\big(\sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime}\big) \cdot \sketchPolarParam{\wVec} }.
\end{align*}
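Since $\wVec \in \pw$ trivially satisfies $\sketchHashParam{\wVec} = \sketchHashParam{\wVec}$, it is itself one of the worlds in the inner sum; enumerating those worlds as $\wVecPrime_0, \ldots, \wVecPrime_n$, we know that $\exists j \in \{0, \ldots, n\} \st \wVecPrime_j = \wVec$. This yields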
\begin{multline*}
\mathbb{E}\big[\kMapParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVec} + \cdots \\
+\kMapParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}+ \cdots \\
+ \kMapParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVec}\big].
\end{multline*}
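Due to the uniformity of $\sketchPolar$, every term with $\wVecPrime_i \neq \wVec$ has expectation zero, while $\sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec} = 1$, so the expectation above simplifies to
\begin{equation*}
\kMapParam{\wVec},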
\end{equation*}
thus verifying \eqref{eq:single-est}.
We can now substitute \eqref{eq:single-est} into the left-hand side of \eqref{eq:allWorlds-est} and show, by linearity of expectation, that \eqref{eq:allWorlds-est} holds:
\begin{align*}
&\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} \\
&= \expect{\sum_{\wVecPrime \in \pw}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime} \cdot \sum_{\substack{\wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\sketchPolarParam{\wVec}}\\
&= \sum_{\wVec \in \pw} \expect{\left( \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime}\cdot\sketchPolarParam{\wVecPrime}\right) \cdot \sketchPolarParam{\wVec}}\\
&= \sum_{\wVec \in \pw}\kMapParam{\wVec}.
\end{align*}
%\begin{align}
%&\expect{\estimate}\\
%=&\expect{\estExpOne}\\
%=&\expect{\sum_{\substack{j \in [B],\\
% \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
% \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
%=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
% \wVec~|~\sketchHashParam{\wVec}= j,\\
% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\
%&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\
% \wVec~|~\sketchHashParam{\wVec} = j,\\
% \wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
%=&\expect{\sum_{\substack{j \in [B],\\
% \wVec~|~\sketchHashParam{\wVec}= j,\\
% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
%&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
%=& \estExp \label{eq:estExpect}
%\end{align}
\AR{A general comment: The last display equation should have a period at the end. The idea is that display equations are considered part of a sentence and every sentence should end with a period.}
\AH{Thank you for clarifying this, as I have always wondered what the convention was for display equations. Hopefully, I haven't missed any end display equations, and have them all fixed properly.}
For the next step, we show that the following variance of an estimate is small:
\[\varParam{\estimate}.\]

View file

@ -7,6 +7,7 @@
%
% Symbol for the sketch: a calligraphic S.
\newcommand{\sketch}{\mathcal{S}}
% The sketch symbol followed by the literal index text [i][j]
% (\sketch takes no arguments, so the brackets are typeset as written).
\newcommand{\sketchIj}{\sketch[i][j]}
% The sketch symbol with first index fixed to i and second index #1.
% Each index is wrapped in \paramBox, which is defined outside this excerpt
% (presumably it typesets a bracketed index -- TODO confirm).
\newcommand{\sketchJParam}[1]{\sketch\paramBox{i}\paramBox{#1}}
% Expands to B (going by the macro name, the number of sketch columns -- confirm).
\newcommand{\sketchCols}{B}
% Expands to M (going by the macro name, the number of sketch rows -- confirm).
\newcommand{\sketchRows}{M}
% Hash-function symbol h with subscript #1. The argument is OPTIONAL and
% defaults to i, so write \sketchHash or \sketchHash[k], not \sketchHash{k}.
\newcommand{\sketchHash}[1][i]{h_{#1}}