% -*- root: main.tex -*-
\section{Analysis}
\label{sec:analysis}
We begin the analysis by showing that, with high probability, an estimate is approximately $\numWorldsP$, where $p$ is the probability measure for a given TIPD. Note that
\[\numWorldsP = \numWorldsSum.\]

The first step is to show that the expectation of the estimate of a tuple $t$'s membership across all worlds is $\numWorldsSum$.
\begin{align}
&\expect{\estimate}\\
=&\expect{\estExpOne}\\
=&\expect{\sum_{\substack{j \in [B],\\
    \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
    \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
    \wVec~|~\sketchHashParam{\wVec}= j,\\
    \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
    \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\
&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\
    \wVec~|~\sketchHashParam{\wVec} = j,\\
    \wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\
    \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
=&\expect{\sum_{\substack{j \in [B],\\
    \wVec~|~\sketchHashParam{\wVec}= j,\\
    \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
    \wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
=& \sum_{\substack{j \in [B],\\
    \wVec~|~\sketchHashParam{\wVec}= j}} \kMapParam{\wVec}
\end{align}

For the next step, we show that the variance of an estimate is small.
\[\varParam{\estimate}\]
\begin{align}
&=\varParam{\estExpOne}\\
&= \expect{\big(\estTwo\big)^2}\\
&=\expect{\sum_{\substack{
    \wVec_1, \wVec_2,\\
    \wVecPrime_1, \wVecPrime_2 \in \pw,\\
    \sketchHashParam{\wVec_1} = \sketchHashParam{\wVecPrime_1},\\
    \sketchHashParam{\wVec_2} = \sketchHashParam{\wVecPrime_2}
}}\kMapParam{\wVec_1} \cdot \kMapParam{\wVec_2}\cdot\sketchPolarParam{\wVec_1}\cdot\sketchPolarParam{\wVec_2}\cdot\sketchPolarParam{\wVecPrime_1}\cdot\sketchPolarParam{\wVecPrime_2} }\label{eq:var-sum-w}
\end{align}

Note that four-wise independence is assumed across all four random variables of~\eqref{eq:var-sum-w}. Zooming in on the inner products of the $\sketchPolar$ functions,
\begin{equation}
\polarProdEq \label{eq:polar-product}
\end{equation}
note that all four random variables in~\eqref{eq:polar-product} take their values from the same set of possible worlds $\pw$. Thus, there are five possible patterns of distribution between the $\wVec$ variables, namely:
\begin{align*}
&\distPattern{1}:&\cOne\\
&\distPattern{2}:&\cTwo \textit{*} \\
&\distPattern{3}:&\cThree \textit{*} \\
&\distPattern{4}:&\cFour \textit{*}\\
&\distPattern{5}:&\cFive
\end{align*}
\[\text{ }^*\textit{(and all variants of the respective pattern)}\]

We are interested in those particular cases whose expectation does not equal zero, since an expectation of zero will not add to the summation of~\eqref{eq:var-sum-w}. In expectation we have that
\begin{align}
&\expect{\sum_{\substack{\elems \\
    \st \cOne}} \polarProdEq} = 1 \label{eq:polar-prod-all}\\
&\expect{\sum_{\substack{\elems \\
    \st \cTwo}} \polarProdEq} = 1 \label{eq:polar-prod-two-and-two}\\
&\expect{\sum_{\substack{\elems \\
    \st \cThree}} \polarProdEq} = 0 \nonumber \\
&\expect{\sum_{\substack{\elems \\
    \st \cFour}} \polarProdEq} = 0 \nonumber \\
&\expect{\sum_{\substack{\elems \\
    \st \cFive}} \polarProdEq} = 0 \nonumber
\end{align}

Only equations~\eqref{eq:polar-prod-all} (which maps to $\cOne$) and~\eqref{eq:polar-prod-two-and-two} (which maps to $\cTwo$) affect the $\var$ computation.

Thus, when considering $\distPattern{1}$, the variance results in
\begin{equation}
\sum_{\wVec \in \pw} \kMapParam{\wVec}^2
\end{equation}

For the distribution pattern $\cTwo$, we have three variants to consider.
\begin{align*}
&\vCase{1}:&\cTwo \\
&\vCase{2}:&\cTwoV{\wOne}{\wTwo}{\wOneP}{\wTwoP}\\
&\vCase{3}:&\cTwoV{\wOne}{\wTwoP}{\wOneP}{\wTwo}
\end{align*}
When considered separately, the variants contribute the following terms to the $\var$ computation.
\begin{align}
\cTwo&=\sum_{\wOne \neq \wTwo}\kMapParam{\wOne} \cdot \kMapParam{\wTwo}\\
\cTwoV{\wOne}{\wTwo}{\wOneP}{\wTwoP}&=\sum_{\substack{\wOne \neq \wOneP,\\
    \wOne = \wTwo,\\
    \sketchHashParam{\wOne} = \sketchHashParam{\wOneP}}} \big| \sketchHashParam{\wOne} = \sketchHashParam{\wOneP} \big|\cdot \kMapParam{\wOne}\cdot \kMapParam{\wTwo}\\
\cTwoV{\wOne}{\wTwoP}{\wOneP}{\wTwo}&=\sum_{\wOne \neq \wTwo} \kMapParam{\wOne} \cdot \kMapParam{\wTwo}
\end{align}