paper-BagRelationalPDBsAreHard/analysis.tex

% -*- root: main.tex -*-
\section{Analysis}
\label{sec:analysis}
We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is the probability measure for a given TIPD.  Note that
\[
\numWorldsP = \numWorldsSum.
\]

The first step is to show that the expectation of the estimate of a tuple $t$'s membership across all worlds is $\numWorldsSum$.

\begin{align}
&\expect{\estimate}\\
=&\expect{\estExpOne}\\
=&\expect{\sum_{\substack{j \in [B],\\
			 \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
			 \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
				\wVec~|~\sketchHashParam{\wVec}= j,\\
				\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
				\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} +  \nonumber \\
&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\
				\wVec~|~\sketchHashParam{\wVec} = j,\\
				\wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
=&\expect{\sum_{\substack{j \in [B],\\
				\wVec~|~\sketchHashParam{\wVec}= j,\\
				\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
				\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
=&  \sum_{\substack{j \in [B],\\
				\wVec~|~\sketchHashParam{\wVec}= j}} \kMapParam{\wVec}.
\end{align}

For the next step, we show that the variance of an estimate is small.
\begin{align}
&\varParam{\estimate} \nonumber \\
&=\varParam{\estExpOne}\\
&= \expect{\big(\estTwo\big)^2}\\
&=\expect{\sum_{\substack{
		\wVec_1, \wVec_2,\\
		 \wVecPrime_1, \wVecPrime_2 \in \pw,\\
		 \sketchHashParam{\wVec_1} = \sketchHashParam{\wVecPrime_1},\\
		 \sketchHashParam{\wVec_2} = \sketchHashParam{\wVecPrime_2}
		 }}\kMapParam{\wVec_1} \cdot \kMapParam{\wVec_2}\cdot\sketchPolarParam{\wVec_1}\cdot\sketchPolarParam{\wVec_2}\cdot\sketchPolarParam{\wVecPrime_1}\cdot\sketchPolarParam{\wVecPrime_2} }\label{eq:var-sum-w}
\end{align}

Note that four-wise independence is assumed across all four random variables of \eqref{eq:var-sum-w}.  Zooming in on the inner products of the $\sketchPolar$ functions,
\begin{equation}
\polarProdEq \label{eq:polar-product}
\end{equation}
note that all four random variables in \eqref{eq:polar-product} take their values from the same set of possible worlds $\pw$.  Thus, there are five possible patterns of distribution between the $\wVec$ variables, namely:
\begin{align*}
&\distPattern{1}:&\cOne\\
&\distPattern{2}:&\cTwo \textit{*} \\
&\distPattern{3}:&\cThree \textit{*} \\
&\distPattern{4}:&\cFour \textit{*}\\
&\distPattern{5}:&\cFive
\end{align*}
\[
\text{ }^*\textit{(and all variants of the respective pattern)}
\]

We are interested in those particular cases whose expectation does not equal zero, since an expectation of zero will not add to the summation of \eqref{eq:var-sum-w}.  In expectation we have that
\begin{align}
&\expect{\sum_{\substack{\elems \\
			\st \cOne}} \polarProdEq} = 1 \label{eq:polar-prod-all}\\
&\expect{\sum_{\substack{\elems \\
			\st \cTwo}} \polarProdEq} = 1 \label{eq:polar-prod-two-and-two}\\
&\expect{\sum_{\substack{\elems \\
			\st \cThree}} \polarProdEq} = 0 \nonumber \\
&\expect{\sum_{\substack{\elems \\
			\st \cFour}} \polarProdEq} = 0 \nonumber \\
&\expect{\sum_{\substack{\elems \\
			\st \cFive}} \polarProdEq} = 0 \nonumber
\end{align}

Only equations~\eqref{eq:polar-prod-all} (which maps to $\cOne$) and~\eqref{eq:polar-prod-two-and-two} (which maps to $\cTwo$) affect the $\var$ computation.

Thus, when considering $\distPattern{1}$ the variance results in
\begin{equation}
\sum_{\wVec \in \pw} \kMapParam{\wVec}^2
\end{equation}

For the distribution pattern $\cTwo$, we have three variants to consider.
\begin{align*}
&\vCase{1}:&\cTwo \\
&\vCase{2}:&\cTwoV{\wOne}{\wTwo}{\wOneP}{\wTwoP}\\
&\vCase{3}:&\cTwoV{\wOne}{\wTwoP}{\wOneP}{\wTwo}
\end{align*}
When considered separately, the variants have the following $\var$.
\begin{align}
\cTwo&=\sum_{\wOne \neq \wTwo}\kMapParam{\wOne} \cdot \kMapParam{\wTwo}\\
\cTwoV{\wOne}{\wTwo}{\wOneP}{\wTwoP}&=\sum_{\substack{\wOne \neq \wOneP,\\
											\wOne = \wTwo,\\
											\sketchHashParam{\wOne} = \sketchHashParam{\wOneP}}} \big| \sketchHashParam{\wOne}\neq \sketchHashParam{\wOneP} \big|\cdot \kMapParam{\wOne}\cdot \kMapParam{\wTwo}\\
\cTwoV{\wOne}{\wTwoP}{\wOneP}{\wTwo}&=\sum_{\wOne \neq \wTwo} \kMapParam{\wOne} \cdot \kMapParam{\wTwo}
\end{align}