Started changes to Lemma 2, still working out a few bugs.

master
Aaron Huber 2020-04-02 18:22:34 -04:00
parent ecddb5c470
commit 2b4920d035
4 changed files with 216 additions and 148 deletions

View File

@ -5,32 +5,38 @@
%
%SKETCH
%
\newcommand{\hfunc}{h}
\newcommand{\vect}{v}
\newcommand{\wElem}{w}
\newcommand{\wSet}{W}
\newcommand{\sine}{s}
\newcommand{\hfunc}{h}
\newcommand{\est}{est}
\newcommand{\conj}[1]{\overline{#1}}
\newcommand{\ind}[1]{\underset{#1}{\mathbbold{1}}}
\newcommand{\had}{\circ}
\newcommand{\pos}{POS}
\newcommand{\sketchCols}{B}
\newcommand{\sketchRows}{M}
\newcommand{\buck}{j}
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\cvar}[1]{\lambda\left({#1}\right)}
\newcommand{\ind}[1]{\underset{#1}{\mathbbold{1}}}%_{#1}}
\newcommand{\sk}{\mathcal{S}}
\newcommand{\sketch}{\mathcal{S}_t}
\newcommand{\sketchIj}{\sketch[i][j]}
\newcommand{\sketchJParam}[1]{\sketch\paramBox{i}\paramBox{#1}}
\newcommand{\sCom}[2]{\mathcal{S}_{#1}\paramBox{i}\paramBox{#2}}
\newcommand{\sketchCols}{B}
\newcommand{\sketchRows}{M}
\newcommand{\hash}[1][i]{h_{#1}}
\newcommand{\hashP}[1]{\hash\paramBox{#1}}
\newcommand{\pol}[1][i]{s_{#1}}
\newcommand{\polP}[1]{\pol\paramBox{#1}}
\newcommand{\polI}[2]{s_{#1}\paramBox{#2}}
\newcommand{\gIJ}{\gamma\paramBox{i}\paramBox{j}}
\newcommand{\buck}{j}
\newcommand{\kvec}{k}
\newcommand{\vect}{v}
\newcommand{\had}{\circ}
\newcommand{\pos}{POS}
\newcommand{\jVec}{\textbf{j}}
\newcommand{\lenB}{b}
\newcommand{\hVec}{\textbf{h}_{i,k}}
@ -47,13 +53,31 @@
\end{pmatrix}}
\newcommand{\jpbit}[1]{\buck^{(#1)}}
\newcommand{\polSum}{Bias(j, \hash, \pol)}
%
%functions
%
\newcommand{\match}[2]{#1 \simeq #2}
\newcommand{\dist}{m}
\newcommand{\dupSize}{j}
%
%number of joins/products
%
\newcommand{\prodsize}{k}
%
%terms
%
\newcommand{\term}{T}
%
%TIDB
%
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\paramBox}[1]{\left({#1}\right)}
\newcommand{\bigParamBox}[1]{\big[{#1}\big]}
\newcommand{\st}{~|~}
@ -77,6 +101,10 @@
%maybe easier this way:
%WVector Notation
%%%%%%%%%%%%%%%%
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\w}{\wVec}
\newcommand{\wa}{\wVec_a}
\newcommand{\wb}{\wVec_b}
@ -91,6 +119,10 @@
%%%%%%%%%%%%%%%%
%4-way cases
%%%%%%%%%%%%%%%%
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\polarProdNEq}{\polP{\wOne}\cdot\polP{\wOneP}\cdot\polP{\wTwo}\cdot\polP{\wTwoP}}%
\newcommand{\polarProdEq}{\polP{\wa}\cdot\polP{\wb}\cdot\polP{\wc}\cdot\polP{\wVecD}}%
\newcommand{\elems}{\wa, \wb, \wc, \wVecD}
@ -125,6 +157,10 @@
%%%%%%%%%%%%%%%%%
%Chebyshev
%%%%%%%%%%%%%%%%%
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\pr}[2]{Pr\big[|X - \mu| > {#1}\big] < {#2}}
\newcommand{\cheby}{\pr{\Delta}{\frac{1}{3}}}
\newcommand{\chebyK}{\pr{k\sd}{\frac{2}{\prob\errB^2\sketchCols}}}
@ -132,6 +168,10 @@
%%%%%%%%%%%%%%%%%
% Equations
%%%%%%%%%%%%%%%%%
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\polarFuncSum}[1][]{\sum_{\substack{\wVecPrime ~|~ \\
\hash\left[\wVecPrime\right] = j\\
{#1}}}\polP{\wVecPrime}}
@ -159,10 +199,20 @@
\newcommand{\AH}[1]{\todo[inline, backgroundcolor=blue]{\textbf{Aaron says:$\,$} #1}}
\newcommand{\SR}[1]{\todo[inline, backgroundcolor=white]{\textbf{Note to self:$\,$} #1}}
\newcommand{\AR}[1]{\todo[inline, color=green]{\textbf{Atri says:$\,$} #1}}
%
%many of these are outdated and need to be cleaned up
%
\newcommand{\startOld}[1]{\textcolor{purple}{\newline-------------------------\newline\textbf{Old Content:\newline-------------------------\newline} #1}\newline}
\newcommand{\finOld}{\newline\textcolor{purple}{------------------------------\newline\textbf{END} Old Content\newline ------------------------------\newline}}
%\newcommand{\comment}[1]{}
%
%borrowed from Su and Boris
%needs to be cleaned up
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DETAILED PROOFS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

74
pos.tex
View File

@ -7,54 +7,54 @@ The following property of the sine function $\sine$ is used in $\ex{\pos}$ deriv
\begin{Lemma}\label{lem:exp-sine}
$\forall \wElem \in \wSet$,\newline
$\ex{\sine(\wElem)^i} = \begin{cases}
0 &1 \leq i < k\\
0 &1 \leq i < \prodsize\\
1 &\text{otherwise}.
\end{cases}$
\end{Lemma}
Notice that, $\forall i \in [1, k - 1]$, $\ex{\sine(\wElem)^i} = \frac{\sum\limits_{\omega \in \Omega}\omega^i}{k} = \frac{\sum\limits_{l = 0}^{k - 1}(\omega^i)^l}{k}$. To prove the lemma then, one needs only to prove that $\sum\limits_{l = 0}^{k - 1}\omega^i = \begin{cases}0&1 \leq i < k\\k&\text{otherwise}.\end{cases}$
For the case of $i = k$,
Notice that, $\forall i \in [1, \prodsize - 1]$, $\ex{\sine(\wElem)^i} = \frac{\sum\limits_{\omega \in \Omega}\omega^i}{\prodsize} = \frac{\sum\limits_{l = 0}^{\prodsize - 1}(\omega^i)^l}{\prodsize}$. To prove the lemma then, one needs only to prove that $\sum\limits_{l = 0}^{\prodsize - 1}(\omega^i)^l = \begin{cases}0&1 \leq i < \prodsize\\\prodsize&\text{otherwise}.\end{cases}$
For the case of $i = \prodsize$,
\begin{equation}
\frac{\sum\limits_{l = 0}^{k - 1}(\omega^k)^l}{k} = \frac{\sum\limits_{l = 0}^{k - 1}1^l}{k} = \frac{k}{k} = 1.
\frac{\sum\limits_{l = 0}^{\prodsize - 1}(\omega^\prodsize)^l}{\prodsize} = \frac{\sum\limits_{l = 0}^{\prodsize - 1}1^l}{\prodsize} = \frac{\prodsize}{\prodsize} = 1.
\end{equation}
For $i \in [1, k - 1]$, we can show by geometric sum series that
For $i \in [1, \prodsize - 1]$, where $\omega$ is a primitive $\prodsize^{th}$ root of unity (so $\omega^i \neq 1$), the formula for the sum of a geometric series gives
\begin{equation}
\sum_{l = 0}^{k - 1}(\omega^i)^l = \frac{(\omega^i)^k - 1}{\omega^i - 1} = \frac{1 - 1}{\omega^i - 1} = 0.
\sum_{l = 0}^{\prodsize - 1}(\omega^i)^l = \frac{(\omega^i)^\prodsize - 1}{\omega^i - 1} = \frac{1 - 1}{\omega^i - 1} = 0.
\end{equation}
\qed
We target the specific query where it is optimal to push down projections below join operators. Such a query is a product of sums ($\pos$). To show that our scheme works in this setting, we first compute the expectation of a $\pos$~ query over sketch annotations, i.e. $\pos$ = $\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)$, for the set of matching projected tuples from each input, denoted $k', k''$. Note that we denote the $i^{th}$ vector as $\vect_i$ and the sketch of the $i^{th}$ vector $\sk^{\vect_i}$.
We target the specific query where it is optimal to push down projections below join operators. Such a query is a product of sums ($\pos$). To show that our scheme works in this setting, we first compute the expectation of a $\pos$~ query over sketch annotations, i.e. $\pos$ = $\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)$, for the set of matching projected tuples from each input, denoted $\prodsize', \prodsize''$. Note that we denote the $i^{th}$ vector as $\vect_i$ and the sketch of the $i^{th}$ vector $\sk^{\vect_i}$.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)}\nonumber\\
=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sum_{\wElem \in \wSet}\vect_i(\wElem)\ind{\hfunc(\wElem) = \buck}\sine(\wElem)\right) \left(\sum_{i' \in \kvec''}\sum_{\wElem' \in \wSet}\vect_{i'}(\wElem')\ind{\hfunc(\wElem') = \buck}\sine(\wElem')\right)}\label{eq:exp-pos1}\\
=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\sine(\wElem)\right) \left(\sum_{\wElem' \in \wSet}\ind{\hfunc(\wElem') = j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos2}\\
=&\ex{\sum_{\buck = 1}^{\sketchCols} \left(\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck} \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\sine(\wElem)^{2 = k}\right) + \left(\sum_{\substack{\wElem, \wElem' \in \wSet,\\\wElem \neq \wElem'}}\ind{\hfunc(\wElem) = j}\ind{\hfunc(\wElem') = j}\left(\left(\sum_{i \in k'}\vect_i(\wElem)\right)\sine(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos3}\\
=& \sum_{\buck = 1}^{\sketchCols}\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck}\left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos4}\\
=& \sum_{\wElem \in \wSet}\left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos5}
=&\ex{\sum_{\buck = 1}^{\sketchCols} \left(\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck} \left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem)\right)\sine(\wElem)^{2 = \prodsize}\right) + \left(\sum_{\substack{\wElem, \wElem' \in \wSet,\\\wElem \neq \wElem'}}\ind{\hfunc(\wElem) = j}\ind{\hfunc(\wElem') = j}\left(\left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)\sine(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos3}\\
=& \sum_{\buck = 1}^{\sketchCols}\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck}\left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos4}\\
=& \sum_{\wElem \in \wSet}\left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos5}
\end{align}
\qed\newline
Equation \eqref{eq:exp-pos1} follows from expanding the definitions of $\sk^{v_i}$. Equation \eqref{eq:exp-pos2} follows from the associative property of addition and the distributive property of addition over multiplication. Equation \eqref{eq:exp-pos3} also uses the associative and distributive properties to rearrange the $\pos$. Equation \eqref{eq:exp-pos4} results from Lemma \ref{lem:exp-sine}, where it can be seen that $\ex{\sine(\wElem)\sine(\wElem')} = 0$, thus eliminating the right hand term. The left hand operand stays, since by Lemma \ref{lem:exp-sine} we know that $\ex{\sine(\wElem)^k} = 1$. Finally, equation \eqref{eq:exp-pos4} follows from the construction of $\sk$.
Equation \eqref{eq:exp-pos1} follows from expanding the definitions of $\sk^{v_i}$. Equation \eqref{eq:exp-pos2} follows from the associative property of addition and the distributive property of multiplication over addition. Equation \eqref{eq:exp-pos3} also uses the associative and distributive properties to rearrange the $\pos$. Equation \eqref{eq:exp-pos4} results from Lemma \ref{lem:exp-sine}, where it can be seen that, for $\wElem \neq \wElem'$, $\ex{\sine(\wElem)\sine(\wElem')} = 0$, thus eliminating the right hand term. The left hand operand stays, since by Lemma \ref{lem:exp-sine} we know that $\ex{\sine(\wElem)^\prodsize} = 1$. Finally, equation \eqref{eq:exp-pos5} follows from the construction of $\sk$.
We now move to computing the variance of a $\pos$~query. Note that the use of complex numbers requires the variance formula $\var = \ex{\pos \cdot\conj{\pos}} - \ex{\pos}\ex{\conj{\pos}}$.
To make this easier to present and digest, we start by turning our focus to the first term, $T_1 = \ex{\pos \cdot \conj{\pos}}$.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in k''}\sk^{\vect_{i_1'}}[\buck]\right) \cdot
\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sk^{\vect_{i_2}}[\buck]\right)\left(\sum_{i_2' \in k''}\sk^{\vect_{i_2'}}[\buck]\right)}}\\
&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)
\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\sum_{i_2' \in k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}}\label{eq:var-pos1}\\
&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)
\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\sum_{i_2' \in k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\label{eq:var-pos2}\\
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in \prodsize'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in \prodsize''}\sk^{\vect_{i_1'}}[\buck]\right) \cdot
\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in \prodsize'}\sk^{\vect_{i_2}}[\buck']\right)\left(\sum_{i_2' \in \prodsize''}\sk^{\vect_{i_2'}}[\buck']\right)}}\\
&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in \prodsize'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in \prodsize''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)
\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in \prodsize'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\sine(\wElem_2)\sum_{i_2' \in \prodsize''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\sine(\wElem_2')\right)}}\label{eq:var-pos1}\\
&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in \prodsize'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in \prodsize''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)
\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in \prodsize'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\sum_{i_2' \in \prodsize''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\label{eq:var-pos2}\\
%
&=\mathbb{E}\left[\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)\right.\nonumber\\
&\left.\qquad\qquad\qquad\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)\right]\label{eq:var-pos3}\\
&=\mathbb{E}\left[\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)\right.\nonumber\\
&\left.\qquad\qquad\qquad\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)\right]\label{eq:var-pos3}\\
%
&=\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\cdot
\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos4}\\
&=\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\cdot
\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos4}\\
%
&=\ex{\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos5}\\
&=\ex{\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos5}\\
%
&=\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\label{eq:var-pos6}
&=\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\sum_{\substack{i_1, i_2 \in \prodsize',\\i_1', i_2' \in \prodsize''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\label{eq:var-pos6}
%--Below is part of the derivation without using the indicator variables. Only saving for short term...
%&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right) \cdot \sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos1}\\
%=&\ex{\sum_{\buck, \buck' \in \sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right) \cdot \left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos2}\\
@ -66,9 +66,9 @@ Equation \eqref{eq:var-pos3} results from rewriting the summations using the law
Equations \eqref{eq:var-pos4}, \eqref{eq:var-pos5} again rewrite the summation(s) using the law of distributivity of addition over multiplication.
Equation \eqref{eq:var-pos6} is the result of factoring out non-random terms from the expectation.\newline
When considering the terms that survive the expectation in \eqref{eq:var-pos6}, recall that it is a known fact when working with $k^{th}$ roots of unity ($R^k$) in the complex numbers that a complex number times its conjugate has a product of one, formally:
When considering the terms that survive the expectation in \eqref{eq:var-pos6}, recall that it is a known fact when working with $\prodsize^{th}$ roots of unity ($R^\prodsize$) in the complex numbers that a complex number times its conjugate has a product of one, formally:
\begin{equation*}
\forall c \in \mathbb{C} \text{ s.t. } c \in R^k, c \cdot \conj{c}= 1.
\forall c \in \mathbb{C} \text{ s.t. } c \in R^\prodsize, c \cdot \conj{c}= 1.
\end{equation*}
Combining this result with Lemma \ref{lem:exp-sine} one can see that only two possible cases of terms survive the expectation in \eqref{eq:var-pos6}.
@ -92,19 +92,19 @@ Second, by the law of conjugates,
Next, we show that the second term, $T_2 = \ex{\pos}\ex{\conj{\pos}}$, has the same term as $T_1$ factor out of the expectations.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in k''}\sk^{\vect_{i_1'}}[\buck]\right)}
\ex{\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sk^{\vect_{i_2}}[\buck]\right)\left(\sum_{i_2' \in k''}\sk^{\vect_{i_2'}}[\buck]\right)}}\label{eq:var-t2-pos1}\\
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in \prodsize'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in \prodsize''}\sk^{\vect_{i_1'}}[\buck]\right)}
\ex{\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in \prodsize'}\sk^{\vect_{i_2}}[\buck']\right)\left(\sum_{i_2' \in \prodsize''}\sk^{\vect_{i_2'}}[\buck']\right)}}\label{eq:var-t2-pos1}\\
%
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\right)\left(\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)}\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\right)\left(\sum_{i_2' \in k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\label{eq:var-t2-pos2}\\
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in \prodsize'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\right)\left(\sum_{i_1' \in \prodsize''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)}\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in \prodsize'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\right)\left(\sum_{i_2' \in \prodsize''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\label{eq:var-t2-pos2}\\
%
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)}\nonumber\\
&\qquad\qquad\qquad\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)}\label{eq:var-t2-pos3}\\
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)}\nonumber\\
&\qquad\qquad\qquad\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)}\label{eq:var-t2-pos3}\\
%
&\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')}\ex{\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-t2-pos4} \\
&\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')}\ex{\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-t2-pos4} \\
%
&\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\label{eq:var-t2-pos5} \\
&\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\left(\sum_{i_1 \in \prodsize'}\vect_{i_1}(\wElem_1)\right)\left(\sum_{i_1' \in \prodsize''}\vect_{i_1'}(\wElem_1')\right)\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\left(\sum_{i_2 \in \prodsize'}\vect_{i_2}(\wElem_2)\right)\left(\sum_{i_2' \in \prodsize''}\vect_{i_2'}(\wElem_2')\right)\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\label{eq:var-t2-pos5} \\
%
&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right)\label{eq:var-t2-pos5}
&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in \prodsize',\\i_1', i_2' \in \prodsize''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right)\label{eq:var-t2-pos5}
%
%&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)} \cdot \ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-t2-pos1}\\
%=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \conj{\sine(\wElem_2)}}\ex{\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-t2-pos2}
@ -117,7 +117,7 @@ Equation \eqref{eq:var-t2-pos5} uses the distributive property of addition over
Notice that both $T_1$ and $T_2$ have the same left side factor, so the $\var$ can be written as
\begin{align}
&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right.\nonumber\\
&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in \prodsize',\\i_1', i_2' \in \prodsize''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right.\nonumber\\
&\left.\qquad\qquad\qquad - \ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right)\label{eq:var-t1-t2}
\end{align}
@ -125,14 +125,14 @@ Notice that the expectation terms coming from $T_2$ cancel out case 1 leaving th
Thus,
\begin{equation}
\var\left[\pos\right] = \sum_j\sum_{\wElem, \wElem'}\frac{1}{\sketchCols^2}\left(\sum_{\substack{i \in k',\\i' \in k''}}\vect_i(\wElem)^2\vect_{i'}(\wElem')^2 + \vect_i(\wElem)\vect_{i'}(\wElem)\vect_i(\wElem')\vect_{i'}(\wElem')\right)
\var\left[\pos\right] = \sum_j\sum_{\wElem, \wElem'}\frac{1}{\sketchCols^2}\left(\sum_{\substack{i \in \prodsize',\\i' \in \prodsize''}}\vect_i(\wElem)^2\vect_{i'}(\wElem')^2 + \vect_i(\wElem)\vect_{i'}(\wElem)\vect_i(\wElem')\vect_{i'}(\wElem')\right)
\end{equation}
%Putting things together we have,
%\begin{align}
%&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\left(\ex{\sine(\wElem_1) \conj{\sine(\wElem_2)}\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}-\ex{\sine(\wElem_1) \conj{\sine(\wElem_2)}}\ex{\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\right)\label{eq:var-both-pos1}\\
%=&\sum_{\buck}\sum_{\wElem \neq \wElem' \in \wSet}\left(\sum_{i \in k'}\vect_i(\wElem)\right)^2\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)^2 + \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right) \left(\sum_{i \in k'}\vect_i(\wElem')\right)\label{eq:var-both-pos2}\\
%\leq&\norm{\sum_{i \in k'}\vect_i}_2^2\cdot\norm{\sum_{i' \in k''}\vect_{i'}}_2^2 + \norm{\sum_{i \in k'}\vect_i \had \sum_{i' \in k''}\vect_{i'}}_2^2\label{eq:var-both-pos3}
%=&\sum_{\buck}\sum_{\wElem \neq \wElem' \in \wSet}\left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)^2\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem')\right)^2 + \left(\sum_{i \in \prodsize'}\vect_i(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem)\right)\left(\sum_{i' \in \prodsize''}\vect_{i'}(\wElem')\right) \left(\sum_{i \in \prodsize'}\vect_i(\wElem')\right)\label{eq:var-both-pos2}\\
%\leq&\norm{\sum_{i \in \prodsize'}\vect_i}_2^2\cdot\norm{\sum_{i' \in \prodsize''}\vect_{i'}}_2^2 + \norm{\sum_{i \in \prodsize'}\vect_i \had \sum_{i' \in \prodsize''}\vect_{i'}}_2^2\label{eq:var-both-pos3}
%\end{align}
%\qed
%

72
sop.tex
View File

@ -1,26 +1,32 @@
%root--main.tex
\section{Sum of Products Analysis}
We now seek to bound the variance of a k-way join.
We now seek to bound the variance of a $\prodsize$-way join.
\begin{align}
&\sigsq_j = \ex{est_j \cdot \overline{est_j}} - \ex{est_j} \cdot \ex{\overline{est_j}} \nonumber\\
&= \ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w) \cdot \prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}} -
\ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w)}\cdot \ex{\prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}}\nonumber\\
&= \ex{\sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\sum_{w_1...w_k \in W} \prod_{i = 1}^k v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot
\ex{\sum_{w'_1...w'_k \in W} \prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
=&\sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\ex{\prod_{i = 1}^k v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^kv_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
&= \sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^k s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^ks(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^k\overline{s(w'_i)}\ind{h(w'_i) = j}} \right)\label{eq:sig-j-last}.
&\sigsq_j = \ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j} \cdot \ex{\overline{\est_j}} \nonumber\\
&= \ex{\prod_{i = 1}^{\prodsize}\sum_{w \in W_j}v_i(w)s(w) \cdot \prod_{i = 1}^\prodsize\sum_{w' \in W_j}v_i(w')\overline{s(w')}} -
\ex{\prod_{i = 1}^{\prodsize}\sum_{w \in W_j}v_i(w)s(w)}\cdot \ex{\prod_{i = 1}^\prodsize\sum_{w' \in W_j}v_i(w')\overline{s(w')}}\nonumber\\
&= \ex{\sum_{\substack{w_1...w_\prodsize\\w'_1...w'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\sum_{w_1...w_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot
\ex{\sum_{w'_1...w'_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
=&\sum_{\substack{w_1...w_\prodsize\\w'_1...w'_\prodsize\\ \in W}}\ex{\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^\prodsize v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
&= \sum_{\substack{w_1...w_\prodsize\\w'_1...w'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^\prodsize s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize s(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(w'_i)}\ind{h(w'_i) = j}} \right)\label{eq:sig-j-last}.
\end{align}
Before proceeding, we introduce some notation that will aid in communicating the bounds we are about to establish. First note, that the only terms that survive the expectation above are mappings of $w_i = w'_j = w$ for $i, j \in [k]$, such that each $w_i$ has a match, i.e., no $w_i$ or $w'_j$ stands alone without a matching world in its complimentary set.
Before proceeding, we introduce some notation and terminology that will aid in communicating the bounds we are about to establish. First we refer to the expectation computations as
\[\term = \ex{\prod_{i = 1}^\prodsize s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize s(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(w'_i)}\ind{h(w'_i) = j}} \text{,} \]
\[\term_1 = \ex{\prod_{i = 1}^\prodsize s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} \text{, and}\]
\[\term_2 = \ex{\prod_{i = 1}^\prodsize s(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(w'_i)}\ind{h(w'_i) = j}}. \]
We use the word \emph{term} to denote the value that equation~\eqref{eq:sig-j-last} computes for a specific assignment of world values. A term \emph{survives} the expectation if the value computed from~\eqref{eq:sig-j-last} for that assignment is nonzero. Note that the only terms that survive the expectation above are mappings of $w_i = w'_j = w$ for $i, j \in [\prodsize]$ such that each $w_i$ has a match, i.e., no $w_i$ or $w'_j$ stands alone without a matching world in its complementary set. In other words, the set of values in $\wElem_1,\ldots,\wElem_\prodsize$ has a bijective mapping to the set of values in $\wElem'_1,\ldots,\wElem'_\prodsize$.
%\subsection{M-tuples}
%\begin{Definition}
%Given a $k$-way join, define $\dist \in [k]$. An \dist-tuple then is a set of tuples, each tuple conatining $\dist$ elements, such that the values of each tuple sum up to $\dist$, i.e. $\forall i \in [\dist], \sum_j \dist_{t_{i, j}} = \dist$, where i is the $i^{th}$ tuple in $\dist_t$, and $j$ is the $j^{th}$ index of that tuple $t$. The set consists of each unique sum up to symmetry, meaning a tuple with the same elements only reversed is disallowed.
%Given a $\prodsize$-way join, define $\dist \in [\prodsize]$. An \dist-tuple then is a set of tuples, each tuple conatining $\dist$ elements, such that the values of each tuple sum up to $\dist$, i.e. $\forall i \in [\dist], \sum_j \dist_{t_{i, j}} = \dist$, where i is the $i^{th}$ tuple in $\dist_t$, and $j$ is the $j^{th}$ index of that tuple $t$. The set consists of each unique sum up to symmetry, meaning a tuple with the same elements only reversed is disallowed.
%\end{Definition}
%For example, when $k = 4$, $\dist = 2$, the \dist-tuple, denoted, $\dist_2$, would be$\left\{\left(1, 3\right), \left(2, 2\right)\right\}$. Here, $\dist_{2_{1, 1}} = 1$, and while the tuple $\left(3, 1\right)$ sums up to $k = 4$, we do not include it since we have it's symmetrical term $\left(1, 3\right)$.
%For example, when $\prodsize = 4$, $\dist = 2$, the \dist-tuple, denoted, $\dist_2$, would be$\left\{\left(1, 3\right), \left(2, 2\right)\right\}$. Here, $\dist_{2_{1, 1}} = 1$, and while the tuple $\left(3, 1\right)$ sums up to $\prodsize = 4$, we do not include it since we have it's symmetrical term $\left(1, 3\right)$.
%
%\AR{Why is the definition of M-tuples needed? From what I understand you need this to define what kinds of $f$ and $f'$ are allowed but in that case why not state those properties directly in terms of $f$ and $f'$? Actually after reading the next section, I do not see why these properties are needed at all..}
%\AH{I use the \dist-tuples to explain 1) what kind of matchings survive and 2) that $f, f'$ must only cross product from within the matchings of the same tuple. Maybe there is an easier way to do this.}
@ -28,7 +34,7 @@ Before proceeding, we introduce some notation that will aid in communicating the
\subsection{f, f'}
To help describe all possible matchings we introduce functions $f$ and $f'$.
\begin{Definition}
Functions f, f' are the set of surjective mappings from $k$ to $\dist$ elements: $f: [k] \rightarrow [\dist], f': [k] \rightarrow [\dist'].$
Functions f, f' are the set of surjective mappings from $\prodsize$ to $\dist$ elements: $f: [\prodsize] \rightarrow [\dist], f': [\prodsize] \rightarrow [\dist'].$
\end{Definition}
%\begin{equation*}
%f(i) = \begin{cases}
@ -38,21 +44,21 @@ Functions f, f' are the set of surjective mappings from $k$ to $\dist$ elements:
% \widetilde{w_\dist} &f(i) = \dist.
% \end{cases}
%\end{equation*}
The functions $f, f'$ are used to produce the mappings $w_i \mapsto \widetilde{w_{f(i)}}$. In particular, $f$ and $f'$ are machinery for mapping $k$ $\wElem$-world variables to $\dist$ distinct values.
The functions $f, f'$ are used to produce the mappings $w_i \mapsto \widetilde{w_{f(i)}}$. In particular, $f$ and $f'$ are machinery for mapping $\prodsize$ $\wElem$-world variables to $\dist$ distinct values.
We rewrite equation \eqref{eq:sig-j-last} in terms of $\dist$ distinct worlds, with $f, f'$ mappings.
\begin{equation}
\sum_{\dist \in [k]}\sum_{\dist' \in [k]}\sum_{f, f'}\sum_{\substack{\wElem_1, \cdots,\wElem_\dist,\\\wElem'_1,\cdots,\wElem'_{\dist'}\\ \in W}}\prod_{i = 1}^{k}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem'_{f'(i)}})\cdot\left( \ex{\prod_{i = 1}^k \sine(\widetilde{\wElem_{f(i)}}\conj{\sine(\widetilde{\wElem'_{f'(i)}})}\ind{h(\widetilde{\wElem_{f(i)}}) = j}\ind{h(\widetilde{w'_{f'(i)}}) = j}} -
\ex{\prod_{i = 1}^k \sine(\wElem_{f(i)})\ind{h(\widetilde{\wElem_{f(i)}}) = j}}\cdot \ex{\prod_{i = 1}^k\conj{\sine(\wElem'_{f'(i)})}\ind{h(\widetilde{w'_{f'(i)}}) = j}} \right)\label{eq:sig-j-distinct}
\sum_{\dist \in [\prodsize]}\sum_{\dist' \in [\prodsize]}\sum_{f, f'}\sum_{\substack{\wElem_1, \ldots,\wElem_\dist,\\\wElem'_1,\ldots,\wElem'_{\dist'}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem'_{f'(i)}})\cdot\left( \ex{\prod_{i = 1}^\prodsize \sine(\widetilde{\wElem_{f(i)}})\conj{\sine(\widetilde{\wElem'_{f'(i)}})}\ind{h(\widetilde{\wElem_{f(i)}}) = j}\ind{h(\widetilde{w'_{f'(i)}}) = j}} -
\ex{\prod_{i = 1}^\prodsize \sine(\widetilde{\wElem_{f(i)}})\ind{h(\widetilde{\wElem_{f(i)}}) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\conj{\sine(\widetilde{\wElem'_{f'(i)}})}\ind{h(\widetilde{w'_{f'(i)}}) = j}} \right)\label{eq:sig-j-distinct}
\end{equation}
Note that for a given $\dist$, we may have several ways to map $k$ worlds to $\dist$ distinct values. We need to define what if means for $f$ and $f'$ to be matching.
Note that for a given $\dist$, we may have several ways to map $\prodsize$ worlds to $\dist$ distinct values. We need to define what it means for $f$ and $f'$ to be matching.
\begin{Definition}
Functions $f:[k]\mapsto [\dist], f':[k]\mapsto [\dist']$ are said to be matching, denoted $\match{f}{f'}$, if and only if
Functions $f:[\prodsize]\mapsto [\dist], f':[\prodsize]\mapsto [\dist']$ are said to be matching, denoted $\match{f}{f'}$, if and only if
\begin{enumerate}
\item $\dist = \dist'$
\item $\{f^{-1}(i) ~|~ \forall i \in [\dist]\} = \{f'^{-1}(i') ~|~ \forall i' \in [\dist] \}$, i.e., the set of preimages for $f$ equals the set of preimages for $f'$
\item $\{|f^{-1}(i)| ~|~ \forall i \in [\dist]\} = \{|f'^{-1}(i')| ~|~ \forall i' \in [\dist] \}$, i.e., the set of preimage cardinalities for $f$ equals the set of preimage cardinalities for $f'$.
% \item $\forall i \in [\dist], |f^{-1}(i)| = |f'^{-1}(i)|$, or a symmetrical mapping exists, where $\forall i \in [\dist], \exists i' \in [\dist]$ such that $i'$ is unique, $|f^{-1}(i)| = |f^{-1}(i')|$.
\end{enumerate}
\end{Definition}
@ -60,34 +66,46 @@ Functions $f:[k]\mapsto [\dist], f':[k]\mapsto [\dist']$ are said to be matching
\begin{Lemma}\label{lem:sig-j-survive}
The only terms surviving the expectation of equation \eqref{eq:sig-j-distinct} are those with $f, f'$ matching, where $\forall j \in[\dist], \widetilde{\wElem_j} = \widetilde{\wElem'_j}$.
\end{Lemma}
We state what the expectation looks like when $f, f'$ are not matching. From \cref{eq:sig-j-last} it can be seen that if $\term_1 = \term_2 = 0$, then there are no surviving terms. Beginning with \cref{eq:sig-j-last}, we first look into the case when $\dist \neq \dist'$. By the fact that $\dist \neq \dist'$ we know that one set of variables has at least one more distinct world than the other set of variables. Without loss of generality, assume that $\dist < \dist'$. Looking at $\term_1$,
\begin{equation}
%&\sum_{\substack{\wElem_1,\ldots,\wElem_{\dist},\\ \wElem_1',\ldots,\wElem_{\dist}'\\\in \wSet}}\prod_{i = 1}^{\prodsize}\vect_i(\wElem_i)\vect_i(\wElem_i')
% \left(\ex{\prod_{i = 1}^{k}\sine(\wElem_i)\conj{\sine(\wElem_i')}\ind{\hfunc(\wElem_i) = \buck}\ind{\hfunc(\wElem_i') = \buck}} -
% \ex{\prod_{i = 1}^{k}\sine(\wElem_i)\ind{\hfunc(\wElem_i) = \buck}}\ex{\prod_{i = 1}\conj{\sine(\wElem'_i)}\ind{\hfunc(\wElem'_i) = \buck}} \right) \\
\term_1 = \ex{\prod_{i = 1}^{\prodsize}\sine(\wElem_1)^{\dupSize_1}\cdots\sine(\wElem_{\dist})^{\dupSize_{\dist}}\,\conj{\sine(\wElem_1')}^{\dupSize_1'}\cdots\conj{\sine(\wElem_{\dist}')}^{\dupSize_{\dist}'}\cdots\conj{\sine(\wElem_{\dist'}')}^{\dupSize_{\dist'}'}} = 0.
\end{equation}
Notice that, with $\dist < \dist'$, there will be $\sum\limits_{n \in [\dist'] \setminus [\dist]}\dupSize_n'$ world values on either side that do not have a match, since the number of factors in each product, $\prodsize$, is fixed. This leaves us with at least one $\sine(\wElem_i)^{\dupSize_n - \dupSize_{n'}} \cdot \conj{\sine(\wElem_{\dist'}')}^{\dupSize_{\dist'}'}$ pairing, which is zero in expectation. Since we have at least one extra \emph{distinct} world value, whose conjugated $\sine$ value is paired with the $\sine$ value of another distinct world value, the expectation equals zero.
\newline
The proof is immediate and follows from the fact that the random $\sine$ functions are only guaranteed to produce a product of one under one of two possible conditions:
\begin{enumerate}
\item $\sine(\wElem)^k = 1$,
\item $\sine(\wElem)^\prodsize = 1$,
\item $\sine(\wElem) \conj{\sine(\wElem)} = 1$.\qed
\end{enumerate}
%\AH{Here is where I have attempted to use prose to discuss the restrictions on $f$ and $f'$, rather than the use of \dist-tuples. Maybe there is a better, cleaner formal way?}
%E.g., for $k = 4, \dist = 2$, mappings could be such that one $\wElem_i$ is distinct, while the other three $\wElem_i$ are mapped to the other distinct value. Additionally, we would have the case where two $\wElem_i$ map to a distinct value, while the other two $\wElem_i$ map to a seperate distinct world. The expectations of equation \eqref{eq:sig-j-last} restrict $f$ and $f'$ to belonging to the same class of $\dist$-mapping, meaning, if the mapping $f$ for $k = 4, \dist = 2$ is in the setting of one distinct world and three equal world values, then $f'$ must be from that same set of mappings, and not from another class of mappings, such as when two $w_i$ map to a distinct world, while the other two $w_i$ map to a separate distinct world.
%E.g., for $\prodsize = 4, \dist = 2$, mappings could be such that one $\wElem_i$ is distinct, while the other three $\wElem_i$ are mapped to the other distinct value. Additionally, we would have the case where two $\wElem_i$ map to a distinct value, while the other two $\wElem_i$ map to a seperate distinct world. The expectations of equation \eqref{eq:sig-j-last} restrict $f$ and $f'$ to belonging to the same class of $\dist$-mapping, meaning, if the mapping $f$ for $\prodsize = 4, \dist = 2$ is in the setting of one distinct world and three equal world values, then $f'$ must be from that same set of mappings, and not from another class of mappings, such as when two $w_i$ map to a distinct world, while the other two $w_i$ map to a separate distinct world.
%\AH{Here is the use of \dist-tuples to explain the same thing.}
% In the example above, $f$ mappings for $\dist_{2_1}$ may only cross product with $f'$ mappings for $\dist_{2_1}$ and not with those for $\dist_{2_2}$. Likewise for $f, f'$ mappings of $\dist_{2_2}$.
Using the above definitions, we can now present the variance bounds for $\sigsq_j$ based on \eqref{eq:sig-j-distinct}.
By the fact that the expectations cancel when $\forall i, i', j, j'\in [k], \wElem_i = \wElem_j = \wElem_{i'}' = \wElem_{j'}' = \wElem$, we can rid ourselves of the case when there exists only one distinct world value. We then need to sum up all the $\dist$ distinct world value possibilities for $\dist \in [2, k]$. Note that the number of distinct values $\dist$ affects the randomness of the hash function $\hfunc$. E.g. only $\dist = 2$ distinct values will yield $\frac{1}{\sketchCols} \cdot \frac{1}{\sketchCols} = \frac{1}{\sketchCols^2} = \frac{1}{\sketchCols^\dist}$. By lemma \ref{lem:sig-j-survive} and equation \eqref{eq:sig-j-distinct} we get
Because the expectations cancel when $\forall i, i', j, j'\in [\prodsize], \wElem_i = \wElem_j = \wElem_{i'}' = \wElem_{j'}' = \wElem$, we can discard the case in which there exists only one distinct world value. We then need to sum over all the $\dist$ distinct world value possibilities for $\dist \in [2, \prodsize]$. Note that the number of distinct values $\dist$ affects the randomness of the hash function $\hfunc$. E.g., $\dist = 2$ distinct values yield $\frac{1}{\sketchCols} \cdot \frac{1}{\sketchCols} = \frac{1}{\sketchCols^2} = \frac{1}{\sketchCols^\dist}$. By Lemma~\ref{lem:sig-j-survive} and equation~\eqref{eq:sig-j-distinct} we get
%
%\begin{equation*}
%\frac{1}{\sketchCols^2}\sum_{\widetilde{\wElem_1}, \widetilde{\wElem_2}}\prod_{i = 1}^{k}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem_{f'(i)}}).
%\frac{1}{\sketchCols^2}\sum_{\widetilde{\wElem_1}, \widetilde{\wElem_2}}\prod_{i = 1}^{\prodsize}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem_{f'(i)}}).
%\end{equation*}
%This is because we know that the expectation from \eqref{eq:sig-j-last} will survive when we have mappings that produce pairs of the form $\sine(\wElem)\conj{\sine(\wElem)}$. Second, in consideration of the randomized hashing, with two distinct variables, the indicator variables in the expectation yield $\frac{1}{\sketchCols}\cdot \frac{1}{\sketchCols}$.
%
%We need to sum over all mappings for each case (c) when the number of distinct values is $\dist = 2$, resulting in
%\begin{equation*}
%\frac{1}{\sketchCols^2}\sum_{\widetilde{\wElem_1}, \widetilde{\wElem_2}}\sum_{c \in \dist = 2}\sum_{f, f'}\prod_{i = 1}^{k}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem_{f'(i)}}).
%\frac{1}{\sketchCols^2}\sum_{\widetilde{\wElem_1}, \widetilde{\wElem_2}}\sum_{c \in \dist = 2}\sum_{f, f'}\prod_{i = 1}^{\prodsize}\vect_i(\widetilde{\wElem_{f(i)}})\vect_i(\widetilde{\wElem_{f'(i)}}).
%\end{equation*}
%
%Finally, we need to do this for all $\dist$.
\begin{equation*}
\sigsq_j = \sum_{\dist \in [2, k]} \frac{1}{B^\dist} \sum_{\widetilde{w_1}\cdots\widetilde{w_\dist}\in W} \sum_{\substack{f, f',\\\match{f}{f'}}} \prod_{i = 1}^{k} v_i(\widetilde{w_{f(i)}}) v_i(\widetilde{w_{f'(i)}})
\sigsq_j = \sum_{\dist \in [2, \prodsize]} \frac{1}{\sketchCols^\dist} \sum_{\widetilde{w_1},\ldots,\widetilde{w_\dist}\in W} \sum_{\substack{f, f',\\\match{f}{f'}}} \prod_{i = 1}^{\prodsize} v_i(\widetilde{w_{f(i)}}) v_i(\widetilde{w_{f'(i)}}).
\end{equation*}

View File

@ -10,159 +10,159 @@ We wish to prove that
\]
Therefore, substituting in the definition of variance for complex numbers,
\begin{align}
\sigsq &= \ex{\sum_j est_j \cdot \conj{\sum_{j'} est_j'}} - \ex{\sum_j est_j}\cdot\ex{\conj{\sum_{j'} est_{j'}}}\nonumber\\
&= \ex{\sum_j est_j \cdot \sum_{j'} \conj{est_j'}} - \ex{\sum_j est_j}\cdot\ex{\sum_{j'} \conj{est_{j'}}}\nonumber\\
&= \sum_{j, j'}\left(\ex{est_j \cdot \overline{est_j'}} - \ex{est_j}\ex{\overline{est_{j'}}} = \cvar{j, j'}\right)\nonumber\\
&= \sum_j\ex{est_j \cdot \overline{est_j'}} - \ex{est_j}\ex{\overline{est_j}} + \sum_{j \neq j'}\cvar{j, j'}\nonumber\\
\sigsq &= \ex{\sum_j \est_j \cdot \conj{\sum_{j'} \est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\conj{\sum_{j'} \est_{j'}}}\nonumber\\
&= \ex{\sum_j \est_j \cdot \sum_{j'} \conj{\est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\sum_{j'} \conj{\est_{j'}}}\nonumber\\
&= \sum_{j, j'}\left(\ex{\est_j \cdot \overline{\est_{j'}}} - \ex{\est_j}\ex{\overline{\est_{j'}}} = \cvar{j, j'}\right)\nonumber\\
&= \sum_j\left(\ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j}\ex{\overline{\est_j}}\right) + \sum_{j \neq j'}\cvar{j, j'}\nonumber\\
&= \sum_j \sigsq_j + \sum_{j \neq j'}\cvar{j, j'} \label{eq:sigsq-jneqj}\\
&\Rightarrow \sum_{j \neq j'}\cvar{j, j'}\leq 0. \nonumber
\end{align}
\subsection{Bounding $\sum_{j \neq j'}\cvar{j, j'}$}
\begin{align*}
\sum_{j \neq j'}\cvar{j, j'} &= \sum_{j \neq j'} \ex{\est_j \cdot \conj{\est_{j'}}} - \ex{\est_j}\cdot\ex{\conj{\est_{j'}}}\\
&=\ex{\prod_{i = 1}^{k}\sum_{w \in W}v_i(w)s(w)\ind{h(w) = j}\cdot \prod_{i = 1}^{k}\sum_{w' \in W}v_i(w')\conj{s(w')}\ind{h(w') = j'}} - \ex{\prod_{i = 1}^{k}\sum_{w \in W}v_i(w)s(w)\ind{h(w) = j}}\cdot \ex{\prod_{i = 1}^{k}\sum_{w' \in W}v_i(w')\conj{s(w')}\ind{h(w') = j'}}\\
&=\ex{\sum_{\substack{w_1,\cdots,w_k,\\w'_1,\cdots,w'_k\\\in W}}\prod_{i = 1}^{k}v_i(w_i)s(w_i)v_i(w'_i)s(w'_i) \ind{h(w_i) = j} \ind{h(w'_i) = j'}} - \ex{\sum_{\substack{w_1,\cdots, w_k\\\in W}}\prod_{i = 1}^{k}v_i(w_i)s(w_i) \ind{h(w_i) = j}}\cdot\ex{\sum_{\substack{w'_1,\cdots, w'_k\\\in W}}\prod_{i = 1}^{k}v_i(w'_i)s(w'_i) \ind{h(w'_i) = j'}}\\
&=\sum_{\substack{w_1,\cdots,w_k,\\w'_1,\cdots,w'_k\\\in W}}\ex{\prod_{i = 1}^{k}v_i(w_i)s(w_i)v_i(w'_i)s(w'_i)\ind{h(w_i) = j}\ind{h(w'_i) = j'}} - \ex{\prod_{i = 1}^{k} v_i(w_i)s(w_i) \ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^{k}v_i(w'_i)s(w'_i)\ind{h(w'_i) = j'}}\\
&= \sum_{\substack{w_1,\cdots,w_k,\\w'_1,\cdots,w'_k\\\in W}}\prod_{i = 1}^{k}v_i(w_i)v_i(w'_i)\ex{\prod_{i = 1}^{k}s(w_i)s(w'_i)\ind{h(w_i) = j}\ind{h(w'_i) = j'}} - \prod_{i = 1}^{k}v_i(w_i)\ex{\prod_{i = 1}^{k}s(w_i)\ind{h(w_i) = j}}\cdot \prod_{i = 1}^{k}v_i(w'_i)\ex{\prod_{i = 1}^{k}s(w'_i)\ind{h(w_i') = j'}}\\
&= \sum_{\substack{w_1,\cdots,w_k,\\w'_1,\cdots,w'_k\\\in W}}\prod_{i = 1}^{k}v_i(w_i)v_i(w'_i)\left(\ex{\prod_{i = 1}^{k}s(w_i)s(w'_i)\ind{h(w_i) = j}\ind{h(w'_i) = j'}} - \ex{\prod_{i = 1}^{k}s(w_i)\ind{h(w_i) = j}}\cdot\ex{\prod_{i = 1}^{k}s(w'_i)\ind{h(w_i') = j'}} \right).
&=\ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}\cdot \prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}} - \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}}\cdot \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}}\\
&=\ex{\sum_{\substack{\wElem_1,\ldots,\wElem_\prodsize,\\\wElem'_1,\ldots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem_i) = j} \ind{h(\wElem'_i) = j'}} - \ex{\sum_{\substack{\wElem_1,\ldots, \wElem_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}}\cdot\ex{\sum_{\substack{\wElem'_1,\ldots, \wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem'_i) = j'}}\\
&=\sum_{\substack{\wElem_1,\ldots,\wElem_\prodsize,\\\wElem'_1,\ldots,\wElem'_\prodsize\\\in W}}\ex{\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize} v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem'_i) = j'}}\\
&= \sum_{\substack{\wElem_1,\ldots,\wElem_\prodsize,\\\wElem'_1,\ldots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \prod_{i = 1}^{\prodsize}v_i(\wElem_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem_i') = j'}}\\
&= \sum_{\substack{\wElem_1,\ldots,\wElem_\prodsize,\\\wElem'_1,\ldots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\left(\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem_i') = j'}} \right).
\end{align*}
For $T_1 = \ex{\prod_{i = 1}^{k}s(w_i)s(w'_i)\ind{h(w_i) = j}\ind{h(w'_i) = j'}}$, because hash function $h$ cannot bucket the same world to two different buckets, the only surviving terms occur when there is no overlap between the $w_i$ and $w'_i$ variables. Given the condition of no overlap, the only terms that survive are when $\forall i \in [k], w_i = w, w'_i = w', w \neq w'$. Notice, however, that in such a case, the product of the remaining expectations will cancel this out. Looking at the remaining two expectations, each can only survive when $\forall i \in [k], w_i = w, w'_i = w'$. Such constraints leave us with only one surviving case, when all variables are the same world. Thus,
For $T_1 = \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}}$, because the hash function $h$ cannot bucket the same world to two different buckets, the only surviving terms occur when there is no overlap between the $\wElem_i$ and $\wElem'_i$ variables. Given the condition of no overlap, the only terms that survive are when $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem', \wElem \neq \wElem'$. Notice, however, that in such a case, the product of the remaining expectations will cancel this out. Looking at the remaining two expectations, each can only survive when $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem'$. Such constraints leave us with only one surviving case, when all variables are the same world. Thus,
\begin{align}
&\sum_{j \neq j'}\cvar{j, j'} = - \frac{1}{B^2}\sum_{w \in W}\prod_{i = 1}^{k}v_i^2(w)\label{eq:cvar-bound}.
&\sum_{j \neq j'}\cvar{j, j'} = - \frac{1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{\prodsize}v_i^2(\wElem)\label{eq:cvar-bound}.
\end{align}
\subsection{Bounding $\sigsq_j$}
We now seek to bound the remaining term in~\eqref{eq:sigsq-jneqj}. We take a look at the variance of a single bucket estimate.
\begin{align*}
&\sigsq_j = \ex{est_j \cdot \overline{est_j}} - \ex{est_j} \cdot \ex{\overline{est_j}} \\
&= \ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w) \cdot \prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}} -
\ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w)}\cdot \ex{\prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}}\\
&= \ex{\sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\sum_{w_1...w_k \in W} \prod_{i = 1}^k v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot
\ex{\sum_{w'_1...w'_k \in W} \prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\\
=&\sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\ex{\prod_{i = 1}^k v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^kv_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\\
&= \sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^k s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^ks(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^k\overline{s(w'_i)}\ind{h(w'_i) = j}} \right).
&\sigsq_j = \ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j} \cdot \ex{\overline{\est_j}} \\
&= \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W_j}v_i(\wElem)s(\wElem) \cdot \prod_{i = 1}^\prodsize\sum_{\wElem' \in W_j}v_i(\wElem')\overline{s(\wElem')}} -
\ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W_j}v_i(\wElem)s(\wElem)}\cdot \ex{\prod_{i = 1}^\prodsize\sum_{\wElem' \in W_j}v_i(\wElem')\overline{s(\wElem')}}\\
&= \ex{\sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\sum_{\wElem_1...\wElem_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(\wElem_i)s(\wElem_i)\ind{h(\wElem_i) = j}} \cdot
\ex{\sum_{\wElem'_1...\wElem'_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(\wElem'_i)\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}\\
=&\sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\ex{\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize v_i(\wElem_i)s(\wElem_i)\ind{h(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^\prodsize v_i(\wElem'_i)\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}\\
&= \sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)\cdot\left( \ex{\prod_{i = 1}^\prodsize s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}} \right).
\end{align*}
\subsection{Non-generic k}
\subsubsection{k = 2}
Taking $k = 2$ and looking at $T_1 = \ex{\prod\limits_{i = 1}^k s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}}$, it can be seen that only specific combinations of $w$ can survive. First, when $\forall i \in [k], w_i = w, w'_i = w'$, then we end up with $s(w)^k = 1$ and $s(w')^k = 1$. This translates into:
\subsection{Non-generic $\prodsize$}
\subsubsection{$\prodsize = 2$}
Taking $\prodsize = 2$ and looking at $T_1 = \ex{\prod\limits_{i = 1}^\prodsize s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}}$, it can be seen that only specific combinations of $\wElem$ can survive. First, when $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem'$, then we end up with $s(\wElem)^\prodsize = 1$ and $s(\wElem')^\prodsize = 1$. This translates into:
\begin{align*}
\frac{1}{B}\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w) +\frac{1}{B^2}\sum_{w\neq w' \in W}\prod_{i = 1}^{2}v_i(w)v_i(w'_i).
\frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) +\frac{1}{B^2}\sum_{\wElem\neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem').
\end{align*}
Taking into account that for $\omega \in \mathbb{C}, \omega \cdot \conj{\omega} = 1$, terms in $T_1$ also survive the expectation when all $w_i$ have a matching counterpart in $w'_i$, yielding
Taking into account that $\omega \cdot \conj{\omega} = 1$ for any root of unity $\omega \in \mathbb{C}$, terms in $T_1$ also survive the expectation when every $\wElem_i$ has a matching counterpart among the $\wElem'_i$, yielding
\begin{align*}
\frac{1}{B^2}\sum_{\substack{w_1 \neq w_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(w_i) + \frac{1}{B^2}\sum_{w \neq w' \in W}\prod_{i = 1}^{2}v_i(w)v_i(w').
\frac{1}{B^2}\sum_{\substack{\wElem_1 \neq \wElem_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(\wElem_i) + \frac{1}{B^2}\sum_{\wElem \neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem').
\end{align*}
Putting all cases together we have that
\begin{align*}
T_1 = \frac{1}{B}\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w) + \frac{1}{B^2}\left(2\sum_{w\neq w' \in W}\prod_{i = 1}^{2}v_i(w)v_i(w') + \sum_{\substack{w_1 \neq w_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(w_i)\right).
T_1 = \frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\left(2\sum_{\wElem\neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem') + \sum_{\substack{\wElem_1 \neq \wElem_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(\wElem_i)\right).
\end{align*}
For $T_2 = \ex{\prod_{i = 1}^ks(w_i)\ind{h(w_i) = j}}$ and $T_3 = \ex{\prod_{i = 1}^k\overline{s(w'_i)}\ind{h(w'_i) = j}}$, we get
For $T_2 = \ex{\prod_{i = 1}^\prodsize s(\wElem_i)\ind{h(\wElem_i) = j}}$ and $T_3 = \ex{\prod_{i = 1}^\prodsize\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}$, we get
\begin{align*}
&T_2 = \frac{1}{B}\sum_{w \in W}\prod_{i = 1}^{2}v_i(w),\\
&T_3 = \frac{1}{B}\sum_{w' \in W}\prod_{i = 1}^{2}v_i(w'),\\
&T_2 \cdot T_3 = \frac{1}{B^2}\sum_{w, w' \in W}\prod_{i = 1}^{2}v_i(w)v_i(w') = \frac{1}{B^2}\left(\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w) + \sum_{w \neq w' \in W}\prod_{i = 1}^{2}v_i(w)v_i(w')\right).
&T_2 = \frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i(\wElem),\\
&T_3 = \frac{1}{B}\sum_{\wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem'),\\
&T_2 \cdot T_3 = \frac{1}{B^2}\sum_{\wElem, \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem') = \frac{1}{B^2}\left(\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \sum_{\wElem \neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem')\right).
\end{align*}
Combining all $T_i$,
\begin{align*}
\sigsq_j = T_1 - T_2 \cdot T_3 = \frac{B - 1}{B^2}\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w) + \frac{1}{B^2}\left(\sum_{\substack{w_1\neq w_2 \\ \in W}}\prod_{i = 1}^{2}v_i(w_1)v_i(w_2) + v_i^2(w_i)\right).%+ \sum_{w}\prod_{i = 1}^{2}v_i(w)^2\right)
\sigsq_j = T_1 - T_2 \cdot T_3 = \frac{B - 1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\left(\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right).%+ \sum_{\wElem}\prod_{i = 1}^{2}v_i(\wElem)^2\right)
\end{align*}
Recall ~\eqref{eq:cvar-bound}, that $\sum\limits_{j \neq j'}\cvar{j, j'} = -\frac{1}{B^2}\sum\limits_{w \in W}\prod_{i = 1}^{2}v_i^2(w)$. Thus, for $k = 2$ we can compute ~\eqref{eq:sigsq-jneqj}
Recall from~\eqref{eq:cvar-bound} that $\sum\limits_{j \neq j'}\cvar{j, j'} = -\frac{1}{B^2}\sum\limits_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem)$. Thus, for $\prodsize = 2$ we can compute~\eqref{eq:sigsq-jneqj}:
\begin{align*}
&\sigsq = \sum_{j \in B}\sigsq_j + \sum_{j \neq j'}\cvar{j, j'}\\
&=B \cdot \left(\frac{B - 1}{B^2}\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w) + \frac{1}{B^2}\left(\sum_{\substack{w_1\neq w_2 \\ \in W}}\prod_{i = 1}^{2}v_i(w_1)v_i(w_2) + v_i^2(w_i)\right)
\right) - \frac{B\left(B - 1\right)}{B^2}\sum_{w \in W}\prod_{i = 1}^{2}v_i^2(w)\\
&= \frac{1}{B}\left(\sum_{\substack{w_1\neq w_2 \\ \in W}}\prod_{i = 1}^{2}v_i(w_1)v_i(w_2) + v_i^2(w_i)\right)\\
&= \frac{1}{B}\left(\left(\sum_{w \in W}v_1(w)\right)^2\left(\sum_{w \in W}v_2(w)\right)^2 + \left(\sum_{w \in W}v_1(w)v_2(w)\right)^2\right)\\
&= \frac{1}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2 + \left(\sum_{w \in W}v_1(w)v_2(w)\right)^2\right)\\
&=B \cdot \left(\frac{B - 1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\left(\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right)
\right) - \frac{B\left(B - 1\right)}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem)\\
&= \frac{1}{B}\left(\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right)\\
&\leq \frac{1}{B}\left(\left(\sum_{\wElem \in W}v_1^2(\wElem)\right)\left(\sum_{\wElem \in W}v_2^2(\wElem)\right) + \left(\sum_{\wElem \in W}v_1(\wElem)v_2(\wElem)\right)^2\right)\\
&= \frac{1}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2 + \left(\sum_{\wElem \in W}v_1(\wElem)v_2(\wElem)\right)^2\right)\\
&\leq \frac{1}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2 + \norm{v_1}_2^2\norm{v_2}_2^2\right)\\
&\leq \frac{2}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2\right).
\end{align*}
\subsubsection{k = 3}
\subsubsection{$\prodsize = 3$}
\begin{align*}
&= \sum_{\substack{w_1...w_3\\w'_1...w'_3\\ \in W}}\prod_{i = 1}^3 v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^3 s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^3s(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^3\overline{s(w'_i)}\ind{h(w'_i) = j}} \right)
&= \sum_{\substack{\wElem_1...\wElem_3\\\wElem'_1...\wElem'_3\\ \in W}}\prod_{i = 1}^3 v_i(\wElem_i)v_i(\wElem'_i)\cdot\left( \ex{\prod_{i = 1}^3 s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^3s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \ex{\prod_{i = 1}^3\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}} \right)
\end{align*}
In the above expression, we seek to know which combinations of $w_i$ and $w'_i$ variables will survive the expectation calculations. We can divide the possibilities up into several different cases.
In the above expression, we seek to know which combinations of $\wElem_i$ and $\wElem'_i$ variables will survive the expectation calculations. We can divide the possibilities up into several different cases.
First, for roots of unity, we have that $\omega^k = 1$ if $\omega$ is a kth root of unity. This gives our first case.
First, for roots of unity, we have that $\omega^\prodsize = 1$ if $\omega$ is a $\prodsize$th root of unity. This gives our first case.
\underline{Case 1:}
\begin{align*}
&w_1 = w_2 = w_3 = w\\
&w'_1 = w'_2 = w'_3 = w'\\
&1.1)~ w = w'\qquad1.2)~ w \neq w'
&\wElem_1 = \wElem_2 = \wElem_3 = \wElem\\
&\wElem'_1 = \wElem'_2 = \wElem'_3 = \wElem'\\
&1.1)~ \wElem = \wElem'\qquad1.2)~ \wElem \neq \wElem'
\end{align*}
The remaining cases take into account the property for roots of unity that $\omega \cdot \conj{\omega} = 1$. Note that we omit the case of all variables being equal because that has already been covered above.
\underline{Case 2:}
\begin{align*}
&w_1 = w'_1 = w\\
&w_2 = w'_2 = w'\\
&w_3 = w'_3 = w''\\
&2.1)~ w = w' \neq w''\qquad2.2)~ w \neq w'= w''\qquad2.3)w = w'' \neq w'\qquad2.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_1 = \wElem\\
&\wElem_2 = \wElem'_2 = \wElem'\\
&\wElem_3 = \wElem'_3 = \wElem''\\
&2.1)~ \wElem = \wElem' \neq \wElem''\qquad2.2)~ \wElem \neq \wElem'= \wElem''\qquad2.3)~ \wElem = \wElem'' \neq \wElem'\qquad2.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 3:}
\begin{align*}
&w_1 = w'_2 = w\\
&w_2 = w'_3 = w'\\
&w_3 = w'_1 = w''\\
&3.1)~ w = w' \neq w''\qquad3.2)~ w \neq w'= w''\qquad3.3)w = w'' \neq w'\qquad3.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_2 = \wElem\\
&\wElem_2 = \wElem'_3 = \wElem'\\
&\wElem_3 = \wElem'_1 = \wElem''\\
&3.1)~ \wElem = \wElem' \neq \wElem''\qquad3.2)~ \wElem \neq \wElem'= \wElem''\qquad3.3)~ \wElem = \wElem'' \neq \wElem'\qquad3.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 4:}
\begin{align*}
&w_1 = w'_3 = w\\
&w_2 = w'_1 = w'\\
&w_3 = w'_2 = w''\\
&4.1)~ w = w' \neq w''\qquad4.2)~ w \neq w'= w''\qquad4.3)w = w'' \neq w'\qquad4.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_3 = \wElem\\
&\wElem_2 = \wElem'_1 = \wElem'\\
&\wElem_3 = \wElem'_2 = \wElem''\\
&4.1)~ \wElem = \wElem' \neq \wElem''\qquad4.2)~ \wElem \neq \wElem'= \wElem''\qquad4.3)~ \wElem = \wElem'' \neq \wElem'\qquad4.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 5:}
\begin{align*}
&w_1 = w'_2 = w\\
&w_2 = w'_1 = w'\\
&w_3 = w'_3 = w''\\
&5.1)~ w = w' \neq w''\qquad5.2)~ w \neq w'= w''\qquad5.3)w = w'' \neq w'\qquad5.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_2 = \wElem\\
&\wElem_2 = \wElem'_1 = \wElem'\\
&\wElem_3 = \wElem'_3 = \wElem''\\
&5.1)~ \wElem = \wElem' \neq \wElem''\qquad5.2)~ \wElem \neq \wElem'= \wElem''\qquad5.3)~ \wElem = \wElem'' \neq \wElem'\qquad5.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 6:}
\begin{align*}
&w_1 = w'_1 = w\\
&w_2 = w'_3 = w'\\
&w_3 = w'_2 = w''\\
&6.1)~ w = w' \neq w''\qquad6.2)~ w \neq w'= w''\qquad6.3)w = w'' \neq w'\qquad6.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_1 = \wElem\\
&\wElem_2 = \wElem'_3 = \wElem'\\
&\wElem_3 = \wElem'_2 = \wElem''\\
&6.1)~ \wElem = \wElem' \neq \wElem''\qquad6.2)~ \wElem \neq \wElem'= \wElem''\qquad6.3)~ \wElem = \wElem'' \neq \wElem'\qquad6.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 7:}
\begin{align*}
&w_1 = w'_3 = w\\
&w_2 = w'_2 = w'\\
&w_3 = w'_1 = w''\\
&7.1)~ w = w' \neq w''\qquad7.2)~ w \neq w'= w''\qquad7.3)w = w'' \neq w'\qquad7.4) w \neq w' \neq w''
&\wElem_1 = \wElem'_3 = \wElem\\
&\wElem_2 = \wElem'_2 = \wElem'\\
&\wElem_3 = \wElem'_1 = \wElem''\\
&7.1)~ \wElem = \wElem' \neq \wElem''\qquad7.2)~ \wElem \neq \wElem'= \wElem''\qquad7.3)~ \wElem = \wElem'' \neq \wElem'\qquad7.4)~ \wElem \neq \wElem' \neq \wElem''
\end{align*}
The surviving terms are:
\begin{align*}
&\text{Case 1:}\\
&\frac{B - 1}{B^2}\left(\sum_w v_1^2(w) v_2^2(w) v_3^2(w)\right) + \\
&\frac{B - 1}{B^2}\left(\sum_\wElem v_1^2(\wElem) v_2^2(\wElem) v_3^2(\wElem)\right) + \\
&\text{Case 2:}\\
&\frac{1}{B^2}\left(\sum_{w \neq w'}v_1^2(w)\left(v_2^2(w)v_3^2(w') + v_2^2(w')v_3^2(w') + v_2^2(w')v_3^2(w)\right)\right) + \frac{1}{B^3}\sum_{w \neq w' \neq w''}v_1^2(w)v_2^2(w')v_3^2(w'') +\\
&\frac{1}{B^2}\left(\sum_{\wElem \neq \wElem'}v_1^2(\wElem)\left(v_2^2(\wElem)v_3^2(\wElem') + v_2^2(\wElem')v_3^2(\wElem') + v_2^2(\wElem')v_3^2(\wElem)\right)\right) + \frac{1}{B^3}\sum_{\wElem \neq \wElem' \neq \wElem''}v_1^2(\wElem)v_2^2(\wElem')v_3^2(\wElem'') +\\
&\text{Case 3 and 4:}\\
&\frac{2}{B^2}\left(\sum_{w \neq w'}v_1(w)v_1(w')v_2^2(w)v_3(w)v_3(w') + v_1(w)v_1(w')v_2(w)v_2(w')v_3^2(w') + v_1^2(w)v_2(w)v_2(w')v_3(w)v_3(w')\right) + \\
&\qquad\qquad \frac{1}{B^3}\left(\sum_{w \neq w' \neq w''}v_1(w)v_1(w'')v_2(w')v_2(w)v_3(w'')v_3(w') + v_1(w)v_1(w')v_2(w')v_2(w'')v_2(w)v_3(w'')\right)\\
&\frac{2}{B^2}\left(\sum_{\wElem \neq \wElem'}v_1(\wElem)v_1(\wElem')v_2^2(\wElem)v_3(\wElem)v_3(\wElem') + v_1(\wElem)v_1(\wElem')v_2(\wElem)v_2(\wElem')v_3^2(\wElem') + v_1^2(\wElem)v_2(\wElem)v_2(\wElem')v_3(\wElem)v_3(\wElem')\right) + \\
&\qquad\qquad \frac{1}{B^3}\left(\sum_{\wElem \neq \wElem' \neq \wElem''}v_1(\wElem)v_1(\wElem'')v_2(\wElem')v_2(\wElem)v_3(\wElem'')v_3(\wElem') + v_1(\wElem)v_1(\wElem')v_2(\wElem')v_2(\wElem'')v_3(\wElem)v_3(\wElem'')\right) +\\
&\text{Case 5, 6, 7:}\\
&\frac{1}{B^3}\left(\sum_{w \neq w' \neq w''}v_1(w)v_1(w')v_2(w)v_2(w')v_3^2(w'') + v_1^2(w)v_2(w')v_2(w'')v_3(w')v_3(w'') + v_1(w)v_1(w'')v_2^2(w')v_3(w)v_3(w'')\right)
&\frac{1}{B^3}\left(\sum_{\wElem \neq \wElem' \neq \wElem''}v_1(\wElem)v_1(\wElem')v_2(\wElem)v_2(\wElem')v_3^2(\wElem'') + v_1^2(\wElem)v_2(\wElem')v_2(\wElem'')v_3(\wElem')v_3(\wElem'') + v_1(\wElem)v_1(\wElem'')v_2^2(\wElem')v_3(\wElem)v_3(\wElem'')\right)
\end{align*}