From 0e6af4bca963986090c2df12eb89ac842d7392b2 Mon Sep 17 00:00:00 2001 From: Aaron Huber Date: Tue, 17 Mar 2020 13:55:44 -0400 Subject: [PATCH] Var of POS query single join using Indicator Vars --- macros.tex | 3 +- pos.tex | 115 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/macros.tex b/macros.tex index 5c8aea1..4dfabec 100644 --- a/macros.tex +++ b/macros.tex @@ -5,13 +5,14 @@ % %SKETCH % +\newcommand{\hfunc}{h} \newcommand{\wElem}{w} \newcommand{\wSet}{W} \newcommand{\sine}{s} \newcommand{\est}{est} \newcommand{\conj}[1]{\overline{#1}} \newcommand{\cvar}[1]{\lambda\left({#1}\right)} -\newcommand{\ind}[1]{\mathbbold{1}_{#1}} +\newcommand{\ind}[1]{\underset{#1}{\mathbbold{1}}}%_{#1}} \newcommand{\sk}{\mathcal{S}} \newcommand{\sketch}{\mathcal{S}_t} \newcommand{\sketchIj}{\sketch[i][j]} diff --git a/pos.tex b/pos.tex index df13131..b4bae62 100644 --- a/pos.tex +++ b/pos.tex @@ -22,14 +22,14 @@ For $i \in [1, k - 1]$, we can show by geometric sum series that \end{equation} \qed -We target the specific query where it is optimal to push down projections below join operators. Such a query is a product of sums ($\pos$). To show that our scheme works in this setting, we first compute the expectation of a $\pos$~ query over sketch annotations, i.e. $\pos$ = $\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck'\right]\right)$, for the set of matching projected tuples from each input, denoted $k', k''$. Note that we denote the $i^{th}$ vector as $\vect_i$ and the sketch of the $i^{th}$ vector $\sk^{\vect_i}$. +We target the specific query where it is optimal to push down projections below join operators. Such a query is a product of sums ($\pos$). To show that our scheme works in this setting, we first compute the expectation of a $\pos$~ query over sketch annotations, i.e. 
$\pos$ = $\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)$, for the set of matching projected tuples from each input, denoted $k', k''$. Note that we denote the $i^{th}$ vector as $\vect_i$ and the sketch of the $i^{th}$ vector $\sk^{\vect_i}$. \begin{align} &\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)}\nonumber\\ -=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sum_{\wElem \in \wSet}\vect_i(\wElem)\ind{\hash(\wElem) = \buck}\sine(\wElem)\right) \left(\sum_{i' \in \kvec''}\sum_{\wElem' \in \wSet}\vect_{i'}(\wElem')\ind{\hash(\wElem) = \buck}\sine(\wElem')\right)}\label{eq:exp-pos1}\\ -=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\sine(\wElem)\right) \left(\sum_{\wElem' \in \wSet}\ind{\hash(\wElem') = j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos2}\\ -=&\ex{\sum_{\buck = 1}^{\sketchCols} \left(\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck} \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\sine(\wElem)^{2 = k}\right) + \left(\sum_{\substack{\wElem, \wElem' \in \wSet,\\\wElem \neq \wElem'}}\ind{\hash(\wElem) = j}\ind{\hash(\wElem') = j}\left(\left(\sum_{i \in k'}\vect_i(\wElem)\right)\sine(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos3}\\ -=& \sum_{\buck = 1}^{\sketchCols}\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck}\left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos4}\\ +=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sum_{\wElem \in \wSet}\vect_i(\wElem)\ind{\hfunc(\wElem) = \buck}\sine(\wElem)\right) 
\left(\sum_{i' \in \kvec''}\sum_{\wElem' \in \wSet}\vect_{i'}(\wElem')\ind{\hfunc(\wElem') = \buck}\sine(\wElem')\right)}\label{eq:exp-pos1}\\ +=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\sine(\wElem)\right) \left(\sum_{\wElem' \in \wSet}\ind{\hfunc(\wElem') = \buck}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos2}\\ +=&\ex{\sum_{\buck = 1}^{\sketchCols} \left(\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck} \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\sine(\wElem)^{2 = k}\right) + \left(\sum_{\substack{\wElem, \wElem' \in \wSet,\\\wElem \neq \wElem'}}\ind{\hfunc(\wElem) = \buck}\ind{\hfunc(\wElem') = \buck}\left(\left(\sum_{i \in k'}\vect_i(\wElem)\right)\sine(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos3}\\ +=& \sum_{\buck = 1}^{\sketchCols}\sum_{\wElem \in \wSet}\ind{\hfunc(\wElem) = \buck}\left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos4}\\ =& \sum_{\wElem \in \wSet}\left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos5} \end{align} \qed\newline @@ -39,31 +39,100 @@ We now move to computing the variance of a $\pos$~ query. Note, that the use of To make this easier to present and digest, we start by turning our focus on the first term, $T_1 = \ex{\pos \cdot \conj{\pos}}$. 
\begin{align} -&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right) \cdot \sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos1}\\ -=&\ex{\sum_{\buck, \buck' \in \sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right) \cdot \left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos2}\\ -=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \sine(\wElem_2)\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-pos3} +&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in k''}\sk^{\vect_{i_1'}}[\buck]\right) \cdot +\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sk^{\vect_{i_2}}[\buck]\right)\left(\sum_{i_2' \in k''}\sk^{\vect_{i_2'}}[\buck]\right)}}\\ +&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = 
\buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right) +\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\sine(\wElem_2)\sum_{i_2' \in k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\sine(\wElem_2')\right)}}\label{eq:var-pos1}\\ +&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right) +\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\sum_{i_2' \in k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\label{eq:var-pos2}\\ +% +&=\mathbb{E}\left[\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)\right.\nonumber\\ +&\left.\qquad\qquad\qquad\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)\right]\label{eq:var-pos3}\\ +% +&=\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in 
k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\cdot +\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos4}\\ +% +&=\ex{\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\label{eq:var-pos5}\\ +% +&=\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\ \wElem_2, \wElem_2'\\ \in \wSet}}\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\label{eq:var-pos6} +%--Below is part of the derivation without using the indicator variables. Only saving for short term... 
+%&=\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right) \cdot \sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos1}\\ +%=&\ex{\sum_{\buck, \buck' \in \sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right) \cdot \left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos2}\\ +%=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \conj{\sine(\wElem_2)}\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-pos3} +\end{align} +Equation \eqref{eq:var-pos1} follows from expanding the definition of a sketch $\sk$. +Equation \eqref{eq:var-pos2} uses the fact that the sum (product) of conjugates is equal to the conjugate of the sum (product). +Equation \eqref{eq:var-pos3} results from rewriting the summations using the law of associativity, and then applying the law of distributivity of addition over multiplication to the rewrite. 
+Equations \eqref{eq:var-pos4}, \eqref{eq:var-pos5} again rewrite the summation(s) using the law of distributivity of multiplication over addition. +Equation \eqref{eq:var-pos6} is the result of factoring out non-random terms from the expectation.\newline + +When considering the terms that survive the expectation in \eqref{eq:var-pos6}, recall the known fact that, for the $k^{th}$ roots of unity ($R^k$) in the complex numbers, the product of any such root and its conjugate is one, formally: +\begin{equation*} +\forall c \in \mathbb{C} \text{ s.t. } c \in R^k, c \cdot \conj{c}= 1. +\end{equation*} +Combining this result with Lemma~\ref{lem:exp-sine}, one can see that only two possible cases of terms survive the expectation in \eqref{eq:var-pos6}. + +First, by Lemma~\ref{lem:exp-sine}, +%labels not compiling +\begin{align} +&\emph{case 1}\nonumber\\ +&\qquad\text{a: }w_1 = w_1' =w_2 = w_2'\label{this-1}\\%\label{var:pos-case-1a} +&\qquad\text{b: }w_1 = w_1' \neq w_2 = w_2'\label{this-2}%\label{var:pos-case-1b} +\end{align} +Second, by the law of conjugates, +\begin{align} +&\emph{case 2}\nonumber\\ +&\qquad\text{a: }w_1 = w_2 \neq w_1' = w_2'\label{joe-a}\\%\label{var:pos-Case-2a} +&\qquad\text{b: }w_1 = w_2' \neq w_1' = w_2\label{joe-b}%\label{var:pos-Case-2b} \end{align} -Equation \eqref{eq:var-pos1} follows from substituting the $\pos$ ($\conj{\pos}$) equivalence derived in \eqref{eq:exp-pos2}. The conjugate term ($\conj{\pos}$) in equation \eqref{eq:var-pos1} uses the facts that the complex conjugate of a sum (product) is equal to the sum (product) of the conjugates. -Equation \eqref{eq:var-pos2} follows from a simple rewriting of the summations. -Equation \eqref{eq:var-pos3} is the result of factoring out non-random terms from the expectation. -Next, we show that the second term, $T_2 = \ex{\pos}\ex{\conj{\pos}}$, has the same term factor out of the expectations. 
+ +Next, we show that the second term, $T_2 = \ex{\pos}\ex{\conj{\pos}}$, has the same term as $T_1$ factor out of the expectations. \begin{align} -&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right)} \cdot \ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-t2-pos1}\\ -=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \sine(\wElem_2)}\ex{\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-t2-pos2} +&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sk^{\vect_{i_1}}[\buck]\right)\left(\sum_{i_1' \in k''}\sk^{\vect_{i_1'}}[\buck]\right)} +\ex{\conj{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sk^{\vect_{i_2}}[\buck]\right)\left(\sum_{i_2' \in k''}\sk^{\vect_{i_2'}}[\buck]\right)}}\\\label{eq:var-t2-pos1} +% +&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i_1 \in k'}\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\vect_{i_1}(\wElem_1)\sine(\wElem_1)\right)\left(\sum_{i_1' \in k''}\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\vect_{i_1'}(\wElem_1')\sine(\wElem_1')\right)}\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{i_2 \in k'}\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\vect_{i_2}(\wElem_2)\conj{\sine(\wElem_2)}\right)\left(\sum_{i_2' \in 
k''}\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\vect_{i_2'}(\wElem_2')\conj{\sine(\wElem_2')}\right)}\\\label{eq:var-t2-pos2} +% +&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\right)\left(\sum_{\wElem_1' \in \wSet}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')\right)}\nonumber\\ +&\qquad\qquad\qquad\ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_2 \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)\left(\sum_{\wElem_2' \in \wSet}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}\right)}\\\label{eq:var-t2-pos3} +% +&\ex{\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\sine(\wElem_1)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\sine(\wElem_1')}\ex{\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\conj{\sine(\wElem_2)}\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\conj{\sine(\wElem_2')}}\\\label{eq:var-t2-pos4} +% +&\sum_{\buck = 1}^{\sketchCols}\sum_{\wElem_1, \wElem_1' \in \wSet}\left(\sum_{i_1 \in k'}\vect_{i_1}(\wElem_1)\right)\left(\sum_{i_1' \in k''}\vect_{i_1'}(\wElem_1')\right)\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\sum_{\buck' = 1}^{\sketchCols}\sum_{\wElem_2, \wElem_2' \in \wSet}\left(\sum_{i_2 \in k'}\vect_{i_2}(\wElem_2)\right)\left(\sum_{i_2' \in k''}\vect_{i_2'}(\wElem_2')\right)\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = 
\buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\\\label{eq:var-t2-pos5} +% +&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, \wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right) +% +%&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\conj{\sine(\wElem_2)}\right)} \cdot \ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-t2-pos1}\\ +%=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \conj{\sine(\wElem_2)}}\ex{\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-t2-pos2} \end{align} -Here, equation \eqref{eq:var-t2-pos1} is the substitution of definitions for both $\pos$ and $\conj{\pos}$. -Equation \eqref{eq:var-t2-pos2} follows from the factoring out of non-random terms from the expectation and the commutativity/associativity of product. 
+The justification of the steps is almost identical to the justification used in the $T_1$ derivation. +Equation \eqref{eq:var-t2-pos1} expands out the definition of $\sk$, and also uses the fact that the sum (product) of conjugates is equal to the conjugate of the sum (product). +Equations \eqref{eq:var-t2-pos2} and \eqref{eq:var-t2-pos3} rely on the associativity of addition and the distributivity of multiplication over addition. +Equation \eqref{eq:var-t2-pos4} factors out non-random terms from the expectations. +Equation \eqref{eq:var-t2-pos5} uses the distributive property of multiplication over addition, along with the commutativity and associativity of multiplication. -Putting things together we have, +Notice that both $T_1$ and $T_2$ have the same left side factor, so $\var$ can be written as \begin{align} -&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\left(\ex{\sine(\wElem_1) \sine(\wElem_2)\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}-\ex{\sine(\wElem_1) \sine(\wElem_2)}\ex{\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\right)\label{eq:var-both-pos1}\\ -=&\sum_{\buck}\sum_{\wElem \neq \wElem' \in \wSet}\left(\sum_{i \in k'}\vect_i(\wElem)\right)^2\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)^2 + \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right) \left(\sum_{i \in k'}\vect_i(\wElem')\right)\label{eq:var-both-pos2}\\ -\leq&\norm{\sum_{i \in k'}\vect_i}_2^2\cdot\norm{\sum_{i' \in k''}\vect_{i'}}_2^2 + \norm{\sum_{i \in k'}\vect_i \had \sum_{i' \in k''}\vect_{i'}}_2^2\label{eq:var-both-pos3} -\end{align} -\qed +&\sum_{\buck, \buck' \in [\sketchCols]}\sum_{\substack{\wElem_1, 
\wElem_1',\\\wElem_2, \wElem_2' \in \wSet}}\left(\sum_{\substack{i_1, i_2 \in k',\\i_1', i_2' \in k''}}\vect_{i_1}(\wElem_1)\vect_{i_1'}(\wElem_1')\vect_{i_2}(\wElem_2)\vect_{i_2'}(\wElem_2')\right)\left(\ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\sine(\wElem_1)\sine(\wElem_1')\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right.\nonumber\\ +&\left.\qquad\qquad\qquad - \ex{\ind{\hfunc(\wElem_1) = \buck}\ind{\hfunc(\wElem_1') = \buck}\sine(\wElem_1)\sine(\wElem_1')}\ex{\ind{\hfunc(\wElem_2) = \buck'}\ind{\hfunc(\wElem_2') = \buck'}\conj{\sine(\wElem_2)}\conj{\sine(\wElem_2')}}\right)\label{eq:var-t1-t2} +\end{align} -Equation \eqref{eq:var-both-pos2} relies on the fact that the difference in expectation will only be non-zero when $\wElem_1 = \wElem_3 \neq \wElem_2 = \wElem_4$ or $\wElem_1 = \wElem_4 \neq \wElem_2 = \wElem_3$. \ No newline at end of file +Notice that the expectation terms coming from $T_2$ cancel out case 1, leaving the two possibilities of case 2, \eqref{joe-a} and \eqref{joe-b}, as surviving terms in $\var$. 
+ +Thus, +\begin{equation} +\var = \sum_j\sum_{\wElem, \wElem'}\frac{1}{\sketchCols^2}\left(\sum_{\substack{i \in k',\\i' \in k''}}\vect_i(\wElem)^2\vect_{i'}(\wElem')^2 + \vect_i(\wElem)\vect_{i'}(\wElem)\vect_i(\wElem')\vect_{i'}(\wElem')\right) +\end{equation} +%Putting things together we have, +%\begin{align} +%&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\left(\ex{\sine(\wElem_1) \conj{\sine(\wElem_2)}\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}-\ex{\sine(\wElem_1) \conj{\sine(\wElem_2)}}\ex{\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\right)\label{eq:var-both-pos1}\\ +%=&\sum_{\buck}\sum_{\wElem \neq \wElem' \in \wSet}\left(\sum_{i \in k'}\vect_i(\wElem)\right)^2\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right)^2 + \left(\sum_{i \in k'}\vect_i(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem)\right)\left(\sum_{i' \in k''}\vect_{i'}(\wElem')\right) \left(\sum_{i \in k'}\vect_i(\wElem')\right)\label{eq:var-both-pos2}\\ +%\leq&\norm{\sum_{i \in k'}\vect_i}_2^2\cdot\norm{\sum_{i' \in k''}\vect_{i'}}_2^2 + \norm{\sum_{i \in k'}\vect_i \had \sum_{i' \in k''}\vect_{i'}}_2^2\label{eq:var-both-pos3} +%\end{align} +%\qed +% +%Equation \eqref{eq:var-both-pos2} relies on the fact that the difference in expectation will only be non-zero when $\wElem_1 = \wElem_3 \neq \wElem_2 = \wElem_4$ or $\wElem_1 = \wElem_4 \neq \wElem_2 = \wElem_3$. \ No newline at end of file