% -*- root: main.tex -*-
\section{Combining Sketches}
\label{sec:combining}
\subsection{Adding Sketches}
When the variables are assumed to be independent, as in the TIDB model, it is a known result that
\[
  \varParam{X + Y} = \varParam{X} + \varParam{Y}.
\]
It then immediately follows that adding $n$ base sketches results in the following variance:
\[
  n \cdot 4\norm{\genV}_2 |\pw|^{1/2}.
\]
\subsection{Multiplying Sketches}
For the case of multiplication of independent variables, it is a known result that
\[
  \varParam{X \cdot Y} = \expect{X^2}\expect{Y^2} - (\expect{X})^2 (\expect{Y})^2.
\]
Assuming discrete variables, the expectation of the square of a random variable is simply the probability-weighted sum of its squared values. This yields
\begin{align}
  &\expect{\left(\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}\right)^2}\\
  ={}& \sum_{\wVec \in \pw}\expect{\left(\sketchJParam{\sketchHashParam{\wVec}}\cdot\sketchPolarParam{\wVec}\right)^2}\\
  ={}& \sum_{\wVec \in \pw}\expect{\left(\sum_{\substack{\wVecPrime \in \pw \st \\
      \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\\
  ={}& \sum_{\wVec \in \pw}\expect{\left(\genVParam{\wVec}\sketchPolarParam{\wVec}^2 + \sum_{\substack{\wVecPrime \in \pw \st \\
      \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec},
      \wVecPrime \neq \wVec}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\\
  ={}& \sum_{\wVec \in \pw}\expect{\genVParam{\wVec}^2}\\
  ={}& \sum_{\wVec \in \pw}\genVParam{\wVec}^2.
\end{align}
\begin{Justification}
  \hfill
  \begin{itemize}
    \item stuff goes here
  \end{itemize}
\end{Justification}
It then follows that the multiplication of two base sketches results in
\begin{align}
  &\sum_{\wVec \in \pw}\genV_1\paramBox{\wVec}^2\sum_{\wVec \in \pw}\genV_2\paramBox{\wVec}^2 - \left(\sum_{\wVec \in \pw} \genV_1\paramBox{\wVec}\right)^2\left(\sum_{\wVec \in \pw} \genV_2\paramBox{\wVec}\right)^2\\
  ={}&\norm{\genV_1}_2^2\norm{\genV_2}_2^2 - \norm{\genV_1}_1^2\norm{\genV_2}_1^2.
\end{align}