% -*- root: main.tex -*-
|
|
\section{Combining Sketches}
|
|
\label{sec:combining}
|
|
\subsection{Adding Sketches}
|
|
When assuming that the variables are independent, as in the TIDB model, it is a known result that
|
|
\[
\varParam{X + Y} = \varParam{X} + \varParam{Y}.
\]
|
|
By~\eqref{eq:sub-bounds-final}, it immediately follows that adding $n$ base sketches (a \emph{base} sketch being one that has not previously been added to another sketch) results in the following variance:
|
|
\[
3n\left(\frac{\sqrt{\norm{\genV}_\infty}\left(|\pw|\right)}{\sqrt{\norm{\genV}_0\norm{\genV}_1} \epsilon^2} + \frac{1}{\epsilon^2}\right).
\]
|
|
|
|
\subsection{Multiplying Sketches}
|
|
For the case of multiplication, when assuming independent variables, it is a known result that
|
|
\[
\varParam{X \cdot Y} = \expect{X^2}\expect{Y^2} - (\expect{X})^2 (\expect{Y})^2.
\]
|
|
It is then necessary to calculate the expectation of the square of the sum of estimates. Assuming discrete variables, the expectation of the square of a random variable is simply the probability-weighted sum of its squared values. This yields
|
|
\begin{align}
&\expect{\left(\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq}\\
={}& \sum_{\wVec \in \pw}\expect{\left(\sketchJParam{\sketchHashParam{\wVec}}\cdot\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-ex-push}\\
={}& \sum_{\wVec \in \pw}\expect{\left(\sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-equiv}\\
={}& \sum_{\wVec \in \pw}\expect{\left(\genVParam{\wVec}\sketchPolarParam{\wVec}^2 + \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}, \\
\wVecPrime \neq \wVec}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-assoc}\\
={}& \sum_{\wVec \in \pw}\expect{\genVParam{\wVec}^2}\label{eq:rand-sq-reduce}\\
={}& \sum_{\wVec \in \pw}\genVParam{\wVec}^2.\label{eq:rand-sq-final}
\end{align}
|
|
\begin{Justification}
|
|
\hfill
|
|
\begin{itemize}
|
|
\item We start with~\eqref{eq:rand-sq}, since we need the expectation of the square of the sum of estimates.
|
|
\item \eqref{eq:rand-sq-ex-push} is the sum of weighted squares, or alternatively, pushes the expectation inside the summation by linearity of expectation.
|
|
\item \eqref{eq:rand-sq-equiv} substitutes the definition of a sketch bucket.
|
|
\item \eqref{eq:rand-sq-assoc} uses associativity to rearrange the operands of the sum.
|
|
\item \eqref{eq:rand-sq-reduce} reduces the second term of \eqref{eq:rand-sq-assoc} to $0$ by the property of uniform distribution of $\sketchPolar$.
|
|
\item \eqref{eq:rand-sq-final} follows because $\genVParam{\wVec}^2$ is a constant, so its expectation is simply itself.
|
|
\end{itemize}
|
|
\end{Justification}
|
|
\begin{Assumption}
|
|
\hfill
|
|
\begin{itemize}\item Uniform distribution of both $\sketchHash$ and $\sketchPolar$.\end{itemize}
|
|
\end{Assumption}
|
|
It then follows that the variance corresponding to the multiplication of two base sketches is
|
|
\begin{align}
&\sum_{\wVec \in \pw}\genV_1\paramBox{\wVec}^2\sum_{\wVec \in \pw}\genV_2\paramBox{\wVec}^2 - \left(\sum_{\wVec \in \pw} \genV_1\paramBox{\wVec}\right)^2\left(\sum_{\wVec \in \pw} \genV_2\paramBox{\wVec}\right)^2\\
={}&\norm{\genV_1}_2^2\cdot\norm{\genV_2}_2^2 - \norm{\genV_1}_1^2\cdot\norm{\genV_2}_1^2.
\end{align}
|
|
\AH{I don't think this equation makes sense. Where am I missing it?}
|
|
The subscript on $\genV$ denotes sketch identity. Substituting the upper bound on the squared $\ell_1$ norm from~\eqref{eq:norm1-sq-cauchy} results in
|
|
\[
\norm{\genV_1}_2^2\cdot\norm{\genV_2}_2^2 - \left(|\pw|\right)\norm{\genV_1}_2^2 \cdot \left(|\pw|\right)\norm{\genV_2}_2^2.
\]