paper-BagRelationalPDBsAreHard/combining.tex

55 lines
3.7 KiB
TeX

% -*- root: main.tex -*-
\section{Combining Sketches}
\label{sec:combining}
\subsection{Adding Sketches}
When assuming that the variables are independent, as in the TIDB model, it is a known result that
\[
\varParam{X + Y} = \varParam{X} + \varParam{Y}.
\]
By~\eqref{eq:sub-bounds-final}, it immediately follows that adding $n$ base sketches (a \emph{base} sketch being one that has not previously been added to another sketch) results in the following variance:
\[
3n\left(\frac{\sqrt{\norm{\genV}_\infty}\left(|\pw|\right)}{\sqrt{\norm{\genV}_0\norm{\genV}_1} \epsilon^2} + \frac{1}{\epsilon^2}\right).
\]
\subsection{Multiplying Sketches}
For the case of multiplication, when assuming independent variables, it is a known result that
\[
\varParam{X \cdot Y} = \expect{X^2}\expect{Y^2} - (\expect{X})^2 (\expect{Y})^2.
\]
It is then necessary to calculate the expectation of the square of the sum of estimates. Assuming discrete variables, the expectation of the square of a random variable is simply the sum of its weighted squares. This yields
\begin{align}
&\expect{\left(\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq}\\
=& \sum_{\wVec \in \pw}\expect{\left(\sketchJParam{\sketchHashParam{\wVec}}\cdot\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-ex-push}\\
=& \sum_{\wVec \in \pw}\expect{\left(\sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-equiv}\\
=& \sum_{\wVec \in \pw}\expect{\left(\genVParam{\wVec}\sketchPolarParam{\wVec}^2 + \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec},
\wVecPrime \neq \wVec}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-assoc}\\
=& \sum_{\wVec \in \pw}\expect{\genVParam{\wVec}^2}\label{eq:rand-sq-reduce}\\
=& \sum_{\wVec \in \pw}\genVParam{\wVec}^2\label{eq:rand-sq-final}.
\end{align}
\begin{Justification}
\hfill
\begin{itemize}
\item We start with~\eqref{eq:rand-sq}, since we need to know the expectation of the square of the sum of estimates.
\item \eqref{eq:rand-sq-ex-push} is the sum of weighted squares, or alternatively, pushes the expectation inside the summation by linearity of expectation.
\item \eqref{eq:rand-sq-equiv} substitutes the definition of a sketch bucket.
\item \eqref{eq:rand-sq-assoc} uses associativity to rearrange the operands of the sum.
\item \eqref{eq:rand-sq-reduce} reduces the second term of \eqref{eq:rand-sq-assoc} to $0$ by the property of uniform distribution of $\sketchPolar$.
\item \eqref{eq:rand-sq-final} is obtained by the fact that the expectation of $\genVParam{\wVec}$ is simply itself.
\end{itemize}
\end{Justification}
\begin{Assumption}
\hfill
\begin{itemize}\item Uniform distribution of both $\sketchHash$ and $\sketchPolar$.\end{itemize}
\end{Assumption}
It then follows that the variance corresponding to the multiplication of two base sketches is
\begin{align}
&\sum_{\wVec \in \pw}\genV_1\paramBox{\wVec}^2\sum_{\wVec \in \pw}\genV_2\paramBox{\wVec}^2 - \left(\sum_{\wVec \in \pw} \genV_1\paramBox{\wVec}\right)^2\left(\sum_{\wVec \in \pw} \genV_2\paramBox{\wVec}\right)^2\\
=&\norm{\genV_1}_2^2\cdot\norm{\genV_2}_2^2 - \norm{\genV_1}_1^2\cdot\norm{\genV_2}_1^2.
\end{align}
\AH{I don't think this equation makes sense. Where am I missing it?}
The subscript notation for $\genV$ is used to denote sketch identity. Substituting the upper bounds obtained for the squared $L_1$ norm from~\eqref{eq:norm1-sq-cauchy} results in
\[
\norm{\genV_1}_2^2\cdot\norm{\genV_2}_2^2 - \left(|\pw|\right)\norm{\genV_1}_2^2 \cdot \left(|\pw|\right)\norm{\genV_2}_2^2.
\]