% paper-BagRelationalPDBsAreHard/pos.tex
%
% 69 lines
% 9.1 KiB
% TeX
% Raw Normal View History

% -*- root: main.tex -*-
\pagebreak
\section{POS Queries}
\AH{The following lemma will probably be moved later on.}
The following property of the sine function $\sine$ is used in the derivation of $\ex{\pos}$.
\begin{Lemma}\label{lem:exp-sine}
$\forall \wElem \in \wSet$,\newline
$\ex{\sine(\wElem)^i} = \begin{cases}
0 &1 \leq i < k\\
1 &\text{otherwise}.
\end{cases}$
\end{Lemma}
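Notice that, $\forall i \in [1, k]$, $\ex{\sine(\wElem)^i} = \frac{\sum\limits_{\omega \in \Omega}\omega^i}{k} = \frac{\sum\limits_{l = 0}^{k - 1}(\omega^i)^l}{k}$, where in the last expression the elements of $\Omega$ are written as the powers $\omega^l$, $l = 0, \ldots, k - 1$, of a primitive $k^{th}$ root of unity $\omega$. To prove the lemma, then, one only needs to show that $\sum\limits_{l = 0}^{k - 1}(\omega^i)^l = \begin{cases}0&1 \leq i < k\\k&\text{otherwise}.\end{cases}$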
For the case of $i = k$,
\begin{equation}
\frac{\sum\limits_{l = 0}^{k - 1}(\omega^k)^l}{k} = \frac{\sum\limits_{l = 0}^{k - 1}1^l}{k} = \frac{k}{k} = 1.
\end{equation}
For $i \in [1, k - 1]$, we have $\omega^i \neq 1$, so the formula for a finite geometric series gives
\begin{equation}
\sum_{l = 0}^{k - 1}(\omega^i)^l = \frac{(\omega^i)^k - 1}{\omega^i - 1} = \frac{1 - 1}{\omega^i - 1} = 0.
\end{equation}
\qed
We target the specific class of queries for which it is optimal to push projections down below join operators. Such a query is a product of sums ($\pos$). To show that our scheme works in this setting, we first compute the expectation of a $\pos$ query over sketch annotations, i.e., $\pos = \sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)$, where $\kvec'$ and $\kvec''$ denote the sets of matching projected tuples from each input. Note that we denote the $i^{th}$ vector as $\vect_i$ and the sketch of the $i^{th}$ vector as $\sk^{\vect_i}$.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sk^{\vect_i}\left[\buck\right]\right) \left(\sum_{i' \in \kvec''}\sk^{\vect_{i'}}\left[\buck\right]\right)}\nonumber\\
=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{i \in \kvec'}\sum_{\wElem \in \wSet}\vect_i(\wElem)\ind{\hash(\wElem) = \buck}\sine(\wElem)\right) \left(\sum_{i' \in \kvec''}\sum_{\wElem' \in \wSet}\vect_{i'}(\wElem')\ind{\hash(\wElem') = \buck}\sine(\wElem')\right)}\label{eq:exp-pos1}\\
=&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\sine(\wElem)\right) \left(\sum_{\wElem' \in \wSet}\ind{\hash(\wElem') = \buck}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos2}\\
=&\ex{\sum_{\buck = 1}^{\sketchCols} \left(\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck} \left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem)\right)\sine(\wElem)^{2 = k}\right) + \left(\sum_{\substack{\wElem, \wElem' \in \wSet,\\\wElem \neq \wElem'}}\ind{\hash(\wElem) = \buck}\ind{\hash(\wElem') = \buck}\left(\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\sine(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)\sine(\wElem')\right)}\label{eq:exp-pos3}\\
=& \sum_{\buck = 1}^{\sketchCols}\sum_{\wElem \in \wSet}\ind{\hash(\wElem) = \buck}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos4}\\
=& \sum_{\wElem \in \wSet}\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem)\right)\label{eq:exp-pos5}
\end{align}
\qed\newline
Equation~\eqref{eq:exp-pos1} follows from expanding the definition of $\sk^{\vect_i}$. Equation~\eqref{eq:exp-pos2} follows from the associative property of addition and the distributive property of multiplication over addition. Equation~\eqref{eq:exp-pos3} also uses the associative and distributive properties to rearrange the $\pos$, separating the terms with $\wElem = \wElem'$ from those with $\wElem \neq \wElem'$. Equation~\eqref{eq:exp-pos4} results from Lemma~\ref{lem:exp-sine}, where it can be seen that $\ex{\sine(\wElem)\sine(\wElem')} = 0$ for $\wElem \neq \wElem'$, thus eliminating the right hand term. The left hand operand stays, since by Lemma~\ref{lem:exp-sine} we know that $\ex{\sine(\wElem)^k} = 1$. Finally, equation~\eqref{eq:exp-pos5} follows from the construction of $\sk$, since each $\wElem$ is hashed to exactly one bucket.
We now move to computing the variance of a $\pos$ query. Note that the use of complex numbers requires the variance formula $\var = \ex{\pos \cdot\conj{\pos}} - \ex{\pos}\ex{\conj{\pos}}$.
To make this easier to present and digest, we start by focusing on the first term, $T_1 = \ex{\pos \cdot \conj{\pos}}$.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right) \cdot \sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos1}\\
=&\ex{\sum_{\buck, \buck' \in \sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right) \cdot \left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-pos2}\\
=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \sine(\wElem_2)\cdot\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-pos3}
\end{align}
Equation~\eqref{eq:var-pos1} follows from substituting the $\pos$ ($\conj{\pos}$) equivalence derived in \eqref{eq:exp-pos2}, where $\wSet_j = \{\wElem \in \wSet \mid \hash(\wElem) = j\}$ denotes the set of elements hashed to bucket $j$. The conjugate term ($\conj{\pos}$) in equation~\eqref{eq:var-pos1} uses the fact that the complex conjugate of a sum (product) is equal to the sum (product) of the conjugates.
Equation \eqref{eq:var-pos2} follows from a simple rewriting of the summations.
Equation \eqref{eq:var-pos3} is the result of factoring out non-random terms from the expectation.
Next, we show that the second term, $T_2 = \ex{\pos}\ex{\conj{\pos}}$, has the same terms factored out of the expectations.
\begin{align}
&\ex{\sum_{\buck = 1}^{\sketchCols}\left(\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sine(\wElem_1)\right) \left(\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right)\sine(\wElem_2)\right)} \cdot \ex{\sum_{\buck' = 1}^{\sketchCols}\left(\sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right)\conj{\sine(\wElem_3)}\right) \left(\sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\conj{\sine(\wElem_4)}\right)}\label{eq:var-t2-pos1}\\
=&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\ex{\sine(\wElem_1)\cdot \sine(\wElem_2)}\ex{\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\label{eq:var-t2-pos2}
\end{align}
Here, equation \eqref{eq:var-t2-pos1} is the substitution of definitions for both $\pos$ and $\conj{\pos}$.
Equation \eqref{eq:var-t2-pos2} follows from the factoring out of non-random terms from the expectation and the commutativity/associativity of product.
Putting things together, the variance $\var = T_1 - T_2$ is
\begin{align}
&\sum_{\buck, \buck' \in \sketchCols}\sum_{\wElem_1 \in \wSet_j}\left(\sum_{i \in \kvec'}\vect_i(\wElem_1)\right)\sum_{\wElem_2 \in \wSet_j}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_2)\right) \sum_{\wElem_3 \in \wSet_{j'}}\left(\sum_{i \in \kvec'}\vect_i(\wElem_3)\right) \sum_{\wElem_4 \in \wSet_{j'}}\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem_4)\right)\left(\ex{\sine(\wElem_1) \sine(\wElem_2)\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}-\ex{\sine(\wElem_1) \sine(\wElem_2)}\ex{\conj{\sine(\wElem_3)}\cdot \conj{\sine(\wElem_4)}}\right)\label{eq:var-both-pos1}\\
=&\sum_{\buck}\sum_{\wElem \neq \wElem' \in \wSet}\left[\left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)^2\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right)^2 + \left(\sum_{i \in \kvec'}\vect_i(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem)\right)\left(\sum_{i' \in \kvec''}\vect_{i'}(\wElem')\right) \left(\sum_{i \in \kvec'}\vect_i(\wElem')\right)\right]\label{eq:var-both-pos2}\\
\leq&\norm{\sum_{i \in \kvec'}\vect_i}_2^2\cdot\norm{\sum_{i' \in \kvec''}\vect_{i'}}_2^2 + \norm{\sum_{i \in \kvec'}\vect_i \had \sum_{i' \in \kvec''}\vect_{i'}}_2^2\label{eq:var-both-pos3}
\end{align}
\qed
Equation~\eqref{eq:var-both-pos2} relies on the fact that the difference of the two expectations is non-zero only when $\wElem_1 = \wElem_3 \neq \wElem_2 = \wElem_4$ or $\wElem_1 = \wElem_4 \neq \wElem_2 = \wElem_3$.