% paper-BagRelationalPDBsAreHard/var_estj.tex
%
% 169 lines
% 14 KiB
% TeX

% -*- root: main.tex -*-
\onecolumn
\section{Bounding $\sigsq$}
\label{sec:var_est}
We wish to prove that
\[
\sigsq \leq \sum_j \sigsq_j.
\]
To see what this requires, we substitute the definition of variance for complex-valued random variables and expand:
\begin{align}
\sigsq &= \ex{\sum_j \est_j \cdot \conj{\sum_{j'} \est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\conj{\sum_{j'} \est_{j'}}}\nonumber\\
&= \ex{\sum_j \est_j \cdot \sum_{j'} \conj{\est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\sum_{j'} \conj{\est_{j'}}}\nonumber\\
&= \sum_{j, j'}\underbrace{\left(\ex{\est_j \cdot \overline{\est_{j'}}} - \ex{\est_j}\ex{\overline{\est_{j'}}}\right)}_{= \cvar{j, j'}}\nonumber\\
&= \sum_j\left(\ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j}\ex{\overline{\est_j}}\right) + \sum_{j \neq j'}\cvar{j, j'}\nonumber\\
&= \sum_j \sigsq_j + \sum_{j \neq j'}\cvar{j, j'}. \label{eq:sigsq-jneqj}\\
\intertext{Hence the desired bound holds exactly when}
&\sum_{j \neq j'}\cvar{j, j'}\leq 0. \nonumber
\end{align}
\subsection{Bounding $\sum_{j \neq j'}\cvar{j, j'}$}
\begin{align*}
\sum_{j \neq j'}\cvar{j, j'} &= \sum_{j \neq j'} \ex{\est_j \cdot \conj{\est_{j'}}} - \ex{\est_j}\cdot\ex{\conj{\est_{j'}}}\\
&=\ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}\cdot \prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}} - \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}}\cdot \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}}\\
&=\ex{\sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem_i) = j} \ind{h(\wElem'_i) = j'}} - \ex{\sum_{\substack{\wElem_1,\cdots, \wElem_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}}\cdot\ex{\sum_{\substack{\wElem'_1,\cdots, \wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem'_i) = j'}}\\
&=\sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\left(\ex{\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize} v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem'_i) = j'}}\right)\\
&= \sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\left(\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \prod_{i = 1}^{\prodsize}v_i(\wElem_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem'_i) = j'}}\right)\\
&= \sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\left(\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem'_i) = j'}} \right).
\end{align*}
Consider first $T_1 = \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}}$. Because the hash function $h$ cannot map the same world to two different buckets and $j \neq j'$, a term can survive only when no $\wElem_i$ coincides with any $\wElem'_{i'}$. Under this no-overlap condition, the only surviving terms are those with $\wElem_i = \wElem$ and $\wElem'_i = \wElem'$ for all $i \in [\prodsize]$, where $\wElem \neq \wElem'$. Notice, however, that each such term is cancelled by the product of the remaining two expectations. Each of those two expectations, in turn, survives only when $\wElem_i = \wElem$ and $\wElem'_i = \wElem'$ for all $i \in [\prodsize]$, this time without the restriction $\wElem \neq \wElem'$. After the cancellation, the only case left is the one in which all variables are the same world. Thus,
\begin{align}
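&\cvar{j, j'} = - \frac{1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{\prodsize}v_i^2(\wElem) \text{ for each } j \neq j', \quad\text{and hence}\quad \sum_{j \neq j'}\cvar{j, j'} = - \frac{B\left(B - 1\right)}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{\prodsize}v_i^2(\wElem)\label{eq:cvar-bound}.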
\end{align}
\subsection{Bounding $\sigsq_j$}
We now seek to bound the remaining term in~\eqref{eq:sigsq-jneqj}, so we examine the variance of a single bucket estimate.
\begin{align*}
&\sigsq_j = \ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j} \cdot \ex{\overline{\est_j}} \\
&= \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W_j}v_i(\wElem)s(\wElem) \cdot \prod_{i = 1}^\prodsize\sum_{\wElem' \in W_j}v_i(\wElem')\overline{s(\wElem')}} -
\ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W_j}v_i(\wElem)s(\wElem)}\cdot \ex{\prod_{i = 1}^\prodsize\sum_{\wElem' \in W_j}v_i(\wElem')\overline{s(\wElem')}}\\
&= \ex{\sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\sum_{\wElem_1...\wElem_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(\wElem_i)s(\wElem_i)\ind{h(\wElem_i) = j}} \cdot
\ex{\sum_{\wElem'_1...\wElem'_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(\wElem'_i)\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}\\
&= \sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\left(\ex{\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize v_i(\wElem_i)s(\wElem_i)\ind{h(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^\prodsize v_i(\wElem'_i)\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}\right)\\
&= \sum_{\substack{\wElem_1...\wElem_\prodsize\\\wElem'_1...\wElem'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(\wElem_i)v_i(\wElem'_i)\cdot\left( \ex{\prod_{i = 1}^\prodsize s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}} \right).
\end{align*}
\subsection{Non-generic $\prodsize$}
\subsubsection{$\prodsize = 2$}
Taking $\prodsize = 2$ and looking at $T_1 = \ex{\prod\limits_{i = 1}^\prodsize s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}}$, it can be seen that only specific combinations of $\wElem$ can survive. First, when $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem'$, then we end up with $s(\wElem)^\prodsize = 1$ and $s(\wElem')^\prodsize = 1$. This translates into:
\begin{align*}
\frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) +\frac{1}{B^2}\sum_{\wElem\neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem').
\end{align*}
Taking into account that every root of unity $\omega$ (indeed, every $\omega \in \mathbb{C}$ with $|\omega| = 1$) satisfies $\omega \cdot \conj{\omega} = 1$, terms in $T_1$ also survive the expectation when every $\wElem_i$ has a matching counterpart among the $\wElem'_i$, yielding
\begin{align*}
\frac{1}{B^2}\sum_{\substack{\wElem_1 \neq \wElem_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(\wElem_i) + \frac{1}{B^2}\sum_{\wElem \neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem').
\end{align*}
Putting all cases together we have that
\begin{align*}
T_1 = \frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\left(2\sum_{\wElem\neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem') + \sum_{\substack{\wElem_1 \neq \wElem_2\\ \in W}}\prod_{i = 1}^{2}v_i^2(\wElem_i)\right).
\end{align*}
For $T_2 = \ex{\prod_{i = 1}^\prodsize s(\wElem_i)\ind{h(\wElem_i) = j}}$ and $T_3 = \ex{\prod_{i = 1}^\prodsize\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}}$, we get
\begin{align*}
&T_2 = \frac{1}{B}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i(\wElem),\\
&T_3 = \frac{1}{B}\sum_{\wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem'),\\
&T_2 \cdot T_3 = \frac{1}{B^2}\sum_{\wElem, \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem') = \frac{1}{B^2}\left(\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \sum_{\wElem \neq \wElem' \in W}\prod_{i = 1}^{2}v_i(\wElem)v_i(\wElem')\right).
\end{align*}
Combining all $T_i$,
\begin{align*}
\sigsq_j = T_1 - T_2 \cdot T_3 = \frac{B - 1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\left(\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right).%+ \sum_{\wElem}\prod_{i = 1}^{2}v_i(\wElem)^2\right)
\end{align*}
Recall from~\eqref{eq:cvar-bound} and its derivation that every pair $j \neq j'$ contributes $\cvar{j, j'} = -\frac{1}{B^2}\sum\limits_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem)$, and that there are $B\left(B - 1\right)$ such ordered pairs. Thus, for $\prodsize = 2$ we can evaluate~\eqref{eq:sigsq-jneqj} as
\begin{align*}
&\sigsq = \sum_{j \in B}\sigsq_j + \sum_{j \neq j'}\cvar{j, j'}\\
&=B \cdot \left(\frac{B - 1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem) + \frac{1}{B^2}\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\left(\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right)
\right) - \frac{B\left(B - 1\right)}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{2}v_i^2(\wElem)\\
&= \frac{1}{B}\sum_{\substack{\wElem_1\neq \wElem_2 \\ \in W}}\left(\prod_{i = 1}^{2}v_i(\wElem_1)v_i(\wElem_2) + \prod_{i = 1}^{2}v_i^2(\wElem_i)\right)\\
% Adding back the non-negative diagonal terms (\wElem_1 = \wElem_2) to both sums gives an upper bound.
&\leq \frac{1}{B}\left(\left(\sum_{\wElem \in W}v_1^2(\wElem)\right)\left(\sum_{\wElem \in W}v_2^2(\wElem)\right) + \left(\sum_{\wElem \in W}v_1(\wElem)v_2(\wElem)\right)^2\right)\\
&= \frac{1}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2 + \left(\sum_{\wElem \in W}v_1(\wElem)v_2(\wElem)\right)^2\right)\\
&\leq \frac{1}{B}\left(\norm{v_1}_2^2\norm{v_2}_2^2 + \norm{v_1}_2^2\norm{v_2}_2^2\right)\\
&= \frac{2}{B}\norm{v_1}_2^2\norm{v_2}_2^2.
\end{align*}
\subsubsection{$\prodsize = 3$}
\begin{align*}
\sigsq_j &= \sum_{\substack{\wElem_1...\wElem_3\\\wElem'_1...\wElem'_3\\ \in W}}\prod_{i = 1}^3 v_i(\wElem_i)v_i(\wElem'_i)\cdot\left( \ex{\prod_{i = 1}^3 s(\wElem_i)\overline{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j}} -
\ex{\prod_{i = 1}^3s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \ex{\prod_{i = 1}^3\overline{s(\wElem'_i)}\ind{h(\wElem'_i) = j}} \right)
\end{align*}
In the above expression, we seek to know which combinations of $\wElem_i$ and $\wElem'_i$ variables will survive the expectation calculations. We can divide the possibilities up into several different cases.
First, every $\prodsize$th root of unity $\omega$ satisfies $\omega^\prodsize = 1$. This gives our first case.
\underline{Case 1:}
\begin{align*}
&\wElem_1 = \wElem_2 = \wElem_3 = \wElem\\
&\wElem'_1 = \wElem'_2 = \wElem'_3 = \wElem'\\
&1.1)~ \wElem = \wElem'\qquad1.2)~ \wElem \neq \wElem'
\end{align*}
The remaining cases take into account the property for roots of unity that $\omega \cdot \conj{\omega} = 1$. Note that we omit the case of all variables being equal because that has already been covered above.
\underline{Case 2:}
\begin{align*}
&\wElem_1 = \wElem'_1 = \wElem\\
&\wElem_2 = \wElem'_2 = \wElem'\\
&\wElem_3 = \wElem'_3 = \wElem''\\
&2.1)~ \wElem = \wElem' \neq \wElem''\qquad2.2)~ \wElem \neq \wElem'= \wElem''\qquad2.3)\wElem = \wElem'' \neq \wElem'\qquad2.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 3:}
\begin{align*}
&\wElem_1 = \wElem'_2 = \wElem\\
&\wElem_2 = \wElem'_3 = \wElem'\\
&\wElem_3 = \wElem'_1 = \wElem''\\
&3.1)~ \wElem = \wElem' \neq \wElem''\qquad3.2)~ \wElem \neq \wElem'= \wElem''\qquad3.3)\wElem = \wElem'' \neq \wElem'\qquad3.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 4:}
\begin{align*}
&\wElem_1 = \wElem'_3 = \wElem\\
&\wElem_2 = \wElem'_1 = \wElem'\\
&\wElem_3 = \wElem'_2 = \wElem''\\
&4.1)~ \wElem = \wElem' \neq \wElem''\qquad4.2)~ \wElem \neq \wElem'= \wElem''\qquad4.3)\wElem = \wElem'' \neq \wElem'\qquad4.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 5:}
\begin{align*}
&\wElem_1 = \wElem'_2 = \wElem\\
&\wElem_2 = \wElem'_1 = \wElem'\\
&\wElem_3 = \wElem'_3 = \wElem''\\
&5.1)~ \wElem = \wElem' \neq \wElem''\qquad5.2)~ \wElem \neq \wElem'= \wElem''\qquad5.3)\wElem = \wElem'' \neq \wElem'\qquad5.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 6:}
\begin{align*}
&\wElem_1 = \wElem'_1 = \wElem\\
&\wElem_2 = \wElem'_3 = \wElem'\\
&\wElem_3 = \wElem'_2 = \wElem''\\
&6.1)~ \wElem = \wElem' \neq \wElem''\qquad6.2)~ \wElem \neq \wElem'= \wElem''\qquad6.3)\wElem = \wElem'' \neq \wElem'\qquad6.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
\underline{Case 7:}
\begin{align*}
&\wElem_1 = \wElem'_3 = \wElem\\
&\wElem_2 = \wElem'_2 = \wElem'\\
&\wElem_3 = \wElem'_1 = \wElem''\\
&7.1)~ \wElem = \wElem' \neq \wElem''\qquad7.2)~ \wElem \neq \wElem'= \wElem''\qquad7.3)\wElem = \wElem'' \neq \wElem'\qquad7.4) \wElem \neq \wElem' \neq \wElem''
\end{align*}
The surviving terms are:
\begin{align*}
&\text{Case 1:}\\
&\frac{B - 1}{B^2}\left(\sum_\wElem v_1^2(\wElem) v_2^2(\wElem) v_3^2(\wElem)\right) + \\
&\text{Case 2:}\\
&\frac{1}{B^2}\left(\sum_{\wElem \neq \wElem'}v_1^2(\wElem)\left(v_2^2(\wElem)v_3^2(\wElem') + v_2^2(\wElem')v_3^2(\wElem') + v_2^2(\wElem')v_3^2(\wElem)\right)\right) + \frac{1}{B^3}\sum_{\wElem \neq \wElem' \neq \wElem''}v_1^2(\wElem)v_2^2(\wElem')v_3^2(\wElem'') +\\
&\text{Case 3 and 4:}\\
&\frac{2}{B^2}\left(\sum_{\wElem \neq \wElem'}v_1(\wElem)v_1(\wElem')v_2^2(\wElem)v_3(\wElem)v_3(\wElem') + v_1(\wElem)v_1(\wElem')v_2(\wElem)v_2(\wElem')v_3^2(\wElem') + v_1^2(\wElem)v_2(\wElem)v_2(\wElem')v_3(\wElem)v_3(\wElem')\right) + \\
&\qquad\qquad \frac{1}{B^3}\left(\sum_{\wElem \neq \wElem' \neq \wElem''}v_1(\wElem)v_1(\wElem'')v_2(\wElem')v_2(\wElem)v_3(\wElem'')v_3(\wElem') + v_1(\wElem)v_1(\wElem')v_2(\wElem')v_2(\wElem'')v_3(\wElem)v_3(\wElem'')\right) +\\
&\text{Case 5, 6, 7:}\\
&\frac{1}{B^3}\left(\sum_{\wElem \neq \wElem' \neq \wElem''}v_1(\wElem)v_1(\wElem')v_2(\wElem)v_2(\wElem')v_3^2(\wElem'') + v_1^2(\wElem)v_2(\wElem')v_2(\wElem'')v_3(\wElem')v_3(\wElem'') + v_1(\wElem)v_1(\wElem'')v_2^2(\wElem')v_3(\wElem)v_3(\wElem'')\right)
\end{align*}