\section{Analysis of a $\prodsize$-way join}
There are several steps involved in obtaining bounds on the Sum of Products (SOP) query. We start by analyzing a $\prodsize$-way product. Define the $j$-th bucket of a sketch $\sk$ for a vector $\vect$ as
\[\sk^\vect[j] = \sum_{\substack{\wElem \in \wSet,\\ \hfunc(\wElem) = j}}\vect(\wElem)\sine(\wElem).\]
Define the estimate of the $j$-th bucket to be
\[\est_j = \prod_{i = 1}^{\prodsize}\sk^{\vect_i}[j].\]
For notational convenience define
\begin{align*}
&\wSet_j = \{\wElem ~|~ \hfunc(\wElem) = j\},\\
&\term_j = \sum_{\wElem \in \wSet_j} \prod_{i = 1}^{\prodsize}\vect_i(\wElem).
\end{align*}
Let us show first that the expectation of the estimate does in fact yield the value we are estimating, $\term_j$.
\begin{align*}
\ex{\est_j} = &\ex{\prod_{i = 1}^{\prodsize}\sk^{\vect_i}[j]} \\
= &\ex{\prod_{i = 1}^{\prodsize} \sum_{\substack{\wElem \in \wSet, \\ \hfunc(\wElem) = j}}\vect_i(\wElem)\sine(\wElem)}\\
= &\ex{\sum_{\substack{\wElem_1,\ldots, \wElem_{\prodsize}\\ \in \wSet_j}} \prod_{i = 1}^{\prodsize}\vect_i(\wElem_i)\prod_{i = 1}^{\prodsize}\sine(\wElem_i)}\\
= &\sum_{\substack{\wElem_1,\ldots, \wElem_{\prodsize}\\ \in \wSet_j}} \prod_{i = 1}^{\prodsize}\vect_i(\wElem_i)\ex{\prod_{i = 1}^{\prodsize}\sine(\wElem_i)}.
\end{align*}
\begin{align*}
\term_1^{\est_j} = &\ex{\prod_{i = 1}^{\prodsize}\sine(\wElem_i)}\\
= &\ex{\prod_{l = 1}^{\dist} \sine(\wElem_l)^{e_l}}\\
= & \begin{cases}
0 &1 <\dist \leq \prodsize\\
1 & \dist = 1.
\end{cases}
\end{align*}
Notice that the above leaves us with the condition that $\forall i, j \in [\prodsize], \wElem_i = \wElem_j$, so that
\begin{align*}
\ex{\est_j} = &\sum_{\wElem \in \wSet_j}\prod_{i = 1}^{\prodsize} \vect_i(\wElem) \cdot \term_1^{\est_j} = \term_j.
\end{align*}
The proof for $\est = \sum_j \est_j$ follows by linearity of expectation.\qed\newline
We need to compute the variance of the $\prodsize$-way product $\est$. We wish to prove that
\begin{equation}
\sigsq \leq \sum_j \sigsq_j. \label{eq:var-to-prove}
\end{equation}
Therefore, substituting in the definition of variance for complex numbers,
\begin{align}
\sigsq &= \ex{\sum_j \est_j \cdot \conj{\sum_{j'} \est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\conj{\sum_{j'} \est_{j'}}}\nonumber\\
&= \ex{\sum_j \est_j \cdot \sum_{j'} \conj{\est_{j'}}} - \ex{\sum_j \est_j}\cdot\ex{\sum_{j'} \conj{\est_{j'}}}\nonumber\\
&= \sum_{j, j'}\left(\ex{\est_j \cdot \overline{\est_{j'}}} - \ex{\est_j}\ex{\overline{\est_{j'}}}\right) = \sum_{j, j'}\cvar{j, j'}\nonumber\\
&= \sum_j\left(\ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j}\ex{\overline{\est_j}}\right) + \sum_{j \neq j'}\cvar{j, j'}\nonumber\\
&= \sum_j \sigsq_j + \sum_{j \neq j'}\cvar{j, j'}. \label{eq:sigsq-jneqj}
\end{align}
Notice that assuming independence of $\sigsq_j ~\forall j \in \sketchCols$, we can push the variance through the sum and obtain the result
\begin{align*}
&\sigsq - \sum_j \sigsq_j = \sum_{j \neq j'}\cvar{j, j'}\\
&\implies \sum_{j \neq j'}\cvar{j, j'} \leq 0.
\end{align*}
Recall that we started this section out by seeking to prove \cref{eq:var-to-prove}. Should this be true, the use of $\leq$ in the above implication results from the fact that $\sigsq \leq \sum_j \sigsq_j \implies \sum_{j \neq j'}\cvar{j, j'} \leq 0$.
One can see that \cref{eq:sigsq-jneqj} is composed of two addends. We now bound each of them separately.
\subsection{Bounding $\sum_{j \neq j'}\cvar{j, j'}$}
\begin{align*}
\sum_{j \neq j'}\cvar{j, j'} &= \sum_{j \neq j'} \left(\ex{\est_j \cdot \conj{\est_{j'}}} - \ex{\est_j}\cdot\ex{\conj{\est_{j'}}}\right)\\
&=\ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}\cdot \prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}} - \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem \in W}v_i(\wElem)s(\wElem)\ind{h(\wElem) = j}}\cdot \ex{\prod_{i = 1}^{\prodsize}\sum_{\wElem' \in W}v_i(\wElem')\conj{s(\wElem')}\ind{h(\wElem') = j'}}\\
&=\ex{\sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem_i) = j} \ind{h(\wElem'_i) = j'}} - \ex{\sum_{\substack{\wElem_1,\cdots, \wElem_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}}\cdot\ex{\sum_{\substack{\wElem'_1,\cdots, \wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)} \ind{h(\wElem'_i) = j'}}\\
&=\sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\ex{\prod_{i = 1}^{\prodsize}v_i(\wElem_i)s(\wElem_i)v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize} v_i(\wElem_i)s(\wElem_i) \ind{h(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\conj{s(\wElem'_i)}\ind{h(\wElem'_i) = j'}}\\
&= \sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \prod_{i = 1}^{\prodsize}v_i(\wElem_i)\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot \prod_{i = 1}^{\prodsize}v_i(\wElem'_i)\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem_i') = j'}}\\
&= \sum_{\substack{\wElem_1,\cdots,\wElem_\prodsize,\\\wElem'_1,\cdots,\wElem'_\prodsize\\\in W}}\prod_{i = 1}^{\prodsize}v_i(\wElem_i)v_i(\wElem'_i)\left(\ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}} - \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\ind{h(\wElem_i) = j}}\cdot\ex{\prod_{i = 1}^{\prodsize}\conj{s(\wElem'_i)}\ind{h(\wElem_i') = j'}} \right).
\end{align*}
For $\term_1^{\cvar{j, j'}} = \ex{\prod_{i = 1}^{\prodsize}s(\wElem_i)\conj{s(\wElem'_i)}\ind{h(\wElem_i) = j}\ind{h(\wElem'_i) = j'}}$, because the hash function $h$ cannot map the same world to two different buckets, $\term_1^{\cvar{j, j'}}$ can be nonzero only when there is no overlap between the $\wElem_i$ and $\wElem'_i$ variables. Given the condition of no overlap, $\term_1^{\cvar{j, j'}}$ is nonzero only with the further condition that $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem', \wElem \neq \wElem'$. Notice, however, that under these conditions the product of the remaining expectations cancels this out. Looking at the remaining two expectations, $\term_2^{\cvar{j, j'}} = \ex{\prod_{i = 1}^{\prodsize}\sine(\wElem_i) \ind{\hfunc(\wElem_i) = j}} \cdot \ex{\prod_{i = 1}^{\prodsize}\conj{\sine(\wElem'_i)} \ind{\hfunc(\wElem'_i) = j'}}$, we see that $\term_2^{\cvar{j, j'}}$ is nonzero only when $\forall i \in [\prodsize], \wElem_i = \wElem, \wElem'_i = \wElem'$. Taken together, the constraints leave us with only one possible case for $\term_1^{\cvar{j, j'}} - \term_2^{\cvar{j, j'}} \neq 0$: when all variables are the same world. Thus,
\begin{equation}
\sum_{j \neq j'}\cvar{j, j'} = - \frac{1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{\prodsize}v_i^2(\wElem). \label{eq:cvar-bound}
\end{equation}
We now move on to bound the variance of a $\prodsize$-way join.
\begin{align}
\sigsq_j &= \ex{\est_j \cdot \overline{\est_j}} - \ex{\est_j} \cdot \ex{\overline{\est_j}} \nonumber\\
&= \ex{\prod_{i = 1}^{\prodsize}\sum_{w \in W_j}v_i(w)s(w) \cdot \prod_{i = 1}^\prodsize\sum_{w' \in W_j}v_i(w')\overline{s(w')}} -
\ex{\prod_{i = 1}^{\prodsize}\sum_{w \in W_j}v_i(w)s(w)}\cdot \ex{\prod_{i = 1}^\prodsize\sum_{w' \in W_j}v_i(w')\overline{s(w')}}\nonumber\\
&= \ex{\sum_{\substack{w_1,\ldots,w_\prodsize\\w'_1,\ldots,w'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\sum_{w_1,\ldots,w_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot
\ex{\sum_{w'_1,\ldots,w'_\prodsize \in W} \prod_{i = 1}^\prodsize v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
&= \sum_{\substack{w_1,\ldots,w_\prodsize\\w'_1,\ldots,w'_\prodsize\\ \in W}}\left(\ex{\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^\prodsize v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\right)\nonumber\\
&= \sum_{\substack{w_1,\ldots,w_\prodsize\\w'_1,\ldots,w'_\prodsize\\ \in W}}\prod_{i = 1}^\prodsize v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^\prodsize s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^\prodsize s(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(w'_i)}\ind{h(w'_i) = j}} \right).\label{eq:sig-j-last}
\end{align}
Before proceeding, we introduce some notation and terminology that will aid in communicating the bounds we are about to establish. We refer to the leftmost expectation of \cref{eq:sig-j-last} in the following way:
\[\term_1\left(\wElem_1,\ldots,\wElem_\prodsize, \wElem_1',\ldots, \wElem_\prodsize'\right) = \ex{\prod_{i = 1}^\prodsize s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}}.\]%\text{, and}
%\[\term_2\left(\wElem_1,\ldots,\wElem_\prodsize, \wElem_1',\ldots, \wElem_\prodsize'\right) = \ex{\prod_{i = 1}^ks(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^\prodsize\overline{s(w'_i)}\ind{h(w'_i) = j}}. \]
We will use the word \emph{term} to denote $\prod_{i = 1}^{\prodsize}\vect_i(\wElem_i)\vect_i(\wElem_i') \cdot\term_1\left(\wElem_1,\ldots,\wElem_\prodsize, \wElem_1',\ldots,\wElem_\prodsize'\right)$ given a specific set of world values. %To say that a term survives \AR{You should not care about whether the $T_1$ term survives or not. See the above comment on why.} the expectation is to mean that $\term_1 - \term_2 \neq 0$. Note, that the only terms that survive the expectation above are mappings of $w_i = w'_j = w$ for $i, j \in [\prodsize]$, such that each $w_i$ has a match, i.e., no $w_i$ or $w'_j$ stands alone without a matching world in its complimentary set. In other words, the set of values in $\wElem_1,\ldots,\wElem_k$ has a bijective mapping to the set of values in $\wElem'_1,\ldots,\wElem'_k$.
We next describe the nonzero terms of \cref{eq:sig-j-last}.
\subsection{Functions $f$ and $f'$}
Define and then fix a total ordering of the $\dist$ distinct world elements to follow the total order of the natural numbers in $[\dist]$, such that $\forall i, j \in [\dist], i < j \implies \dw_i \prec \dw_j$, i.e., $\dw_1 \prec\ldots\prec\dw_\dist$.
%Given a fixed order $\wSet_{\order}: \left(\wSet, \wSet\right)\mapsto \mathbb{B}$ of possible worlds, define the lexographical order of distinct worlds $\wSet_\dist$ to be the ordering which complies to the identity mapping of elements in $[\prodsize]$ to elements in $[\dist]$ up to $\dist$, such that . In other worlds, $\forall \wElem, \wElem' \in \wSet_\dist, \dw < \wElem' \leftrightarrow \wSet_{\order}\left(\wElem, \wElem'\right) = T$.
To help describe all possible world value matchings we introduce functions $f$ and $f'$.
Functions $f$ and $f'$ are surjective mappings from $[\prodsize]$ onto $[\dist]$ and $[\dist']$, respectively: $f: [\prodsize] \rightarrow [\dist], f': [\prodsize] \rightarrow [\dist'].$
2020-03-26 12:38:07 -04:00
The functions $f, f'$ are used to produce the mappings $w_i \mapsto \dMap{w_{f(i)}}$. In particular, $f$ and $f'$ are machinery for mapping $\prodsize$ $\wElem$-world variables to $\dist$ distinct values.
2020-04-01 10:57:37 -04:00
We rewrite equation \eqref{eq:sig-j-last} in terms of $\dist$ distinct worlds, with $f, f'$ mappings:
\begin{equation}
\sum_{\dist = 2}^{\prodsize}\sum_{\dist' = 2}^{\prodsize}\sum_{f, f'}\sum_{\substack{\dw_1, \ldots,\dw_\dist,\\ \dw'_{1},\ldots,\dw'_{\dist'}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw'_{f'(i)})\cdot \term_1\left(\dw_{f(1)},\ldots,\dw_{f(\prodsize)}, \dw'_{f'(1)},\ldots, \dw'_{f'(\prodsize)}\right). \label{eq:sig-j-distinct}
\end{equation}
2020-04-01 10:57:37 -04:00
2020-04-15 18:49:49 -04:00
Observe that the Cartesian product of world values assigned to $\wElem_1,\ldots,\wElem_\prodsize$ throughout the summation can be rearranged into groups of variables with distinct values, one group for each number $\dist \in [\prodsize]$ of distinct values. For each $\dist \in [\prodsize]$, all possible combinations of $\dist$ world values can be equivalently modeled by taking the set of surjective functions $f:[\prodsize]\to [\dist]$ and computing all world value combinations based on the total ordering $\dw_1\prec\cdots\prec\dw_\dist$. For any $\dist$, all surjective mappings $f$ constitute all unique mappings with their symmetrical counterparts. Combining that with the total order over $\dw_1,\ldots,\dw_\dist$ yields exactly the world value combinations containing $\dist$ distinct values which appear in the Cartesian product of the sum, without double counting. What this all boils down to is a rearrangement of addends in the sum.
2020-03-26 20:15:00 -04:00
2020-03-31 11:52:00 -04:00
Functions $f:[\prodsize]\to [\dist]$ and $f':[\prodsize]\to [\dist']$ are said to be \emph{matching}, denoted $\match{f}{f'}$, if and only if
\begin{enumerate}
\item $\dist = \dist'$, and
\item $\forall i \in [\dist], |f^{-1}(i)| = |f'^{-1}(i)|$, i.e., the cardinality of variables mapped to $\dw_i$ equals the cardinality of variables mapped to $\dw_i'$, for all $i \in [\dist]$.
\end{enumerate}
% NOTE(review): this statement is cited as \cref{lem:sig-j-survive}, but no enclosing lemma environment or \label is visible here -- confirm and restore.
When $f, f'$ are matching, where $\forall j \in[\dist], \dw_j = \dw'_j$, \cref{eq:sig-j-distinct} is exactly
\[\sum_{\substack{\dw_1, \ldots,\dw_\dist,\\\dw'_1,\ldots,\dw'_{\dist'}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw'_{f'(i)})\]
and $0$ otherwise.
2020-03-31 11:52:00 -04:00
2020-04-03 10:47:13 -04:00
In proving \cref{lem:sig-j-survive}, we introduce another fact.
2020-04-14 18:26:57 -04:00
Given a primitive $\prodsize$-th root of unity $\rou$, the expectation of the product $(\rou^i)^l \cdot (\rou^j)^{l'}$ for independent, uniformly random $i, j \in [\prodsize]$ and fixed exponents $l, l' \in [\prodsize - 1]$ is zero.
2020-04-03 10:47:13 -04:00
2020-04-14 18:26:57 -04:00
2020-04-14 18:26:57 -04:00
The proof only needs the case when $\wElem \neq \wElem'$, since $i, j$ are both uniformly random.
2020-04-03 10:47:13 -04:00
\begin{align*}
&\ex{\sine(\wElem)^i \conj{\sine(\wElem')}^j}\\
= &\ex{\sine(\wElem)^i}\ex{\conj{\sine(\wElem')}^j}\\
= &0.
\end{align*}
2020-04-06 11:35:13 -04:00
In the above, since we have more than pairwise independence for $\wElem \neq \wElem'$, we can push the expectation into the product. Then by \cref{lem:exp-sine} we get 0 for both expectations.\newline
2020-04-14 18:26:57 -04:00
2020-04-16 12:02:52 -04:00
\AR{First some typos/things that are incorrect below-- note this is \textbf{not} an exhaustive list. (1) In the proof below the $w_i$ and $w'_i$ should be $\tilde{w}_i$ and $\tilde{w'}_i$ respectively. (2) The expression for $T_1$ below is incorrect since it seems to assume that all the pre-image sizes are $1$-- the expression for $T_2$ is fine except the $j_i$ terms are not defined. However, ``taking out" one term for $\tilde{w'}_{m'}$ for $T_2$ is incorrect since e.g. we could have the pre-image of $m'$ have size $>1$. (3) The proof below never explicitly argues why the condition $\dw_{_j} = \dw_{'_j}$ is needed.}
2020-04-10 11:11:53 -04:00
\AR{Here is how I recommend that you re-write the proof. First as mentioned earlier, you should only consider the $T_1$ terms (as you account for the $T_2$ terms later on. Second you should first start off by re-stating the $T_1$ term like so. Consider the ``generic term"--
\[T_1(\tilde{w}_{f(1)},\dots, \tilde{w}_{f(m)}, \tilde{w'}_{f'(1)},\dots, \tilde{w'}_{f'(m')}).\]
2020-04-10 11:11:53 -04:00
Then re-write the what the above term is based on the exact definition (BTW I'm dropping the $\mathbf{E}$ terms for convenience but they should be all there below.) In particular, the above term by definition is exactly
\[\prod_{i=1}^k s(\tilde{w}_{f(i)})\cdot \overline{s(\tilde{w'}_{f'(i)})}.\]
Now re-write the above in terms of ``powers" of distinct worlds:
\[ (\prod_{i=1}^m s(\tilde{w}_{i})^{|f^{-1}(i)|})\cdot \overline{(\prod_{j=1}^m s(\tilde{w'}_j)^{|f^{-1}(j)|})}\]
Now once you have the above expression, then it will be much easier to argue why if any of the matching conditions are not satisfied then the expression is $0$. I also believe that working with the above expression will also make it more ``obvious'' as to why the different conditions are required. Currently the arguments below do not explicitly bring this out...}
2020-04-14 18:26:57 -04:00
Consider the ``generic term''
\[T_1(\tilde{w}_{f(1)},\dots, \tilde{w}_{f(\prodsize)}, \tilde{w'}_{f'(1)},\dots, \tilde{w'}_{f'(\prodsize)}).\]
Let us rewrite the term based on its exact definition:
\begin{align*}
&T_1(\tilde{w}_{f(1)},\dots, \tilde{w}_{f(\prodsize)}, \tilde{w'}_{f'(1)},\dots, \tilde{w'}_{f'(\prodsize)})\\
= &\ex{\prod_{i = 1}^{\prodsize}\sine(\dw_{f(i)})\cdot\conj{\sine(\dw'_{f'(i)})}}\\
= &\ex{\left(\prod_{i = 1}^{\dist}\sine(\dw_{i})^{|f^{-1}(i)|}\right) \cdot \left(\prod_{j = 1}^{\dist'}\conj{\sine(\dw'_{j})}^{|f'^{-1}(j)|}\right)}.
\end{align*}
2020-04-14 18:26:57 -04:00
2020-04-15 18:49:49 -04:00
Notice that each $i \in [\prodsize]$ has its own mapping to an element in $[\dist]$. We can thus rearrange all the elements of the product such that the size of the preimage of $i$ under $f$, i.e., $|f^{-1}(i)|$, yields the number of terms that are mapped to the distinct variable $\dw_i$.
Further see how the requirement that $\dw_i = \dw'_i$ gives us the precise combinations we are looking for, where each random $\sine$ output value has its own matching complex conjugate.
2020-04-15 18:49:49 -04:00
To prove that \cref{lem:sig-j-survive} is true, consider what the expectation looks like when $f, f'$ are not matching. The first condition for $f, f'$ to be matching is violated when $\dist \neq \dist'$.
Observe that $\forall \dist \in [\prodsize], \sum_{i = 1}^{\dist}|f^{-1}(i)| = \prodsize$ and that this fact implies for $\dist, \dist' \in [\prodsize] ~|~\dist \neq \dist', \exists i \in [\dist] ~|~\forall j \in [\dist'], |f^{-1}(i)| \neq |f'^{-1}(j)|$, meaning that if we have that $\dist \neq \dist'$, then the second matching condition is also violated.
2020-04-14 18:26:57 -04:00
2020-04-03 10:47:13 -04:00
2020-04-15 18:49:49 -04:00
\begin{align}
&\ex{\left(\prod_{i = 1}^{\dist}\sine(\dw_{i})^{|f^{-1}(i)|}\right) \cdot \left(\prod_{j = 1}^{\dist'}\conj{\sine(\dw'_{j})}^{|f'^{-1}(j)|}\right)}\nonumber\\
= &\ex{\left(\prod_{i = 1}^{\dist}\sine(\dw_{i})^{|f^{-1}(i)|}\right) \cdot \left(\prod_{j = 1}^{\dist}\conj{\sine(\dw'_{j})}^{|f'^{-1}(j)|}\right)\left(\prod_{l = \dist + 1}^{\dist'}\conj{\sine(\dw'_l)}^{|f'^{-1}(l)|}\right)}\nonumber\\
= &\ex{\left(\prod_{i = 1}^{\dist}\sine(\dw_{i})^{|f^{-1}(i)|}\right) \cdot \left(\prod_{j = 1}^{\dist}\conj{\sine(\dw'_{j})}^{|f'^{-1}(j)|}\right)} \cdot \prod_{l = \dist + 1}^{\dist'} \ex{\conj{\sine(\dw'_l)}^{|f'^{-1}(l)|}} = 0.\label{eq:lem-fmatch-pt1}
\end{align}
2020-04-03 10:47:13 -04:00
2020-04-15 18:49:49 -04:00
In \cref{eq:lem-fmatch-pt1} the expectation can be pushed through the last product group since we know that all the operands are distinct from any others appearing in the overall product. Then, by \cref{lem:exp-sine} we get $0$ for that rightmost term, and this cancels out the rest of the terms in the overall product.
2020-04-03 10:47:13 -04:00
2020-04-15 18:49:49 -04:00
To complete the proof, we now approach the case where $\dist = \dist'$, but there is a $\dw_i, \dw_i'$ with an unequal number of mappings.
2020-03-31 11:52:00 -04:00
2020-04-15 18:49:49 -04:00
\begin{align*}
&\exists i \in [\dist], |f^{-1}(i)| \neq |f'^{-1}(i)|\\
\implies &\exists j \in [\dist] ~|~i \neq j, |f^{-1}(j)| \neq |f'^{-1}(j)|\\
\implies &\exists i, j \in [\dist], i \neq j ~|~ |\dw_i| \neq |\dw'_i|, |\dw_j| \neq |\dw'_j|.
\end{align*}
2020-04-15 18:49:49 -04:00
%\implies &\exists \wElem_i \in \wSet ~|~ \nexists \wElem_i' \in \wSet ~|~ \wElem_i = \wElem_i'
2020-04-16 10:39:52 -04:00
The above means that we will have at least two world values that don't match. Put another way, after the optimal number of matching world value pairs have been assigned, there will be at least one world value whose matching conjugate product is not the conjugate of the sine of the same world value, i.e. for $i \neq j$, there will exist at least one product of $\sine(\dw_i) \conj{\sine(\dw_{j}')}$.
Such cross terms exist since
\[\left(\sum_{\substack{i \in [\dist],\\|f^{-1}(i)| \neq |f'^{-1}(i)|}}|f^{-1}(i)|\right) = \left(\sum_{\substack{i' \in [\dist],\\|f^{-1}(i')| \neq |f'^{-1}(i')|}}|f'^{-1}(i')|\right)\]
2020-03-26 20:15:00 -04:00
2020-04-15 18:49:49 -04:00
Let $n = \{i \in [\dist] ~|~ |f^{-1}(i)| \neq |f'^{-1}(i)|\}$. Further, let $\dist_* = [\dist] \setminus n$ and $|f^{*-1}(i)| = \min\left(|f^{-1}(i)|, |f'^{-1}(i)|\right)$. Then,
2020-03-26 12:38:07 -04:00
2020-04-16 10:39:52 -04:00
\begin{align}
\term_1 = &\ex{\left(\prod_{i \in \dist_*} \sine(\dw_i)^{|f^{-1}(i)|} \conj{\sine(\dw'_i)}^{|f'^{-1}(i)|}\right)
\left(\prod_{j \in n}\sine(\dw_j)^{|f^{*-1}(j)|} \conj{\sine(\dw'_j)}^{|f^{*-1}(j)|}\right)
\left(\prod_{\substack{i' \in n ~|~\\|f^{-1}(i')| > |f'^{-1}(i')|}} \sine(\dw_{i'})^{|f^{-1}(i')| - |f'^{-1}(i')|} \prod_{\substack{j' \in n ~|~\\ |f'^{-1}(j')| > |f^{-1}(j')|}} \conj{\sine(\dw'_{j'})}^{|f'^{-1}(j')| - |f^{-1}(j')|}\right)} \label{eq:lem-match-pt2-line1}\\
= &\ex{\left(\prod_{i \in \dist_*} \sine(\dw_i)^{|f^{-1}(i)|} \conj{\sine(\dw'_i)}^{|f'^{-1}(i)|}\right)
\left(\prod_{j \in n}\sine(\dw_j)^{|f^{*-1}(j)|} \conj{\sine(\dw'_j)}^{|f^{*-1}(j)|}\right)} \cdot
\ex{\left(\prod_{\substack{i' \in n ~|~\\|f^{-1}(i')| > |f'^{-1}(i')|}} \sine(\dw_{i'})^{|f^{-1}(i')| - |f'^{-1}(i')|} \prod_{\substack{j' \in n ~|~\\ |f'^{-1}(j')| > |f^{-1}(j')|}} \conj{\sine(\dw'_{j'})}^{|f'^{-1}(j')| - |f^{-1}(j')|}\right)}\nonumber\\
= &\ex{\left(\prod_{i \in \dist_*} \sine(\dw_i)^{|f^{-1}(i)|} \conj{\sine(\dw'_i)}^{|f'^{-1}(i)|}\right)
\left(\prod_{j \in n}\sine(\dw_j)^{|f^{*-1}(j)|} \conj{\sine(\dw'_j)}^{|f^{*-1}(j)|}\right)} \cdot
\ex{\prod_{\substack{i' \in n ~|~\\|f^{-1}(i')| > |f'^{-1}(i')|}} \sine(\dw_{i'})^{|f^{-1}(i')| - |f'^{-1}(i')|}} \cdot \ex{\prod_{\substack{j' \in n ~|~\\ |f'^{-1}(j')| > |f^{-1}(j')|}} \conj{\sine(\dw'_{j'})}^{|f'^{-1}(j')| - |f^{-1}(j')|}}\nonumber\\
= &\ex{\left(\prod_{i \in \dist_*} \sine(\dw_i)^{|f^{-1}(i)|} \conj{\sine(\dw'_i)}^{|f'^{-1}(i)|}\right)
\left(\prod_{j \in n}\sine(\dw_j)^{|f^{*-1}(j)|} \conj{\sine(\dw'_j)}^{|f^{*-1}(j)|}\right)} \cdot
\prod_{\substack{i' \in n ~|~\\|f^{-1}(i')| > |f'^{-1}(i')|}} \ex{\sine(\dw_{i'})^{|f^{-1}(i')| - |f'^{-1}(i')|}} \cdot \prod_{\substack{j' \in n ~|~\\ |f'^{-1}(j')| > |f^{-1}(j')|}} \ex{\conj{\sine(\dw'_{j'})}^{|f'^{-1}(j')| - |f^{-1}(j')|}}\label{eq:lem-match-pt2-last}\\
& = 0.\nonumber
\end{align}
Looking at \cref{eq:lem-match-pt2-line1}, each $\sine$ function in the first two products has its matching complex conjugate in the product terms. However, the rightmost products of the expectation are all distinct world value inputs, i.e. random $\sine$ values with no matching conjugate counterparts. Since we have distinct, non-matching world value inputs for the rightmost products, we can push the expectation through the products until we arrive at \cref{eq:lem-match-pt2-last}, where finally, by \cref{lem:exp-sine}, each of those inner expectations computes to $0$. This in turn zeroes out the whole product.
We now seek to show that when $f, f'$ are matching, $\term_1$ will always equal 1. Recall that when $\match{f}{f'}$,
\begin{enumerate}
\item $\dist = \dist'$, i.e., the output size of both functions is the same,
\item $\forall i \in [\dist],| f^{-1}(i)| = |f'^{-1}(i)|$, i.e., each $\dw_i$ has the same number of variables assigned to it as its $\dw'_i$ counterpart.
\end{enumerate}
This means,
\begin{align}
\term_1 = &\ex{\prod_{i = 1}^{\dist}\sine(\dw_i)^{|f^{-1}(i)|}\conj{\sine(\dw'_i)}^{|f'^{-1}(i)|}}\nonumber\\
= &\ex{\prod_{i = 1}^{\dist}\left(\sine(\dw_i) \cdot \conj{\sine(\dw'_i)}\right)^{|f^{-1}(i)|}}\label{eq:lem-match-pt3-2}\\
= &1.\nonumber
\end{align}
We arrive at \cref{eq:lem-match-pt3-2} since $\forall i \in [\dist], |f^{-1}(i)| = |f'^{-1}(i)|$ and we can use the distributive law of exponents over multiplication. This then implies that each individual $\sine(\dw_i)$ has its own matching conjugate $\conj{\sine(\dw'_i)}$, and by the property of roots of unity in complex numbers, each $\sine(\dw_i)\cdot \conj{\sine(\dw'_i)} = 1$, yielding an overall product of $1$.
2020-03-26 12:38:07 -04:00
2020-04-14 18:26:57 -04:00
2020-04-01 10:57:37 -04:00
Using the above definitions, we can now present the variance bounds for $\sigsq_j$ based on \eqref{eq:sig-j-distinct}.
2020-03-27 12:10:41 -04:00
By the fact that the expectations cancel when $\forall i, i', j, j'\in [\prodsize], \wElem_i = \wElem_j = \wElem, \wElem_{i'}' = \wElem_{j'}' = \wElem'$, for both $\wElem = \wElem'$ and $\wElem \neq \wElem'$, we can rid ourselves of the case when there exists only one distinct world value. This is precisely why we have not needed to account for the last two expectations in \cref{eq:sig-j-last}. We then need to sum up all the $\dist$ distinct world value possibilities for $\dist \in [2, \prodsize]$. Starting with \cref{eq:sig-j-distinct},
2020-04-10 11:11:53 -04:00
2020-04-16 12:02:52 -04:00
\begin{align}
\sigsq_j = &\sum_{\dist = 2}^{\prodsize}\sum_{\dist' = 2}^{\prodsize}\sum_{f, f'}\sum_{\substack{\dw_1, \ldots,\dw_\dist,\\\dw'_{1},\ldots,\dw'_{\dist'}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw'_{f'(i)})\cdot \term_1\left(\dw_{f(1)},\ldots,\dw_{f(\prodsize)}, \dw'_{f'(1)},\ldots, \dw'_{f'(\prodsize)}\right)\nonumber\\
= &\sum_{\dist = 2}^{\prodsize}\sum_{f, f'}\sum_{\substack{\dw_1, \ldots,\dw_\dist,\\\dw'_{1},\ldots,\dw'_{\dist}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw'_{f'(i)})\cdot \term_1\left(\dw_{f(1)},\ldots,\dw_{f(\prodsize)}, \dw'_{f'(1)},\ldots, \dw'_{f'(\prodsize)}\right)\label{eq:sig-j-bnd-1}\\
= &\sum_{\dist = 2}^{\prodsize}\sum_{\substack{f, f'\\\match{f}{f'}}}\sum_{\substack{\dw_1, \ldots,\dw_\dist,\\\dw'_{1},\ldots,\dw'_{\dist}\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw'_{f'(i)})\cdot \prod_{i = 1}^{\dist}\ind{\hfunc(\dw_i) = j}\ind{\hfunc(\dw'_i) = j}\label{eq:sig-j-bnd-2}\\
= &\sum_{\dist = 2}^{\prodsize}\sum_{\substack{f, f'\\\match{f}{f'}}}\sum_{\substack{\dw_1, \ldots,\dw_\dist\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw_{f'(i)})\cdot \prod_{i = 1}^{\dist}\ind{\hfunc(\dw_i) = j}\label{eq:sig-j-bnd-3}\\
= &\sum_{\dist = 2}^{\prodsize}\frac{1}{\sketchCols^{\dist}}\sum_{\substack{f, f'\\\match{f}{f'}}}\sum_{\substack{\dw_1, \ldots,\dw_\dist\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw_{f'(i)}).\label{eq:sig-j-bnd-4}
\end{align}
We obtain \cref{eq:sig-j-bnd-1} by the fact that $\dist = \dist'$. Next, we arrive at \cref{eq:sig-j-bnd-2} by \cref{lem:sig-j-survive} as well as bringing out the indicator variables of $\term_1$. \Cref{eq:sig-j-bnd-3} is derived from the fact that $\forall i \in [\dist], \dw_i = \dw'_i$. We arrive at \cref{eq:sig-j-bnd-4} since, with $\dist$ distinct variables, taking the expectation of the product of indicator variables multiplies the uniform probability $1/\sketchCols$ by itself $\dist$ times.
2020-04-16 12:02:52 -04:00
Using \cref{eq:cvar-bound} and \cref{eq:sig-j-bnd-4}, we state the general bounds for $\sigsq$,
\[\sigsq = \sum_{\dist = 2}^{\prodsize}\frac{1}{\sketchCols^{\dist}}\sum_{\substack{f, f'\\\match{f}{f'}}}\sum_{\substack{\dw_1, \ldots, \dw_\dist\\ \in W}}\prod_{i = 1}^{\prodsize}\vect_i(\dw_{f(i)})\vect_i(\dw_{f'(i)}) -
\frac{1}{B^2}\sum_{\wElem \in W}\prod_{i = 1}^{\prodsize}v_i^2(\wElem).\]
