paper-BagRelationalPDBsAreHard/sop.tex

43 lines
3.5 KiB
TeX

%root--main.tex
\section{Sum of Products Analysis}
We now seek to bound the variance of a $k$-way join.
\begin{align}
&\sigsq_j = \ex{est_j \cdot \overline{est_j}} - \ex{est_j} \cdot \ex{\overline{est_j}} \nonumber\\
&= \ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w) \cdot \prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}} -
\ex{\prod_{i = 1}^{k}\sum_{w \in W_j}v_i(w)s(w)}\cdot \ex{\prod_{i = 1}^k\sum_{w' \in W_j}v_i(w')\overline{s(w')}}\nonumber\\
&= \ex{\sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\sum_{w_1...w_k \in W} \prod_{i = 1}^k v_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot
\ex{\sum_{w'_1...w'_k \in W} \prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\nonumber\\
&= \sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\left(\ex{\prod_{i = 1}^k v_i(w_i)v_i(w'_i)s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^kv_i(w_i)s(w_i)\ind{h(w_i) = j}} \cdot \ex{\prod_{i = 1}^k v_i(w'_i)\overline{s(w'_i)}\ind{h(w'_i) = j}}\right)\nonumber\\
&= \sum_{\substack{w_1...w_k\\w'_1...w'_k\\ \in W}}\prod_{i = 1}^k v_i(w_i)v_i(w'_i)\cdot\left( \ex{\prod_{i = 1}^k s(w_i)\overline{s(w'_i)}\ind{h(w_i) = j}\ind{h(w'_i) = j}} -
\ex{\prod_{i = 1}^ks(w_i)\ind{h(w_i) = j}}\cdot \ex{\prod_{i = 1}^k\overline{s(w'_i)}\ind{h(w'_i) = j}} \right)\label{eq:sig-j-last}.
\end{align}
Before proceeding, we introduce some notation that will aid in communicating the bounds we are about to establish. First, note that the only terms that survive the expectation above are mappings of $w_i = w'_i = w$ such that each $w_i$ has a match, i.e., no $w_i$ or $w'_i$ stands alone without a matching world in its complementary set. To help describe all possible matchings, we use $m$-tuples and functions $f$ and $f'$.
\subsection{M-tuples}
\begin{Definition}
Given a $k$-way join, define $m \in [k]$. An $m$-tuple is then a set of tuples, each tuple containing $m$ elements, such that the values of each tuple sum up to $k$, i.e., $\forall i \in [m], \sum_j m_{t_{i, j}} = k$, where $i$ denotes the $i^{th}$ tuple in $m_t$, and $j$ is the $j^{th}$ index of that tuple $t$. The set consists of each unique sum up to symmetry, meaning a tuple with the same elements in reversed order is disallowed.
\end{Definition}
For example, when $k = 4$, $m = 2$, the $m$-tuple, denoted $m_2$, would be $\left\{\left(1, 3\right), \left(2, 2\right)\right\}$. Here, $m_{2_{1, 1}} = 1$, and while the tuple $\left(3, 1\right)$ sums up to $k = 4$, we do not include it since we have its symmetrical term $\left(1, 3\right)$.
\subsection{$f$, $f'$}
\begin{Definition}
Functions $f$ and $f'$ range over the set of surjective mappings from $k$ to $m$ elements, $f\colon [k] \rightarrow [m]$.
\end{Definition}
\begin{equation*}
w_i = \begin{cases}
\widetilde{w_1} &f(i) = 1\\
\widetilde{w_2} &f(i) = 2\\
\vdots &\vdots\\
\widetilde{w_m} &f(i) = m.
\end{cases}
\end{equation*}
In particular, $f$ and $f'$ are machinery for mapping $k$ $\wElem$-world variables to $m$ distinct values. We restrict $f$ and $f'$ in our results to belong to the same $m_{t_i}$ tuple. In the example above, $f$ mappings for $m_{2_1}$ may only be paired in the cross product with $f'$ mappings for $m_{2_1}$, and not with those for $m_{2_2}$. The same holds for the $f, f'$ mappings of $m_{2_2}$.
Using the above definitions, we can now present the variance bounds for $\sigsq_j$ based on~\eqref{eq:sig-j-last}.
\begin{equation*}
\sigsq_j = \sum_{m \in [k]} \frac{1}{B^m} \sum_{\widetilde{w_1}\cdots\widetilde{w_m}} \sum_{f, f'} \prod_{i = 1}^{k} v_i(\widetilde{w_{f(i)}}) v_i(\widetilde{w_{f'(i)}}).
\end{equation*}