Expectation of Sketch Products

master
Aaron Huber 2019-08-26 10:26:51 -04:00
parent be6764acc0
commit ec80c510dd
6 changed files with 153 additions and 139 deletions

View File

@ -14,34 +14,34 @@
We start off by making the claim that the expectation of the estimate of annotations across all worlds is $\sum\limits_{\wVec \in \pw}\genVParam{\wVec}$, formally
\begin{equation}
\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \sum_{\wVec \in \pw}\genVParam{\wVec}\label{eq:allWorlds-est}.
\expect{\sum_{\wVec \in \pw} \sketchJParam{\hashP{\wVec}} \cdot \polP{\wVec}} = \sum_{\wVec \in \pw}\genVParam{\wVec}\label{eq:allWorlds-est}.
\end{equation}
To verify this claim, we argue that for every $\wVec \in \pw$, the expectation of the estimate of an annotation in a single world is that annotation, namely the output of $\genVParam{\wVec}$, i.e.,
\begin{equation}
\expect{\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}} = \genVParam{\wVec} \label{eq:single-est}.
\expect{\sketchJParam{\hashP{\wVec}}\cdot \polP{\wVec}} = \genVParam{\wVec} \label{eq:single-est}.
\end{equation}
For a given $\wVec \in \pw$, substituting definitions we have
\begin{align}
&\expect{\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \nonumber\\
&\phantom{{}\sketchJParam{\sketchHashParam{\wVec}}}\expect{\big(\sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\genVParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime}\big) \cdot \sketchPolarParam{\wVec} }\label{eq:step-one}\\
&\expect{\sketchJParam{\hashP{\wVec}} \cdot \polP{\wVec}} = \nonumber\\
&\phantom{{}\sketchJParam{\hashP{\wVec}}}\expect{\big(\sum_{\substack{\wVecPrime \in \pw \st \\
\hashP{\wVecPrime} = \hashP{\wVec}}}\genVParam{\wVecPrime} \cdot \polP{\wVecPrime}\big) \cdot \polP{\wVec} }\label{eq:step-one}\\
%\end{align}
%Since $\wVec \in \pw$, we know that for $\wVecPrime\in \pw, \exists \wVecPrime \st \wVecPrime = \wVec$. This yields
%\[
=&~\expect{\genVParam{\wVec}\sketchPolarParam{\wVec}^2 +
=&~\expect{\genVParam{\wVec}\polP{\wVec}^2 +
\sum\limits_{\substack{\wVecPrime, \wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec},\\
\wVecPrime \neq \wVec}}\genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}}\label{eq:step-two}\\
=&~\expect{\genVParam{\wVec}\sketchPolarParam{\wVec}^2} +
\hashP{\wVecPrime} = \hashP{\wVec},\\
\wVecPrime \neq \wVec}}\genVParam{\wVecPrime}\polP{\wVecPrime}\polP{\wVec}}\label{eq:step-two}\\
=&~\expect{\genVParam{\wVec}\polP{\wVec}^2} +
\expect{\sum\limits_{\substack{\wVecPrime, \wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec} \\
\wVecPrime \neq \wVec}}\genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}}\label{eq:step-three}\\
=&~\genVParam{\wVec} \cdot \expect{\sketchPolarParam{\wVec}^2} + \nonumber\\
\hashP{\wVecPrime} = \hashP{\wVec} \\
\wVecPrime \neq \wVec}}\genVParam{\wVecPrime}\polP{\wVecPrime}\polP{\wVec}}\label{eq:step-three}\\
=&~\genVParam{\wVec} \cdot \expect{\polP{\wVec}^2} + \nonumber\\
&\qquad\sum\limits_{\substack{\wVecPrime, \wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec} \\
\hashP{\wVecPrime} = \hashP{\wVec} \\
\wVecPrime \neq \wVec}}\genVParam{\wVecPrime}\cdot\expect{\sum\limits_{\substack{\wVecPrime, \wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec} \\
\wVecPrime \neq \wVec}}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}}\label{eq:step-four}\\
\hashP{\wVecPrime} = \hashP{\wVec} \\
\wVecPrime \neq \wVec}}\polP{\wVecPrime}\polP{\wVec}}\label{eq:step-four}\\
=&~\genVParam{\wVec}\label{eq:step-five}
\end{align}
@ -51,18 +51,18 @@ For a given $\wVec \in \pw$, substituting definitions we have
\item \eq{\eqref{eq:step-one}} is a substitution of the definition of $\sketch$.
\item \eq{\eqref{eq:step-two}} uses the associativity of addition to rearrange the sum.
\item \eq{\eqref{eq:step-three}} uses linearity of expectation to reduce the large expectation into smaller expectations.
\item \eq{\eqref{eq:step-four}} further pushes expectation into the $\sketchPolar$ terms.
\item \eq{\eqref{eq:step-four}} further pushes expectation into the $\pol$ terms.
\item \eq{\eqref{eq:step-five}} follows from evaluating the expectations and taking the product of their results and their respective operands.
\end{itemize}
\end{Justification}
%which in turn
%\begin{multline*}
%\mathbb{E}\big[\genVParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVecPrime_0} + \cdots \\
%+\genVParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}+ \cdots \\
%+ \genVParam{\wVecPrime_n}\sketchPolarParam{\wVecPrime_n}\big]
%\mathbb{E}\big[\genVParam{\wVecPrime_0}\cdot \polP{\wVecPrime_0} + \cdots \\
%+\genVParam{\wVecPrime_j}\cdot \polP{\wVecPrime_j}\cdot \polP{\wVecPrime_j}+ \cdots \\
%+ \genVParam{\wVecPrime_n}\polP{\wVecPrime_n}\big]
%\end{multline*}
%\AH{break it up into w' and w}
%Due to the uniformity of $\sketchPolar$, we have
%Due to the uniformity of $\pol$, we have
%\begin{equation*}
%= \genVParam{\wVec},
%\end{equation*}
@ -71,19 +71,19 @@ thus verifying \eqref{eq:single-est}.
\begin{Assumption}
\hfill
\begin{itemize}
\item \eq{\eqref{eq:step-four}} assumes that $\sketchPolar$ is pairwise independent.
%\item $\sketchHash$ is uniformly distributed.
\item \eq{\eqref{eq:step-four}} assumes that $\pol$ is pairwise independent.
%\item $\hash$ is uniformly distributed.
\end{itemize}
\end{Assumption}
Since \eqref{eq:single-est} holds, by linearity of expectation, \eqref{eq:allWorlds-est} also must hold.
%We can now take \eqref{eq:single-est}, substitute it in for \eqref{eq:allWorlds-est} and show by linearity of expectation that \eqref{eq:allWorlds-est} holds.
%\begin{align}
%&\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} \nonumber\\
%&= \expect{\sum_{\wVecPrime \in \pw}\genVParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime} \cdot \sum_{\substack{\wVec \in \pw \st \\
%\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\sketchPolarParam{\wVec}}\nonumber\\
%&\expect{\sum_{\wVec \in \pw} \sketchJParam{\hashP{\wVec}} \cdot \polP{\wVec}} \nonumber\\
%&= \expect{\sum_{\wVecPrime \in \pw}\genVParam{\wVecPrime} \cdot \polP{\wVecPrime} \cdot \sum_{\substack{\wVec \in \pw \st \\
%\hashP{\wVecPrime} = \hashP{\wVec}}}\polP{\wVec}}\nonumber\\
%&= \sum_{\wVec \in \pw} \expect{\left( \sum_{\substack{\wVecPrime \in \pw \st \\
%\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\genVParam{\wVecPrime}\cdot\sketchPolarParam{\wVecPrime}\right) \cdot \sketchPolarParam{\wVec}}\nonumber\\
%\hashP{\wVecPrime} = \hashP{\wVec}}}\genVParam{\wVecPrime}\cdot\polP{\wVecPrime}\right) \cdot \polP{\wVec}}\nonumber\\
%&= \sum_{\wVec \in \pw}\genVParam{\wVec}\label{eq:estExpect}.
%\end{align}
@ -91,19 +91,19 @@ Since \eqref{eq:single-est} holds, by linearity of expectation, \eqref{eq:allWor
%&\expect{\estimate}\\
%=&\expect{\estExpOne}\\
%=&\expect{\sum_{\substack{j \in [B],\\
% \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
% \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
% \wVec \in \pw~|~ \hash{i}[\wVec] = j,\\
% \wVec[w']\in \pw~|~ \hash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
%=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
% \wVec~|~\sketchHashParam{\wVec}= j,\\
% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} + \nonumber \\
% \wVec~|~\hashP{\wVec}= j,\\
% \wVecPrime~|~\hashP{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \genVParam{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVecPrime} + \nonumber \\
%&\phantom{{}\genVParam{\wVec}}\sum_{\substack{j \in [B], \\
% \wVec~|~\sketchHashParam{\wVec} = j,\\
% \wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
% \wVec~|~\hashP{\wVec} = j,\\
% \wVecPrime ~|~ \hashP{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \genVParam{\wVec} \cdot \polP{\wVec} \cdot\polP{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
%=&\expect{\sum_{\substack{j \in [B],\\
% \wVec~|~\sketchHashParam{\wVec}= j,\\
% \wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
% \wVec~|~\hashP{\wVec}= j,\\
% \wVecPrime~|~\hashP{\wVecPrime} = j,\\
% \wVec = \wVecPrime}} \genVParam{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVecPrime}} \nonumber \\
%&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
%=& \estExp \label{eq:estExpect}
%\end{align}
@ -114,24 +114,24 @@ Since \eqref{eq:single-est} holds, by linearity of expectation, \eqref{eq:allWor
For the next step, we show that the variance of an estimate is small.%$$\varParam{\estimate}$$
\begin{align}
&\varParam{\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}}\\%\nonumber\\
=~&\varParam{\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \sum_{\substack{\wVecPrime \in \pw \st\\ \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\sketchPolarParam{\wVecPrime}}\label{eq:var_step-one}\\%\nonumber\\%\estExpOne}\\
&\varParam{\sum_{\wVec \in \pw}\sketchJParam{\hashP{\wVec}} \cdot \polP{\wVec}}\\%\nonumber\\
=~&\varParam{\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \polP{\wVec} \sum_{\substack{\wVecPrime \in \pw \st\\ \hashP{\wVec} = \hashP{\wVecPrime}}}\polP{\wVecPrime}}\label{eq:var_step-one}\\%\nonumber\\%\estExpOne}\\
=~& \mathbb{E}\big[\big(\sum_{\substack{ \wVec, \wVecPrime \in \pw \st \\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}\nonumber\\
&\qquad - \expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}}\big)^2\big]\label{eq:var_step-two}\\%\nonumber\\
\hashP{\wVec} = \hashP{\wVecPrime}}} \genVParam{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVecPrime}\nonumber\\
&\qquad - \expect{\sum_{\wVec \in \pw} \sketchJParam{\hashP{\wVec}} \cdot \polP{\wVec}}\big)^2\big]\label{eq:var_step-two}\\%\nonumber\\
=~&\mathbb{E}\big[\sum_{\substack{
\wVec_1, \wVec_2,\\
\wVecPrime_1, \wVecPrime_2 \in \pw,\\
\sketchHashParam{\wVec_1} = \sketchHashParam{\wVecPrime_1},\\
\sketchHashParam{\wVec_2} = \sketchHashParam{\wVecPrime_2}
}}\genVParam{\wVec_1} \genVParam{\wVec_2}\sketchPolarParam{\wVec_1}\sketchPolarParam{\wVec_2}\sketchPolarParam{\wVecPrime_1}\sketchPolarParam{\wVecPrime_2}\big]\nonumber\\
\hashP{\wVec_1} = \hashP{\wVecPrime_1},\\
\hashP{\wVec_2} = \hashP{\wVecPrime_2}
}}\genVParam{\wVec_1} \genVParam{\wVec_2}\polP{\wVec_1}\polP{\wVec_2}\polP{\wVecPrime_1}\polP{\wVecPrime_2}\big]\nonumber\\
&\qquad - \left(\sum_{\wVec \in \pw}\genVParam{\wVec}\right)^2 \label{eq:var-sum-w}.
\end{align}
\begin{Justification}
\hfill
\begin{itemize}
\item \eq{\eqref{eq:var_step-one}} follows from substituting the definition of $\sketch$ and the commutativity of addition. Note the constraint on $\sketchHash$ hashing to the same bucket follows from the definition of $\sketch$. Also, the sum can be rearranged to take each component item in the sum of each bucket and take its sum of products with each of the $\sketchPolar$ mapped to it. This can be done as previously stated, using the commutativity of addition.
\item \eq{\eqref{eq:var_step-one}} follows from substituting the definition of $\sketch$ and the commutativity of addition. Note the constraint on $\hash$ hashing to the same bucket follows from the definition of $\sketch$. Also, the sum can be rearranged to take each component item in the sum of each bucket and take its sum of products with each of the $\pol$ mapped to it. This can be done as previously stated, using the commutativity of addition.
\item \eq{\eqref{eq:var_step-two}} follows by substituting the definition of variance.
\item \eq{\eqref{eq:var-sum-w}} results from the further evaluation of \eqref{eq:var_step-two}.
\end{itemize}
@ -139,13 +139,13 @@ For the next step, we show that the variance of an estimate is small.%$$\varPara
\begin{Assumption}
\hfill
\begin{itemize}
\item The subsequent evaluations of expectation assume 4-wise independence of $\sketchPolar$.
\item The subsequent evaluations of expectation assume 4-wise independence of $\pol$.
\end{itemize}
\end{Assumption}
Note that four-wise independence is assumed across all four random variables of \eqref{eq:var-sum-w}. Zooming in on the products of the $\sketchPolar$ functions,
Note that four-wise independence is assumed across all four random variables of \eqref{eq:var-sum-w}. Zooming in on the products of the $\pol$ functions,
\begin{equation}
\sketchPolarParam{\wOne}\cdot\sketchPolarParam{\wOneP}\cdot\sketchPolarParam{\wTwo}\cdot\sketchPolarParam{\wTwoP} \label{eq:polar-product}
\polP{\wOne}\cdot\polP{\wOneP}\cdot\polP{\wTwo}\cdot\polP{\wTwoP} \label{eq:polar-product}
\end{equation}
we see that %it can be seen that for $\wOne, \wOneP \in \pw$ and $\wTwo, \wTwoP \in \pw'$, all four random variables in \eqref{eq:polar-product} take their values from $\pw$, although we have iteration over two separate sets $\pw$.
there are five possible sets of $\wVec$ variable combinations. The following sets all assume each $\wVec$ to be from the set $\pw$. For pairwise distinct $a, b, c, d \in \{1, 1', 2, 2'\}$:
@ -161,7 +161,7 @@ With four random variables coming from sets containing the same elements, there
The use of variable subscripts in the notation is necessary as different combinations of equal $\wVec$ variables produce different results in the variance computation, as we will see shortly.
Note that each $\wVec$ is the input of the same $\sketchPolar$ function, meaning that equal worlds will produce the same output.
Note that each $\wVec$ is the input of the same $\pol$ function, meaning that equal worlds will produce the same output.
We are interested in those particular cases whose expectation does not equal zero, since an expectation of zero will not add to the summation of \eqref{eq:var-sum-w}. In expectation we have that
\begin{align}
@ -169,13 +169,13 @@ We are interested in those particular cases whose expectation does not equal zer
%\st \cOne}}
\polarProdNEq} = 1 \label{eq:polar-prod-all}
\end{align}
since we have the same element of the image of $\sketchPolar$ being multiplied by itself an even number of times. Similarly,
since we have the same element of the image of $\pol$ being multiplied by itself an even number of times. Similarly,
\begin{align}
\forAllW{\distPattern{2}}&\rightarrow\expect{%\sum_{\substack{\elems \\
%\st \cTwo}}
\polarProdEq} = 1 \label{eq:polar-prod-two-and-two}
\end{align}
because the same element of the image of $\sketchPolar$ is being multiplied by itself for each equality, producing a polarity of 1 for each equality, and then a final product of 1. For $\distPattern{3}, \distPattern{4}, \distPattern{5}$, we have a final product of two, three or four independent variables $\in \{-1, 1\}$, thus producing the following results:
because the same element of the image of $\pol$ is being multiplied by itself for each equality, producing a polarity of 1 for each equality, and then a final product of 1. For $\distPattern{3}, \distPattern{4}, \distPattern{5}$, we have a final product of two, three or four independent variables $\in \{-1, 1\}$, thus producing the following results:
\begin{align}
\forAllW{\distPattern{3}}&\rightarrow\expect{%\sum_{\substack{\elems \\
%\st \cThree}}
@ -203,7 +203,7 @@ This is the case because we have that
\begin{align*}
&\sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
\wOne = \wTwo = \wOneP = \wTwoP = \wVec}}
\expect{\genVParam{\wVec} \cdot \genVParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec}}\\
\expect{\genVParam{\wVec} \cdot \genVParam{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVec} \cdot \polP{\wVec}}\\
= &\sum_{\wVec \in \pw} \expect{\genVParam{\wVec}\cdot \genVParam{\wVec}}\\
= &\sum_{\wVec \in \pw} \expect{\genVParam{\wVec}^2}.
\end{align*}
@ -220,24 +220,24 @@ Considered separately, the subsets result in the following $\var$.
&\wOne = \wOneP \neq \wTwo =\wTwoP \rightarrow\nonumber\\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
\wOne = \wOneP = \wVec \neq\\
\wTwo = \wTwoP = \wVecPrime}}\expect{\genVParam{\wVec}\genVParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVecPrime}} \label{eq:variantOne}\nonumber\\
\wTwo = \wTwoP = \wVecPrime}}\expect{\genVParam{\wVec}\genVParam{\wVecPrime}\polP{\wVec}\polP{\wVec}\polP{\wVecPrime}\polP{\wVecPrime}} \label{eq:variantOne}\nonumber\\
&\qquad = \sum_{\wVec, \wVecPrime \in \pw \st \wVec \neq \wVecPrime}\expect{\genVParam{\wVec}\genVParam{\wVecPrime}}\\
&\wOne = \wTwo \neq \wOneP = \wTwoP \rightarrow\nonumber\\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
\wOne = \wTwo = \wVec \neq\\
\wOneP = \wTwoP = \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \expect{\genVParam{\wVec}\genVParam{\wVec}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}}\nonumber \\
&\qquad = \sum_{\wVec \in \pw}\expect{| \{\wVecPrime \st \wVecPrime \neq \wVec, \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} | \cdot \genVParam{\wVec}^2}\label{eq:variantTwo} \\
\hashP{\wVec} = \hashP{\wVecPrime}}} \expect{\genVParam{\wVec}\genVParam{\wVec}\polP{\wVec}\polP{\wVecPrime}\polP{\wVec}\polP{\wVecPrime}}\nonumber \\
&\qquad = \sum_{\wVec \in \pw}\expect{| \{\wVecPrime \st \wVecPrime \neq \wVec, \hashP{\wVec} = \hashP{\wVecPrime}\} | \cdot \genVParam{\wVec}^2}\label{eq:variantTwo} \\
&\wOne = \wTwoP \neq \wOneP =\wTwo \rightarrow \nonumber \\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
\wOne = \wTwoP = \wVec \neq \\
\wOneP = \wTwo = \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\expect{ \genVParam{\wVec} \genVParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}} \nonumber \\
\hashP{\wVec} = \hashP{\wVecPrime}}}\expect{ \genVParam{\wVec} \genVParam{\wVecPrime}\polP{\wVec}\polP{\wVecPrime}\polP{\wVecPrime}\polP{\wVec}} \nonumber \\
&\qquad = \sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
\wVec \neq \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\expect{\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}\label{eq:variantThree}
\hashP{\wVec} = \hashP{\wVecPrime}}}\expect{\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}\label{eq:variantThree}
\end{align}
Note that for $\distPattern{22}$, we have the cardinality of a bucket as a multiplicative factor for each squared annotation. This is because of the constraint that $\wOne \neq \wOneP$ coupled with the additional constraint that $\sketchHashParam{\wOne} = \sketchHashParam{\wOneP}$. Since $\wOneP$ must belong to the same bucket as $\wOne$, yet not equal to $\wOne$, we have that each operand of the sum must be the annotation squared for each $\wOneP$ that belongs to the same bucket but is not equal to $\wOne$.
Note that for $\distPattern{22}$, we have the cardinality of a bucket as a multiplicative factor for each squared annotation. This is because of the constraint that $\wOne \neq \wOneP$ coupled with the additional constraint that $\hashP{\wOne} = \hashP{\wOneP}$. Since $\wOneP$ must belong to the same bucket as $\wOne$, yet not equal to $\wOne$, we have that each operand of the sum must be the annotation squared for each $\wOneP$ that belongs to the same bucket but is not equal to $\wOne$.
Looking at $\distPattern{23}$, we have a similar case as $\distPattern{22}$, but this time there is no multiplicative factor since $\wOneP$ and $\wTwoP$ are constrained to equal their opposite $\wVec$ counterparts, which are the arguments for both $\genV$ terms.
@ -259,18 +259,18 @@ With only \eqref{eq:variantTwo} and \eqref{eq:variantThree} remaining, we have
\begin{multline*}
\varParam{\estimate} = \\
\expect{\sum_{\wVec \in \pw}| \{\wVecPrime \st \wVecPrime \neq \wVec, \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} | \cdot \genVParam{\wVec}^2} ~+ \\
\expect{\sum_{\wVec \in \pw}| \{\wVecPrime \st \wVecPrime \neq \wVec, \hashP{\wVec} = \hashP{\wVecPrime}\} | \cdot \genVParam{\wVec}^2} ~+ \\
\expect{\sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
\wVec \neq \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}.
\hashP{\wVec} = \hashP{\wVecPrime}}}\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}.
\end{multline*}
%Our current analysis is limited to TIPDBs, where the annotations are in the boolean $\mathbb{B}$ set. Because this is the case, the square of any element is itself.
Computing each term separately gives
\begin{align}
&\expect{\sum_{\wVec \in \pw}\big|~ \{\wVecPrime \st \wVecPrime \neq \wVec, \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} ~\big| \cdot \genVParam{\wVec}^2}\nonumber\\
&~=\sum_{\wVec \in \pw}\genVParam{\wVec}^2 \cdot \expect{\big|~ \{\wVecPrime \st \wVecPrime \neq \wVec, \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} ~\big|}\label{eq:s22-one}\\%\numWorldsP
&\expect{\sum_{\wVec \in \pw}\big|~ \{\wVecPrime \st \wVecPrime \neq \wVec, \hashP{\wVec} = \hashP{\wVecPrime}\} ~\big| \cdot \genVParam{\wVec}^2}\nonumber\\
&~=\sum_{\wVec \in \pw}\genVParam{\wVec}^2 \cdot \expect{\big|~ \{\wVecPrime \st \wVecPrime \neq \wVec, \hashP{\wVec} = \hashP{\wVecPrime}\} ~\big|}\label{eq:s22-one}\\%\numWorldsP
&~=\norm{\genV}^2_2\cdot \left(\frac{|\pw|}{\sketchCols} - 1\right)\label{eq:spaceOne}
\end{align}
@ -278,31 +278,31 @@ Computing each term separately gives
\hfill
\begin{itemize}
\item \eqref{eq:s22-one} follows from linearity of expectation.
\item \eqref{eq:spaceOne} follows from the uniform distribution of $\sketchHash$.
\item \eqref{eq:spaceOne} follows from the uniform distribution of $\hash$.
\end{itemize}
\end{Justification}
\begin{Assumption}
\hfill
\begin{itemize}
\item $\sketchHash$ must be uniformly distributed.
\item $\hash$ must be uniformly distributed.
\end{itemize}
\end{Assumption}
\begin{align}
&\expect{ \sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
\wVec \neq \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}\nonumber \\
\hashP{\wVec} = \hashP{\wVecPrime}}}\genVParam{\wVec}\cdot\kMapParam{\wVecPrime}}\nonumber \\
&~= \expect{\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \sum_{\substack{\wVecPrime \in\pw \st\\
\wVecPrime \neq \wVec,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\genVParam{\wVecPrime}} \label{eq:s23-one} \\
\hashP{\wVec} = \hashP{\wVecPrime}}}\genVParam{\wVecPrime}} \label{eq:s23-one} \\
%\numWorldsP \cdot \frac{\numWorldsP - 1}{\sketchCols}\label{eq:spaceTwo}.
&~= \expect{\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \big((\sum_{\substack{\wVecPrime \in\pw \st\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}} }\genVParam{\wVecPrime } )- \genVParam{\wVec}\big)} \nonumber \\
\hashP{\wVec} = \hashP{\wVecPrime}} }\genVParam{\wVecPrime } )- \genVParam{\wVec}\big)} \nonumber \\
&~=\expect{\left(\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\genVParam{\wVecPrime}\right) - \sum_{\wVec \in \pw}\genVParam{\wVec}^2}\label{eq:s23-two}\\
\hashP{\wVec} = \hashP{\wVecPrime}}}\genVParam{\wVecPrime}\right) - \sum_{\wVec \in \pw}\genVParam{\wVec}^2}\label{eq:s23-two}\\
&~=\expect{\sum_{\wVec \in \pw}\genVParam{\wVec} \cdot \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\genVParam{\wVecPrime}} - \expect{\sum_{\wVec \in \pw}\genVParam{\wVec}^2}\label{eq:s23-three} \\
\hashP{\wVec} = \hashP{\wVecPrime}}}\genVParam{\wVecPrime}} - \expect{\sum_{\wVec \in \pw}\genVParam{\wVec}^2}\label{eq:s23-three} \\
&~\leq\norm{\genV}_1 \cdot \frac{\norm{\genV}_1}{\sketchCols} - \expect{\sum_{\wVec \in \pw}\genVParam{\wVec}^2}\label{eq:s23-four} \\
&~\leq\frac{\norm{\genV}_1^2 - \norm{\genV}_2^2}{\sketchCols} \label{eq:spaceTwo}.
%&\norm{\genV}\prob \cdot \frac{\norm{\genV}\prob - \frac{\norm{\genV}}{\numWorlds}}{\sketchCols}\label{eq:spaceTwo}.
@ -315,7 +315,7 @@ Computing each term separately gives
\item \eqref{eq:s23-one} is an equivalent representation of the LHS.
\item \eqref{eq:s23-two} follows from the fact that $\wVec \neq \wVecPrime$.
\item \eqref{eq:s23-three} is the result of distributing the multiplication over the terms in the parenthesis.
\item \eqref{eq:s23-four} follows since $\sum\limits_{\wVec \in \pw}\genVParam{\wVec} = \norm{\genV}_1$; the second term also relies on the preceding fact and the assumption of uniform distribution of $\sketchHash$.
\item \eqref{eq:s23-four} follows since $\sum\limits_{\wVec \in \pw}\genVParam{\wVec} = \norm{\genV}_1$; the second term also relies on the preceding fact and the assumption of uniform distribution of $\hash$.
\item \eqref{eq:spaceTwo} is the result of the multiplication of the first two terms in \eqref{eq:s23-four} and that $\sum\limits_{\wVec \in \pw}\genVParam{\wVec}^2 = \norm{\genV}_2^2$.
\end{itemize}
\end{Justification}
@ -324,7 +324,7 @@ Computing each term separately gives
\begin{Assumption}
\hfill
\begin{itemize}
\item $\sketchHash$ must be pairwise independent.
\item $\hash$ must be pairwise independent.
\end{itemize}
\end{Assumption}
%In both equations, the sum of $\genVParam{\wVec}$ over all $\wVec \in \pw$ is $\numWorldsP$ since as noted in equation \eqref{eq:mu} we are summing the number of worlds a tuple $t$ appears in, and for a TIPDB, that is exactly 2 to the power of the number of tuples in the TIPDB (due to the independence of tuples) times tuple $t$'s probability.

View File

@ -13,61 +13,75 @@ By \eqref{eq:sub-bounds-final} it immediately follows that adding $n$ base (base
\]
\subsection{Multiplying Sketches}
There are various ways we can consider the multiplication of sketches. First, estimates might be multiplied, second, the sketches can be multiplied pointwise, taking then the estimate of the resultant sketch, and finally we consider an estimate simply as the multiplication of corresponding buckets. Stated formally the above is
There are various ways we might ``consider'' the multiplication of sketches. First, the estimates might be multiplied; second, the sketches can be multiplied pointwise and the estimate then taken of the resultant sketch (this is the correct way); and finally, we could consider an estimate to be the multiplication of corresponding buckets. Stated formally, the above variations are
\begin{align*}
&\est{1} = \sum_{\wVec \in \pw}\sCom{1}{\sketchHashParam{\wVec}}\sketchPolarParam{\wVec} \cdot \sCom{2}{\sketchHashParam{\wVec}}\sketchPolarParam{\wVec}\\
&\est{2} = \sum_{\wVec \in \pw }\left(\sCom{1}{\sketchHashParam{\wVec}} \cdot \sCom{2}{\sketchHashParam{\wVec}}\right)\sketchPolarParam{\wVec}\\
&\est{1} = \sum_{\wVec \in \pw}\sCom{1}{\hashP{\wVec}}\polP{\wVec} \cdot \sCom{2}{\hashP{\wVec}}\polP{\wVec}\\
&\est{2} = \sum_{\wVec \in \pw }\left(\sCom{1}{\hashP{\wVec}} \cdot \sCom{2}{\hashP{\wVec}}\right)\polP{\wVec}\\
&\est{3} = \sum_{j \in \sketchCols}\sCom{1}{j} \cdot \sCom{2}{j}.
\end{align*}
Calculating the expectation for $\est{1}$ evaluates to
\begin{align*}
&\expect{\sum_{\wVec \in \pw}\sCom{1}{\sketchHashParam{\wVec}}\sketchPolarParam{\wVec} \cdot \sCom{2}{\sketchHashParam{\wVec}}\sketchPolarParam{\wVec}}\\
=& \expect{\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\sum_{\substack{\wVecPrime \in \pw \st\\ \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genV_1\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime} \sum_{\substack{\wVecPrime \in \pw \st\\ \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\genV_2\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime}}\\
=& \mathbb{E}\big[\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\left(\sum_{\substack{\wVecPrime \in \pw \st\\
\wVecPrime \neq \wVec}} \genV_1\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime} + \genV_1\paramBox{\wVec}\sketchPolarParam{\wVec}\right)\\
&\expect{\sum_{\wVec \in \pw}\sCom{1}{\hashP{\wVec}}\polP{\wVec} \cdot \sCom{2}{\hashP{\wVec}}\polP{\wVec}}\\
=& \expect{\sum_{\wVec \in \pw}\polP{\wVec}\polP{\wVec}\sum_{\substack{\wVecPrime \in \pw \st\\ \hashP{\wVecPrime} = \hashP{\wVec}}} \genV_1\paramBox{\wVecPrime}\polP{\wVecPrime} \sum_{\substack{\wVecPrime \in \pw \st\\ \hashP{\wVecPrime} = \hashP{\wVec}}}\genV_2\paramBox{\wVecPrime}\polP{\wVecPrime}}\\
=& \mathbb{E}\big[\sum_{\wVec \in \pw}\polP{\wVec}\polP{\wVec}\left(\sum_{\substack{\wVecPrime \in \pw \st\\
\wVecPrime \neq \wVec}} \genV_1\paramBox{\wVecPrime}\polP{\wVecPrime} + \genV_1\paramBox{\wVec}\polP{\wVec}\right)\\
& \qquad \left(\sum_{\substack{\wVecPrime \in \pw \st\\
\wVecPrime \neq \wVec}} \genV_2\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime} + \genV_2\paramBox{\wVec}\sketchPolarParam{\wVec}\right)\big]\\
=& \expect{\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\genV_1\paramBox{\wVec}\sketchPolarParam{\wVec}\genV_2\paramBox{\wVec}\sketchPolarParam{\wVec}}\\
\wVecPrime \neq \wVec}} \genV_2\paramBox{\wVecPrime}\polP{\wVecPrime} + \genV_2\paramBox{\wVec}\polP{\wVec}\right)\big]\\
=& \expect{\sum_{\wVec \in \pw}\polP{\wVec}\polP{\wVec}\genV_1\paramBox{\wVec}\polP{\wVec}\genV_2\paramBox{\wVec}\polP{\wVec}}\\
=& \sum_{\wVec \in \pw}\genV_1\paramBox{\wVec}\genV_2\paramBox{\wVec}.
\end{align*}
This result is consistent for an arbitrary number of sketches in the product.
In expectation $\est{2}$ results in
\begin{align*}
&\expect{\sum_{\wVec \in \pw }\left(\sCom{1}{\sketchHashParam{\wVec}} \cdot \sCom{2}{\sketchHashParam{\wVec}}\right)\sketchPolarParam{\wVec}}\\
&\expect{\sum_{\wVec \in \pw }\left(\sCom{1}{\hashP{\wVec}} \cdot \sCom{2}{\hashP{\wVec}}\right)\polP{\wVec}}\\
= &\expect{\sum_{\wVec \in \pw}\left(\sum_{\substack{\wVecPrime \in \pw \st\\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genV_1\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime}\sum_{\substack{\wVecPrime \in \pw \st\\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\genV_2\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime}\right)\sketchPolarParam{\wVec}}\\
= &\mathbb{E}\big[\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\left(\sum_{\substack{\wVecPrime \in \pw \st\\ \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}\\\wVecPrime \neq \wVec}}\genV_1\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime} + \genV_1\paramBox{\wVec}\sketchPolarParam{\wVec}\right)\\
&\qquad\left(\sum_{\substack{\wVecPrime \in \pw \st\\ \sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}\\\wVecPrime \neq \wVec}}\genV_2\paramBox{\wVecPrime}\sketchPolarParam{\wVecPrime} + \genV_2\paramBox{\wVec}\sketchPolarParam{\wVec}\right)\big]\\
= &\expect{\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\genV_1\paramBox{\wVec}\sketchPolarParam{\wVec}\genV_2\paramBox{\wVec}\sketchPolarParam{\wVec}}\\
\hashP{\wVecPrime} = \hashP{\wVec}}} \genV_1\paramBox{\wVecPrime}\polP{\wVecPrime}\sum_{\substack{\wVecPrime \in \pw \st\\
\hashP{\wVecPrime} = \hashP{\wVec}}}\genV_2\paramBox{\wVecPrime}\polP{\wVecPrime}\right)\polP{\wVec}}\\
= &\mathbb{E}\big[\sum_{\wVec \in \pw}\polP{\wVec}\left(\sum_{\substack{\wVecPrime \in \pw \st\\ \hashP{\wVecPrime} = \hashP{\wVec}\\\wVecPrime \neq \wVec}}\genV_1\paramBox{\wVecPrime}\polP{\wVecPrime} + \genV_1\paramBox{\wVec}\polP{\wVec}\right)\\
&\qquad\left(\sum_{\substack{\wVecPrime \in \pw \st\\ \hashP{\wVecPrime} = \hashP{\wVec}\\\wVecPrime \neq \wVec}}\genV_2\paramBox{\wVecPrime}\polP{\wVecPrime} + \genV_2\paramBox{\wVec}\polP{\wVec}\right)\big]\\
= &\expect{\sum_{\wVec \in \pw}\polP{\wVec}\genV_1\paramBox{\wVec}\polP{\wVec}\genV_2\paramBox{\wVec}\polP{\wVec}}\\
= & 0.
\end{align*}
Note that with an odd number of sketches being multiplied, such as 3, we would get an expectation equal to the ground truth
\begin{align*}
= &\expect{\sum_{\wVec \in \pw}\sketchPolarParam{\wVec}\genV_1\paramBox{\wVec}\sketchPolarParam{\wVec}\genV_2\paramBox{\wVec}\sketchPolarParam{\wVec}\genV_3\paramBox{\wVec}\sketchPolarParam{\wVec}}\\
= &\expect{\sum_{\wVec \in \pw}\polP{\wVec}\genV_1\paramBox{\wVec}\polP{\wVec}\genV_2\paramBox{\wVec}\polP{\wVec}\genV_3\paramBox{\wVec}\polP{\wVec}}\\
= &\sum_{\wVec \in \pw}\genV_1\paramBox{\wVec}\genV_2\paramBox{\wVec}\genV_3\paramBox{\wVec}.
\end{align*}
For $\est{3}$, multiplying an even number of sketches yields
\begin{align*}
&\expect{\sum_{j \in \sketchCols}\sCom{1}{j} \cdot \sCom{2}{j}}\\
=&\expect{\sum_{j \in \sketchCols}\left(\sum_{\substack{\wVec \in \pw \st\\\sketchHashParam{\wVec} = j}}\gVP{1}{\wVec}\sketchPolarParam{\wVec}\cdot \sum_{\substack{\wVecPrime \in \pw \st\\\sketchHashParam{\wVecPrime} = j}}\gVP{2}{\wVecPrime}\sketchPolarParam{\wVecPrime}\right)}\\
=&\expect{\sum_{j \in \sketchCols}\sum_{\substack{\wVec, \wVecPrime \in \pw \st\\\sketchHashParam{\wVec} = j\\\wVec = \wVecPrime}}\gVP{1}{\wVec}\gVP{2}{\wVec}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\sum_{\substack{\wVec, \wVecPrime \in \pw \st\\\sketchHashParam{\wVec} = j\\\wVec \neq \wVecPrime}}\gVP{1}{\wVec}\gVP{2}{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}}\\
=&\expect{\sum_{j \in \sketchCols}\left(\sum_{\substack{\wVec \in \pw \st\\\hashP{\wVec} = j}}\gVP{1}{\wVec}\polP{\wVec}\cdot \sum_{\substack{\wVecPrime \in \pw \st\\\hashP{\wVecPrime} = j}}\gVP{2}{\wVecPrime}\polP{\wVecPrime}\right)}\\
=&\expect{\sum_{j \in \sketchCols}\Bigl(\sum_{\substack{\wVec, \wVecPrime \in \pw \st\\\hashP{\wVec} = j\\\wVec = \wVecPrime}}\gVP{1}{\wVec}\gVP{2}{\wVec}\polP{\wVec}\polP{\wVec} + \sum_{\substack{\wVec, \wVecPrime \in \pw \st\\\hashP{\wVec} = \hashP{\wVecPrime} = j\\\wVec \neq \wVecPrime}}\gVP{1}{\wVec}\gVP{2}{\wVecPrime}\polP{\wVec}\polP{\wVecPrime}\Bigr)}\\
=&\expect{\sum_{\wVec \in \pw}\gVP{1}{\wVec}\gVP{2}{\wVec}}\\
=&\sum_{\wVec \in \pw}\gVP{1}{\wVec}\gVP{2}{\wVec}.
\end{align*}
Following the reversal of the pattern of $\est{2}$, an odd number of sketches would produce an expectation of $0$, since each product in the sum has an operand whose expectation evaluates to $0$, as seen in the following,
\begin{align*}
&\expect{\sum_{\wVec \in \pw}\gVP{1}{\wVec}\polP{\wVec} \cdot \sum_{\wVecPrime \in \pw}\gVP{2}{\wVecPrime}\polP{\wVecPrime}\cdot\sum_{\wVec'' \in \pw}\gVP{3}{\wVec''}\polP{\wVec''}}\\
= &\mathbb{E}\big[\sum_{\wVec \in \pw}\gVP{1}{\wVec}\polP{\wVec} \left(\gVP{2}{\wVec}\polP{\wVec} + \sum_{\substack{\wVecPrime \in \pw \st\\\wVecPrime \neq \wVec}}\gVP{2}{\wVecPrime}\polP{\wVecPrime}\right)\\
&\qquad \left(\gVP{3}{\wVec}\polP{\wVec} + \sum_{\substack{\wVecPrime' \in \pw \st\\\wVecPrime' \neq \wVec}}\gVP{3}{\wVecPrime'}\polP{\wVecPrime'}\right)\big]\\
= &\mathbb{E}\big[\sum_{\w \in \pw}\gVP{1}{\w}\polP{\w}\gVP{2}{\w}\polP{\w}\gVP{3}{\w}\polP{\w} + \\
&\qquad \gVP{1}{\w}\polP{\w}\gVP{2}{\w}\polP{\w}\sum_{\substack{\w'' \in \pw\st\\\w''\neq\w}}\gVP{3}{\w''}\polP{\w''} + \\
&\qquad\gVP{1}{\w}\polP{\w}\gVP{3}{\w}\polP{\w}\sum_{\substack{\wVecPrime \in \pw\st\\\wVecPrime\neq\w\\}}\gVP{2}{\wVecPrime}\polP{\wVecPrime} + \\
&\qquad \gVP{1}{\w}\polP{\w}\sum_{\substack{\wVecPrime \in \pw\st\\\wVecPrime\neq\w\\}}\gVP{2}{\wVecPrime}\polP{\wVecPrime}\sum_{\substack{\w'' \in \pw\st\\\w''\neq\w}}\gVP{3}{\w''}\polP{\w''}\big] \\
= & 0.
\end{align*}
For the case of multiplication, when assuming independent variables, it is a known result that
\[
\varParam{X \cdot Y} = \expect{X^2}\expect{Y^2} - (\expect{X})^2 (\expect{Y})^2.
\]
It is necessary then to calculate the expectation of the square of the sum of estimates. Assuming discrete variables, the expectation of the square of a random variable is simply the sum of its weighted squares. This yields
\begin{align}
&\expect{\left(\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq}\\
=& \sum_{\wVec \in \pw}\expect{\left(\sketchJParam{\sketchHashParam{\wVec}}\cdot\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-ex-push}\\
&\expect{\left(\sum_{\wVec \in \pw}\sketchJParam{\hashP{\wVec}}\cdot \polP{\wVec}\right)^2}\label{eq:rand-sq}\\
=& \sum_{\wVec \in \pw}\expect{\left(\sketchJParam{\hashP{\wVec}}\cdot\polP{\wVec}\right)^2}\label{eq:rand-sq-ex-push}\\
=& \sum_{\wVec \in \pw}\expect{\left(\sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-equiv}\\
=& \sum_{\wVec \in \pw}\expect{\left(\genVParam{\wVec}^2\sketchPolarParam{\wVec}^2 + \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec},
\wVecPrime \neq \wVec}} \genVParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\right)^2}\label{eq:rand-sq-assoc}\\
\hashP{\wVecPrime} = \hashP{\wVec}}} \genVParam{\wVecPrime}\polP{\wVecPrime}\polP{\wVec}\right)^2}\label{eq:rand-sq-equiv}\\
=& \sum_{\wVec \in \pw}\expect{\left(\genVParam{\wVec}\polP{\wVec}^2 + \sum_{\substack{\wVecPrime \in \pw \st \\
\hashP{\wVecPrime} = \hashP{\wVec},
\wVecPrime \neq \wVec}} \genVParam{\wVecPrime}\polP{\wVecPrime}\polP{\wVec}\right)^2}\label{eq:rand-sq-assoc}\\
=& \sum_{\wVec \in \pw}\expect{\genVParam{\wVec}^2}\label{eq:rand-sq-reduce}\\
=& \sum_{\wVec \in \pw}\genVParam{\wVec}^2\label{eq:rand-sq-final}.
\end{align}
@ -79,13 +93,13 @@ It is necessary then to calculate the expectation of the square of the sum of es
\item \eqref{eq:rand-sq-ex-push} is the sum of weighted squares, or alternatively, pushes the expectation inside the summation by linearity of expectation.
\item \eqref{eq:rand-sq-equiv} substitutes the definition of a sketch bucket.
\item \eqref{eq:rand-sq-assoc} uses associativity to rearrange the operands of the sum.
\item \eqref{eq:rand-sq-reduce} reduces the second term of \eqref{eq:rand-sq-assoc} to $0$ by the property of uniform distribution of $\sketchPolar$.
\item \eqref{eq:rand-sq-reduce} reduces the second term of \eqref{eq:rand-sq-assoc} to $0$ by the property of uniform distribution of $\pol$.
\item \eqref{eq:rand-sq-final} is obtained by the fact that the expectation of $\genVParam{\wVec}$ is simply itself.
\end{itemize}
\end{Justification}
\begin{Assumption}
\hfill
\begin{itemize}\item Uniform distribution of both $\sketchHash$ and $\sketchPolar$.\end{itemize}
\begin{itemize}\item Uniform distribution of both $\hash$ and $\pol$.\end{itemize}
\end{Assumption}
It then follows that the variance corresponding to the multiplication of two base sketches is
\begin{align}

View File

@ -5,9 +5,9 @@
\subsection{Algorithm for $\gIJ$}
\begin{algorithmic}
\ForAll{$\wVec \in \pw$}
\If{$\sketchHashParam{\wVec} = \buck$}
%\If{$\sketchPolarParam{\wVec} = 1$}
\State $\polSum \mathrel{+}=\sketchPolarParam{\wVec}$%+= 1$
\If{$\hashP{\wVec} = \buck$}
%\If{$\polP{\wVec} = 1$}
\State $\polSum \mathrel{+}=\polP{\wVec}$%+= 1$
%\Else
% \State $\gIJ -= 1$
%\EndIf
@ -36,7 +36,7 @@
\subsection{Algorithm for Initialization}
\begin{algorithmic}
\ForAll{$\wVec \in \pw \st \kMapParam{\wVec} = 1$}
\State $\sketchJParam{\sketchHashParam{\wVec}} = \sketchPolarParam{\wVec}$
\State $\sketchJParam{\hashP{\wVec}} = \polP{\wVec}$
\EndFor.
\end{algorithmic}
%Non-generic Algorithm

View File

@ -8,35 +8,35 @@ From my discussion with the folks here at the workshop the requirement (3) seems
\subsection{Requirements}
As mentioned in Section~\ref{sec:notation}, we define our row-wise hash functions (bucket mapping and polarity) as follows:
\begin{align*}
\sketchHash&: \pw \to \sketchCols \\
\sketchPolar&: \pw \to \{-1, 1\}.
\hash&: \pw \to \sketchCols \\
\pol&: \pw \to \{-1, 1\}.
\end{align*}
We require that $\sketchHash$ be pairwise independent and $\sketchPolar$ 4-wise independent.
We require that $\hash$ be pairwise independent and $\pol$ 4-wise independent.
Turning to the computation of the exact values of
\begin{equation}
\sum\limits_{\wVec \in \pw } \sketchJParam{\sketchHashParam{\wVec}}\sketchPolarParam{\wVec} =
\sum\limits_{\wVec \in \pw } \kMapParam{\wVec}\sketchPolarParam{\wVec}
\sum\limits_{\wVec \in \pw } \sketchJParam{\hashP{\wVec}}\polP{\wVec} =
\sum\limits_{\wVec \in \pw } \kMapParam{\wVec}\polP{\wVec}
\sum_{\substack{\wVecPrime \in \pw\st\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \sketchPolarParam{\wVecPrime}\label{eq:exact-results} .
\hashP{\wVec} = \hashP{\wVecPrime}}} \polP{\wVecPrime}\label{eq:exact-results} .
\end{equation}
Starting with the latter term $\gIJ = \sum\limits_{\wVecPrime \in \pw}\sketchPolarParam{\wVecPrime}$, by the definition of the image of $\sketchPolar$ and the property of associativity in addition, we can break the sum into
Starting with the latter term $\gIJ = \sum\limits_{\wVecPrime \in \pw}\polP{\wVecPrime}$, by the definition of the image of $\pol$ and the property of associativity in addition, we can break the sum into
\begin{equation*}
\gIJ = \sum_{\substack{\wVecPrime \in \pw \st\\
\sketchPolarParam{\wVecPrime} = 0}} 1 + \sum_{\substack{\wVecPrime \in \pw \st\\
\sketchPolarParam{\wVecPrime} = 1}} -1.
\polP{\wVecPrime} = 0}} 1 + \sum_{\substack{\wVecPrime \in \pw \st\\
\polP{\wVecPrime} = 1}} -1.
\end{equation*}
Setting the terms to $T_1 = \sum\limits_{\substack{\wVecPrime \in \pw \st\\
\sketchPolarParam{\wVecPrime} = 0}} 1$ and $T_2 = \sum\limits_{\substack{\wVecPrime \in \pw \st\\
\sketchPolarParam{\wVecPrime} = 1}} -1$ and fixing $\buck \in \{0,1\}^\lenB$ (with $\lenB = \log\sketchCols$) to a specific value, gives a system of linear equations for each term. It is a known result given a consistent matrix multiplication that the number of solutions are $| \kDom |^{\numTup - rank(\matrixH')}$, where $\kDom$ is the set being considered. For $\kDom = \mathbb{B}$ this gives us an exact calculation for both terms,
\polP{\wVecPrime} = 0}} 1$ and $T_2 = \sum\limits_{\substack{\wVecPrime \in \pw \st\\
\polP{\wVecPrime} = 1}} -1$ and fixing $\buck \in \{0,1\}^\lenB$ (with $\lenB = \log\sketchCols$) to a specific value, gives a system of linear equations for each term. It is a known result that, for a consistent system of linear equations, the number of solutions is $| \kDom |^{\numTup - rank(\matrixH')}$, where $\kDom$ is the set being considered. For $\kDom = \mathbb{B}$ this gives us an exact calculation for both terms,
\begin{align*}
T_1 = |\{\wVec \st \matrixH' \cdot \wVec = \buck^{(0)}\}|\rightarrow T_1 \in \{0, 2^{\numTup - rank(\matrixH')}\},\\
T_2 = |\{\wVec \st \matrixH' \cdot \wVec = \buck^{(1)}\}|\rightarrow T_2 \in \{0, 2^{\numTup - rank(\matrixH')}\},
\end{align*}
where the notation $\jpbit{y}$ denotes the polarity bit $\lenB$ value of the $\buck$ bucket identifier, specifically $\buck(b)$, such that $\buck(b)\in \{0, 1\}$. For each bucket $\buck$, we therefore want to compute the following quantity in $\mathrm{poly}(N)$ time, or an approximation thereof:
\[
\big|\{ \wVec \in \pw \st \sketchHashParam{\wVec} = \buck, \sketchPolarParam{\wVec} = 1 \}\big| - \big|\{\wVec \in \pw \st \sketchHashParam{\wVec} = \buck, \sketchPolarParam{\wVec} = -1\}\big|.
\big|\{ \wVec \in \pw \st \hashP{\wVec} = \buck, \polP{\wVec} = 1 \}\big| - \big|\{\wVec \in \pw \st \hashP{\wVec} = \buck, \polP{\wVec} = -1\}\big|.
\]
We refer to the above quantity as $\polSum$.
@ -57,13 +57,13 @@ Examining the former term of equation \eqref{eq:exact-results}, we fix $\kMap{t}
\end{equation*}}
%Therefore, by definition we have
%\begin{equation*}
%\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}} = \sum_{\wVec \in \pw}\kMapParam{\wVec}\sketchPolarParam{\wVec},
%\sum_{\wVec \in \pw}\sketchJParam{\hashP{\wVec}} = \sum_{\wVec \in \pw}\kMapParam{\wVec}\polP{\wVec},
%\end{equation*}
Using the same argument as in $\gIJ$ yields
\begin{equation*}
\sum_{\wVec \in \pw \st \sketchPolarParam{\wVec} = 0}\kMapParam{\wVec} - \sum_{\wVec \in \pw \st \sketchPolarParam{\wVec} = 1}\kMapParam{\wVec}.
\sum_{\wVec \in \pw \st \polP{\wVec} = 0}\kMapParam{\wVec} - \sum_{\wVec \in \pw \st \polP{\wVec} = 1}\kMapParam{\wVec}.
\end{equation*}
Setting $T_3 = \sum\limits_{\wVec \in \pw \st \sketchPolarParam{\wVec} = 0}\kMapParam{\wVec}$, $T_4 = \sum\limits_{\wVec \in \pw \st \sketchPolarParam{\wVec} = 1}\kMapParam{\wVec}$ gives an exact calculation for each term given a fixed $\buck$:
Setting $T_3 = \sum\limits_{\wVec \in \pw \st \polP{\wVec} = 0}\kMapParam{\wVec}$, $T_4 = \sum\limits_{\wVec \in \pw \st \polP{\wVec} = 1}\kMapParam{\wVec}$ gives an exact calculation for each term given a fixed $\buck$:
\begin{equation*}
T_3 = \gVt{\sum_{\substack{k \in \{\wVec \st \\
\matrixH \cdot \wVec = \buck^{(0)},\\
@ -83,20 +83,20 @@ As with world identification, bucket identification can be viewed as a binary ve
h_{i, \lenB, 0} &\cdots &h_{\lenB, \numTup}\\
\end{pmatrix*}.
\end{equation*}
We can then define the row hash function $\sketchHash$ that maps input to buckets as the multiplication of the matrix $\matrixH \cdot \wVec = \jVec$ , as
We can then define the row hash function $\hash$ that maps input to buckets as the multiplication of the matrix $\matrixH \cdot \wVec = \jVec$ , as
\begin{equation*}
\hVecMatrix \cdot \vecCol{w}{\numTup} = \vecCol{j}{\lenB - 1},
\end{equation*}
or equivalently
\begin{equation*}
\sketchHashParam{\wVec} \coloneq (\forall i \in [\lenB], j_i = \langle\textbf{h}_{i, k}, \wVec\rangle) = \buck
\hashP{\wVec} \coloneqq (\forall i \in [\lenB], j_i = \langle\textbf{h}_{i, k}, \wVec\rangle) = \buck
\end{equation*}
Polarity function $\sketchPolar$ can be analogously defined as the inner product of a precomputed vector (abusing notation) $\mathbf{\sketchPolar}$ and $\wVec$,
Polarity function $\pol$ can be analogously defined as the inner product of a precomputed vector (abusing notation) $\mathbf{\pol}$ and $\wVec$,
\begin{equation*}
\sketchPolarParam{\wVec} \coloneq \langle\mathbf{\sketchPolar}, \wVec\rangle
\polP{\wVec} \coloneqq \langle\mathbf{\pol}, \wVec\rangle
\end{equation*}
Augmenting $\matrixH$ to $\matrixH$' by adding $\mathbf{\sketchPolar}$ as an additional row in $\matrixH$ gives
Augmenting $\matrixH$ to $\matrixH'$ by adding $\mathbf{\pol}$ as an additional row in $\matrixH$ gives
\begin{equation*}
\matrixH' = \begin{pmatrix*}[l]
h_{i, 0, 0}&\cdots &h_{0, \numTup} \\

View File

@ -12,10 +12,10 @@
\newcommand{\sCom}[2]{\mathcal{S}_{#1}\paramBox{i}\paramBox{#2}}
\newcommand{\sketchCols}{B}
\newcommand{\sketchRows}{M}
\newcommand{\sketchHash}[1][i]{h_{#1}}
\newcommand{\sketchHashParam}[1]{\sketchHash\paramBox{#1}}
\newcommand{\sketchPolar}[1][i]{s_{#1}}
\newcommand{\sketchPolarParam}[1]{\sketchPolar\paramBox{#1}}
\newcommand{\hash}[1][i]{h_{#1}}
\newcommand{\hashP}[1]{\hash\paramBox{#1}}
\newcommand{\pol}[1][i]{s_{#1}}
\newcommand{\polP}[1]{\pol\paramBox{#1}}
\newcommand{\gIJ}{\gamma\paramBox{i}\paramBox{j}}
\newcommand{\buck}{\textbf{j}}
\newcommand{\jVec}{\textbf{j}}
@ -33,7 +33,7 @@
{#1}_{#2}
\end{pmatrix}}
\newcommand{\jpbit}[1]{\buck^{(#1)}}
\newcommand{\polSum}{Bias(j, \sketchHash, \sketchPolar)}
\newcommand{\polSum}{Bias(j, \hash, \pol)}
%
%TIDB
%
@ -72,8 +72,8 @@
%%%%%%%%%%%%%%%%
%4-way cases
%%%%%%%%%%%%%%%%
\newcommand{\polarProdNEq}{\sketchPolarParam{\wOne}\cdot\sketchPolarParam{\wOneP}\cdot\sketchPolarParam{\wTwo}\cdot\sketchPolarParam{\wTwoP}}%
\newcommand{\polarProdEq}{\sketchPolarParam{\wa}\cdot\sketchPolarParam{\wb}\cdot\sketchPolarParam{\wc}\cdot\sketchPolarParam{\wVecD}}%
\newcommand{\polarProdNEq}{\polP{\wOne}\cdot\polP{\wOneP}\cdot\polP{\wTwo}\cdot\polP{\wTwoP}}%
\newcommand{\polarProdEq}{\polP{\wa}\cdot\polP{\wb}\cdot\polP{\wc}\cdot\polP{\wVecD}}%
\newcommand{\elems}{\wa, \wb, \wc, \wVecD}
\newcommand{\nElems}{\wOne, \wOneP, \wTwo, \wTwoP}
\newcommand{\forAllW}[1]{\forall (\elems) \in {#1}}
@ -112,22 +112,22 @@
% Equations
%%%%%%%%%%%%%%%%%
\newcommand{\polarFuncSum}[1][]{\sum_{\substack{\wVecPrime ~|~ \\
\sketchHash\left[\wVecPrime\right] = j\\
{#1}}}\sketchPolarParam{\wVecPrime}}
\hash\left[\wVecPrime\right] = j\\
{#1}}}\polP{\wVecPrime}}
\newcommand{\estimate}{\sum_{j \in \sketchCols} \sketchIj \cdot \polarFuncSum }
\newcommand{\estExpOne}{\sum_{\substack{j \in \sketchCols, \\
\wVec \in \pw ~|~\sketchHash\left[\wVec\right] = j}} \kMap{t} \cdot\sketchPolarParam{\wVec} \cdot \polarFuncSum}
\wVec \in \pw ~|~\hash\left[\wVec\right] = j}} \kMap{t} \cdot\polP{\wVec} \cdot \polarFuncSum}
\newcommand{\estTwo}{\sum_{\substack{j \in [B],\\
\wVec \in \pw~|~ \sketchHash{[\wVec]} = j,\\
\wVec[w']\in \pw~|~ \sketchHash{[\wVec[w']]} = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}
\wVec \in \pw~|~ \hash{[\wVec]} = j,\\
\wVec[w']\in \pw~|~ \hash{[\wVec[w']]} = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}
\newcommand{\estExp}{ \sum_{\substack{j \in [B],\\
\wVec~|~\sketchHashParam{\wVec}= j,\\}} \kMapParam{\wVec}}
\wVec~|~\hashP{\wVec}= j,\\}} \kMapParam{\wVec}}
\newcommand{\distPatOne}{\sum_{\wVec \in \pw}\expect{ \kMapParam{\wVec}^2}}
\newcommand{\variantOne}{\sum_{\wOne \neq \wTwo}\kMapParam{\wOne} \cdot \kMapParam{\wTwo}}
\newcommand{\variantTwo}{\sum_{\substack{\wVec \neq \wVecPrime,\\
\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \big| \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime} \big|\cdot \kMapParam{\wVec}^2}
\hashP{\wVec} = \hashP{\wVecPrime}}} \big| \hashP{\wVec} = \hashP{\wVecPrime} \big|\cdot \kMapParam{\wVec}^2}
\newcommand{\variantThree}{\sum_{\substack{\wOne \neq \wTwo,\\
\sketchHashParam{\wOne} = \sketchHashParam{\wTwo}}} \kMapParam{\wOne} \cdot \kMapParam{\wTwo}}
\hashP{\wOne} = \hashP{\wTwo}}} \kMapParam{\wOne} \cdot \kMapParam{\wTwo}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% COMMENTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@ -4,13 +4,13 @@
The following notation is used to reason about the sketching of world membership for a given tuple. We denote the set of all possible worlds as $\pw$. A given sketch $\sketch$ can be viewed as an $\sketchRows \times \sketchCols$ matrix, i.e. a matrix with $\sketchRows$ rows and $\sketchCols$ columns. Upon initialization each row of $\sketch$ is an estimation of the $\kDom$ frequency for a given tuple represented by $\sketch$ across all possible worlds.
To facilitate binning the $\kDom$ values for a given world $\wVec$, each of the $\sketchRows$ rows has two pairwise independent hash functions $\sketchHash[i]:\pw \to [B]$ and $\sketchPolar[i]:\pw \to \{-1,1\}$, where all functions are independent of one another. Finally, $\genV \in \pwK$ is simply a vector whose values are from the set $\kDom$, each of which denote the annotation of the tuple $t$ in its corresponding world.%defined as $\kMap{t} : \{0, 1\}^\numTup \rightarrow \kDom$ is used to determine the tuple's $\kDom$ annotation for a given world.
To facilitate binning the $\kDom$ values for a given world $\wVec$, each of the $\sketchRows$ rows has two pairwise independent hash functions $\hash[i]:\pw \to [B]$ and $\pol[i]:\pw \to \{-1,1\}$, where all functions are independent of one another. Finally, $\genV \in \pwK$ is simply a vector whose values are from the set $\kDom$, each of which denote the annotation of the tuple $t$ in its corresponding world.%defined as $\kMap{t} : \{0, 1\}^\numTup \rightarrow \kDom$ is used to determine the tuple's $\kDom$ annotation for a given world.
When a world $\wVec$'s $\kDom$ value is updated, it's $\kDom$ value is first retrieved via $\kMap{t}$ and then multiplied by the output of the $i^{th}$ row's polarity function $\sketchPolar$. The resulting computation is then added to the current value contained in the bin mapping. Formally:
$$\sketchJParam{\sketchHashParam{\wVec}} ~+=~ \sketchPolarParam{\wVec} \times \kMapParam{\wVec}$$
When a world $\wVec$'s $\kDom$ value is updated, its $\kDom$ value is first retrieved via $\kMap{t}$ and then multiplied by the output of the $i^{th}$ row's polarity function $\pol$. The resulting computation is then added to the current value contained in the bin mapping. Formally:
$$\sketchJParam{\hashP{\wVec}} ~+=~ \polP{\wVec} \times \kMapParam{\wVec}$$
After initialization is complete we have that
$$\sketchIj = \sum_{\{\wVec \st \sketchHashParam{\wVec} = j\}}\genVParam{\wVec} \sketchPolarParam{\wVec}.$$
$$\sketchIj = \sum_{\{\wVec \st \hashP{\wVec} = j\}}\genVParam{\wVec} \polP{\wVec}.$$
When referring to Tuple Independent Databases (TIDB), a database $\relation$ contains $\numTup$ tuples, with $\numWorlds$ possible worlds $\pw$. $\pw$ is denoted as $\{0, 1\}^\numTup$, where a specific world $\wVec$ is defined as $\wVec \in \{0, 1\}^\numTup$.