paper-BagRelationalPDBsAreHard/analysis.tex

% -*- root: main.tex -*-
\section{Analysis}
\label{sec:analysis}
We begin the analysis by showing that with high probability an estimate is approximately $\numWorldsP$, where $p$ is a tuple's probability measure for a given TIPDB.  Note that
\begin{equation}
\numWorldsP = \numWorldsSum\label{eq:mu}.
\end{equation}

We begin by making the claim that the expectation of the estimate of a tuple $t$'s membership across all worlds is $\numWorldsSum$, formally
\begin{equation}
\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \sum_{\wVec \in \pw}\kMapParam{\wVec}\label{eq:allWorlds-est}.
\end{equation}
To verify this claim, we argue that the expectation of the estimate of a tuple's appearance in a single world is its annotation, i.e.,
\begin{equation}
\expect{\sketchJParam{\sketchHashParam{\wVec}}\cdot \sketchPolarParam{\wVec}} = \kMapParam{\wVec} \label{eq:single-est}.
\end{equation}

%\AR{While the analysis below is correct, the way it is stated it seems to `come out of the blue.' I would recommend that you re-structure the argument below as follows. First argue that $\expect{\sketch[i][\sketchHash[\wVec]]\cdot s_i[\wVec]}=v_t[\wVec]$. From this the claim below just follows by linearity of expectation but this result is a good thing for the reader to realize. Also instead of summing over $j\in [B],\wVec|h_i[\wVec]=j,\wVec'|h_i[\wVec']=j$ it would be better to just write it as sum over all $\wVec,\wVec'\in W\text{ s.t. }h_i[\wVec]=h_i[\wVec']$-- the latter is bit more compact and it is easier to comprehend as well.}
%\AH{Proof changed as suggested above.  I aired on the verbose side for the sake of clarity.}
For a given $\wVec \in \pw$, substituting definitions we have
\begin{align*}
&\expect{\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} = \nonumber\\
&\phantom{{}\sketchJParam{\sketchHashParam{\wVec}}}\expect{\big(\sum_{\substack{\wVecPrime \in \pw \st \\
														\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime}\big) \cdot \sketchPolarParam{\wVec} }.
\end{align*}
Since $\wVec \in \pw$, we know that there exists a $j \in [|\pw|]$ such that $\wVecPrime_j = \wVec$.  This yields
\begin{multline*}
\mathbb{E}\big[\kMapParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVecPrime_0}\cdot \sketchPolarParam{\wVec} + \cdots \\
+\kMapParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}\cdot \sketchPolarParam{\wVecPrime_j}+ \cdots \\
+ \kMapParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVecPrime_n}\cdot \sketchPolarParam{\wVec}\big].
\end{multline*}
\AH{break it up into w' and w}
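Due to the uniformity and independence of $\sketchPolar$, every term with $\wVecPrime \neq \wVec$ has expectation zero, while in the $j$-th term the polarity is multiplied by itself and equals $1$, so we have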
\begin{equation*}
= \kMapParam{\wVec},
\end{equation*}
thus verifying \eqref{eq:single-est}.

We can now take \eqref{eq:single-est}, substitute it in for \eqref{eq:allWorlds-est} and show by linearity of expectation that \eqref{eq:allWorlds-est} holds.
\begin{align}
&\expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}} \nonumber\\
&= \expect{\sum_{\wVecPrime \in \pw}\kMapParam{\wVecPrime} \cdot \sketchPolarParam{\wVecPrime} \cdot \sum_{\substack{\wVec \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\sketchPolarParam{\wVec}}\nonumber\\
&= \sum_{\wVec \in \pw} \expect{\left( \sum_{\substack{\wVecPrime \in \pw \st \\
\sketchHashParam{\wVecPrime} = \sketchHashParam{\wVec}}}\kMapParam{\wVecPrime}\cdot\sketchPolarParam{\wVecPrime}\right) \cdot \sketchPolarParam{\wVec}}\nonumber\\
&= \sum_{\wVec \in \pw}\kMapParam{\wVec}\label{eq:estExpect}.
\end{align}

%\begin{align}
%&\expect{\estimate}\\
%=&\expect{\estExpOne}\\
%=&\expect{\sum_{\substack{j \in [B],\\
%			 \wVec \in \pw~|~ \sketchHash{i}[\wVec] = j,\\
%			 \wVec[w']\in \pw~|~ \sketchHash{i}[\wVec[w']] = j} } v_t[\wVec] \cdot s_i[\wVec] \cdot s_i[\wVec[w']]}\\
%=&\multLineExpect\big[\sum_{\substack{j \in [B],\\
%				\wVec~|~\sketchHashParam{\wVec}= j,\\
%				\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
%				\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime} +  \nonumber \\
%&\phantom{{}\kMapParam{\wVec}}\sum_{\substack{j \in [B], \\
%				\wVec~|~\sketchHashParam{\wVec} = j,\\
%				\wVecPrime ~|~ \sketchHashParam{\wVecPrime} = j,\\ \wVec \neq \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot\sketchPolarParam{\wVecPrime}\big]\textit{(by linearity of expectation)}\\
%=&\expect{\sum_{\substack{j \in [B],\\
%				\wVec~|~\sketchHashParam{\wVec}= j,\\
%				\wVecPrime~|~\sketchHashParam{\wVecPrime} = j,\\
%				\wVec = \wVecPrime}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}} \nonumber \\
%&\phantom{{}\big[}\textit{(by uniform distribution in the second summation)}\\
%=& \estExp \label{eq:estExpect}
%\end{align}

%\AR{A general comment: The last display equation should have a period at the end. The idea is that display equations are considered part of a sentence and every sentence should end with a period.}
%\AH{Thank you for clarifying this, as I have always wondered what the convention was for display equations.  Hopefully, I haven't missed any end display equations in this paper, and have them all fixed properly.}

For the next step, we show that the variance of an estimate is small.%$$\varParam{\estimate}$$

\begin{align}
&\varParam{\sum_{\wVec \in \pw}\sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}}\nonumber\\
=~&\varParam{\sum_{\wVec \in \pw}\kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \sum_{\substack{\wVecPrime \in \pw \st\\ 												\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\sketchPolarParam{\wVecPrime}}\nonumber\\%\estExpOne}\\
=~& \mathbb{E}\big[\big(\sum_{\substack{ \wVec, \wVecPrime \in \pw \st \\
			 \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVecPrime}\nonumber\\
&\qquad - \expect{\sum_{\wVec \in \pw} \sketchJParam{\sketchHashParam{\wVec}} \cdot \sketchPolarParam{\wVec}}\big)^2\big]\nonumber\\
=~&\mathbb{E}\big[\sum_{\substack{
		\wVec_1, \wVec_2,\\
		 \wVecPrime_1, \wVecPrime_2 \in \pw,\\
		 \sketchHashParam{\wVec_1} = \sketchHashParam{\wVecPrime_1},\\
		 \sketchHashParam{\wVec_2} = \sketchHashParam{\wVecPrime_2}
		 }}\kMapParam{\wVec_1}  \kMapParam{\wVec_2}\sketchPolarParam{\wVec_1}\sketchPolarParam{\wVec_2}\sketchPolarParam{\wVecPrime_1}\sketchPolarParam{\wVecPrime_2}\big]\nonumber\\
&\qquad - \left(\sum_{\wVec \in \pw}\kMapParam{\wVec}\right)^2 \label{eq:var-sum-w}.
\end{align}
%\AR{The $-\mu^2$ term is missing in the above.}
%\AH{$\mu^2$ added.}

Note that four-wise independence is assumed across all four random variables of \eqref{eq:var-sum-w}.  Zooming in on the products of the $\sketchPolar$ functions,
\begin{equation}
\sketchPolarParam{\wa}\cdot\sketchPolarParam{\wb}\cdot\sketchPolarParam{\wc}\cdot\sketchPolarParam{\wVecD} \label{eq:polar-product}
\end{equation}
we make some key observations.%it can be seen that for $\wOne, \wOneP \in \pw$ and $\wTwo, \wTwoP \in \pw'$, all four random variables in \eqref{eq:polar-product} take their values from $\pw$, although we have iteration over two separate sets $\pw$.
%\AR{I do not know what you mean by ``iteration"} \AH{I don't know how to word what I am saying any better...by iteration I mean if you pictured the summation as nested for loops, one could have one level of nesting, where the outer loop would be iterating over the set $\pw$ and the inner loop would be iterating over a separate set of $\pw$.  However, maybe this is unnecessary to point out, and for now I have commented this out.}

Thus, there are five possible sets of $\wVec$ variable combinations, namely for $a, b, c, d \in \{1, 1', 2, 2'\} \st a \neq b \neq c \neq d$:
\begin{align*}
&\distPattern{1}:&\forElems{\cOne}\\
&\distPattern{2}:&\forElems{\cTwo}\\
&\distPattern{3}:&\forElems{\cThree}\\
&\distPattern{4}:&\forElems{\cFour}\\
&\distPattern{5}:&\forElems{\cFive}
\end{align*}
Note that each $\wVec$ is an argument of the same $\sketchPolar$ function, meaning that equal worlds map to the same element in the image of $\sketchPolar$.
%In $\distPattern{1}$, it is the case that if all $\wVec$ variables are equal, that we have only one possible combination of the four $\wVec$ vectors.  For $\distPattern{2}$ and $\distPattern{3}$, there are $\binom{4}{2} = 6$ possible inequalities.  However, note that no matter which world vectors are equal (unequal), we still end up with a positive polarity for $\distPattern{2}$ since, for each group of equal worlds, we have the same image of $\sketchPolar$ multiplied together, resulting in a product of 1 for each group, multiplied together.
%\AR{Two comments on the notation above. You should define the sets exactly-- i.e. you should not put a $*$ on some of the definitions. Second, it is not immediately clear why the above cover all the cases, so you should argue that is the case. I think it is easier to argue this is you argue in terms of number of inequalities in the possible $\binom{4}{2}=6$ comparisons-- note you probably should not write down all the 6 comparisons since that would be cumbersome: just use it in your argument.}
%\AH{I have defined the sets exactly.  For the second comment, I argue later on the fact that all cases are covered.  This second comment was a tough one for me to understand clearly, but I think I got it.  Please let me know if I didn't}

We are interested in those particular cases whose expectation does not equal zero, since an expectation of zero will not add to the summation of \eqref{eq:var-sum-w}.  In expectation we have that
\begin{align}
\forAllW{\distPattern{1}}&\rightarrow\expect{%\sum_{\substack{\elems \\
			%\st \cOne}}
		 \polarProdEq} = 1 \label{eq:polar-prod-all}
\end{align}
since we have the same element of the image of $\sketchPolar$ being multiplied by itself an even number of times.  Similarly,
\begin{align}
\forAllW{\distPattern{2}}&\rightarrow\expect{%\sum_{\substack{\elems \\
			%\st \cTwo}}
		\polarProdEq} = 1 \label{eq:polar-prod-two-and-two}
\end{align}
because the same element of the image of $\sketchPolar$ is being multiplied by itself for each equality, producing a polarity of 1 for each equality, and then a final product of 1.  For $\distPattern{3}, \distPattern{4}, \distPattern{5}$, we have a final product of two, three or four independent variables $\in \{-1, 1\}$, thus producing the following results:
\begin{align}
\forAllW{\distPattern{3}}&\rightarrow\expect{%\sum_{\substack{\elems \\
			%\st \cThree}}
		\polarProdEq} = 0 \nonumber
\end{align}
\begin{align}
\forAllW{\distPattern{4}}&\rightarrow\expect{%\sum_{\substack{\elems \\
			%\st \cFour}}
		\polarProdEq} = 0 \nonumber
\end{align}
\begin{align}
\forAllW{\distPattern{5}}&\rightarrow\expect{%\sum_{\substack{\elems \\
			%\st \cFive}}
		\polarProdEq} = 0. \nonumber
\end{align}
%\AR{You should argue why each of the equalities above. While we might decide to drop the arguments in the submitted paper when we are working things out, it is better to write down all the arguments. This is the best way to spot bugs in a proof. Otherwise, it is easy to introduce bugs by not checking for things that are ``obvious."}
%\AH{Thank you for explaining the process to me.  It makes sense to include 'obvious' arguments to me now.  I have argued the above points, but mostly in plain English.  Is this acceptable, or do I need to use formal notation?}

Only equations \eqref{eq:polar-prod-all} and \eqref{eq:polar-prod-two-and-two} influence the $\var$ computation.
Considering $\distPattern{1}$, the variance results in
\begin{equation}
\distPatOne\label{eq:distPatOne}.
\end{equation}
This is the case because we have that
\begin{align*}
&\sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
			\wOne = \wTwo = \wOneP = \wTwoP = \wVec}}
	\kMapParam{\wVec} \cdot \kMapParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec} \cdot \sketchPolarParam{\wVec}\\
= &\sum_{\substack{\wOne, \wTwo \in \pw \st \\
		\wOne = \wTwo}} \kMapParam{\wVec}\cdot \kMapParam{\wVec}\\
= &\sum_{\wVec \in \pw} \kMapParam{\wVec}^2.
\end{align*}

For the distribution pattern $\cTwo$, we have three subsets $\distPattern{21}, \distPattern{22}, \distPattern{23} \subseteq \distPattern{2}$ to consider.
\begin{align*}
&\distPattern{21}:&\cTwoV{\wOne}{\wOneP}{\wTwo}{\wTwoP} \\
&\distPattern{22}:&\cTwoV{\wOne}{\wTwo}{\wOneP}{\wTwoP}\\
&\distPattern{23}:&\cTwoV{\wOne}{\wTwoP}{\wOneP}{\wTwo}
\end{align*}
%\AR{Again you should be defining sets and not variants. E.g. you could have defined the subsets $S_{21},S_{22},S_{23}$.}
%\AH{Thank you for the great suggestion.  Is it a problem that I have waited to define these subsets explicitly until here?}
Considered separately, the subsets result in the following $\var$.
\begin{align}
&\wOne = \wOneP \neq \wTwo =\wTwoP \rightarrow\nonumber\\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
							\wOne = \wOneP = \wVec \neq\\
							 \wTwo = \wTwoP = \wVecPrime}}\kMapParam{\wVec}\kMapParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVecPrime} \nonumber\\
&\qquad = \sum_{\wVec, \wVecPrime \in \pw \st \wVec \neq \wVecPrime}\kMapParam{\wVec}\kMapParam{\wVecPrime}\label{eq:variantOne}\\
&\wOne = \wTwo \neq \wOneP = \wTwoP \rightarrow\nonumber\\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
				\wOne = \wTwo = \wVec \neq\\
				\wOneP = \wTwoP = \wVecPrime,\\
				\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \kMapParam{\wVec}\kMapParam{\wVec}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\nonumber \\
&\qquad = \sum_{\wVec, \wVecPrime \in \pw \st \wVec \neq \wVecPrime}| \{\wVecPrime \st \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} | \cdot \kMapParam{\wVec}^2\label{eq:variantTwo} \\
&\wOne = \wTwoP \neq \wOneP =\wTwo \rightarrow \nonumber \\
&\qquad = \sum_{\substack{\wOne, \wOneP, \wTwo, \wTwoP \in \pw \st \\
					\wOne = \wTwoP = \wVec \neq \\
					\wOneP = \wTwo = \wVecPrime,\\
					\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}} \kMapParam{\wVec} \kMapParam{\wVecPrime}\sketchPolarParam{\wVec}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVecPrime}\sketchPolarParam{\wVec} \nonumber \\
&\qquad = \sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
					\wVec \neq \wVecPrime,\\
					\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\kMapParam{\wVec}\cdot\kMapParam{\wVecPrime}\label{eq:variantThree}
\end{align}
Note that for $\distPattern{22}$, we have the cardinality of a bucket as a multiplicative factor for each squared annotation.  This is because of the constraint that $\wOne \neq \wOneP$ coupled with the additional constraint that $\sketchHashParam{\wOne} = \sketchHashParam{\wOneP}$.  Since $\wOneP$ must belong to the same bucket as $\wOne$, yet not be equal to $\wOne$, we have that each operand of the sum must be the annotation squared for each $\wOneP$ that belongs to the same bucket but is not equal to $\wOne$.

Looking at $\distPattern{23}$, we have a similar case as $\distPattern{22}$, but this time there is no multiplicative factor since $\wOneP$ and $\wTwoP$ are constrained to equal their opposite $\wVec$ counterparts, which are the arguments for both $\kMap{t}$ terms.
%\AR{You should again argue each of the claimed equalities above. Actually in the second equality, the term $|h_i[\wVec]=h_i[\wVec']|$ should really be $|\{\wVec'|h_i[\wVec]=h_i[\wVec']\}|$. Also this change needs to be propagated.}
%\AH{I have added both formal equations (to show the step by step evaluation) as well as verbose justification in English.  Please let me know if I can be more clear in arguing the equalities.}

%\AR{Also while I do like the use of macros, I think you have gone over-board in the other direction. It is good to create macros for symbols/variables names that you will use frequently but using macros for entire expressions is not a good idea. Among others, it makes it really hard for others to read it since they have to refer back to your macro definition each time they see it.}
%\AH{I think I have most of them taken care of.}


Notice that the second term (expectation \eqref{eq:estExpect} squared) of the $\var$ calculation is cancelled out by \eqref{eq:distPatOne} and \eqref{eq:variantOne}. %\AR{You should {\bf not} start with a {\em wrong} expression and then later on correct it. Start off with the correct expression in the first place: otherwise it just creates more confusion.}
%\AH{This has been fixed.}
\begin{equation*}
\big(\sum_{\wVec \in \pw}\kMapParam{\wVec}\big)^2 = \sum_{\wVec \in \pw}\kMapParam{\wVec}^2 +
	\sum_{\substack{\wVec, \wVecPrime \in \pw \st\\
				 \wVec \neq \wVecPrime}}\kMapParam{\wVec}\kMapParam{\wVecPrime}.%\distPatOne + \variantOne.
\end{equation*}
With only \eqref{eq:variantTwo} and \eqref{eq:variantThree} remaining, we have
\begin{multline*}
\varParam{\estimate} = \\
 \expect{\sum_{\wVec, \wVecPrime \in \pw \st \wVec \neq \wVecPrime}| \{\wVecPrime \st \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} | \cdot \kMapParam{\wVec}^2} ~+ \\
\expect{\sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
					\wVec \neq \wVecPrime,\\
					\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\kMapParam{\wVec}\cdot\kMapParam{\wVecPrime}}.
\end{multline*}
%\AR{The expectations are missing on the RHS. And this needs to be propagated.}
%\AH{Fixed.}
Our current analysis is limited to TIPDBs, where the annotations are in the boolean $\mathbb{B}$ set.  Because this is the case, the square of any element is itself.  Computing each term separately we have
\begin{align}
&\expect{\sum_{\substack{\wVec, \wVecPrime \in \pw \st\\
					 \wVec \neq \wVecPrime}}| \{\wVecPrime \st \sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}\} | \cdot \kMapParam{\wVec}^2} =\numWorldsP \cdot \left(\frac{\numWorlds}{\sketchCols} - 1\right)\label{eq:spaceOne}\\
&\expect{ \sum_{\substack{\wVec, \wVecPrime \in \pw \st \\
					\wVec \neq \wVecPrime,\\
					\sketchHashParam{\wVec} = \sketchHashParam{\wVecPrime}}}\kMapParam{\wVec}\cdot\kMapParam{\wVecPrime}} = \numWorldsP \cdot  \frac{\numWorldsP - 1}{\sketchCols}\label{eq:spaceTwo}.
\end{align}
In both equations, the sum of $\kMapParam{\wVec}$ over all $\wVec \in \pw$ is $\numWorldsP$ since as noted in equation \eqref{eq:mu} we are summing the number of worlds a tuple $t$ appears in, and for a TIPDB, that is exactly 2 to the power of the number of tuples in the TIPDB (due to the independence of tuples) times tuple $t$'s probability.

In equation \eqref{eq:spaceOne} we have the multiplicative factor which in expectation turns out to be the number of worlds $\numWorlds$ divided evenly across the number of buckets $\sketchCols$ minus the one world that $\wVecPrime$ cannot be (namely $\wVec$ itself).  This factor is applied to each of the $\numWorldsP$ worlds that $t$ appears in.

Equation \eqref{eq:spaceTwo} has each of the $\numWorldsP$ worlds times all the rest of the worlds that tuple $t$ appears in within that bucket.  This factor is represented by $\frac{\numWorldsP - 1}{\sketchCols}$, i.e. we have a world in a given bucket $j$ in which tuple $t$ appears, being summed over each of its products with other worlds in which it is present in bucket $j$.
%\AR{Again, argue why the above claims are true.}
%\AH{All my arguing is plain English.  Is there a better way to go about this?}
Summing \eqref{eq:spaceOne} and \eqref{eq:spaceTwo} and simplifying, the variance reduces to
\begin{equation}
\frac{2^{2N}(\prob + \prob^2)}{\sketchCols} - \numWorlds\big(\frac{\prob}{\sketchCols} + \prob\big).\label{eq:variance}
\end{equation}
By \eqref{eq:variance} we then have
\begin{align*}
\varSym &< 2^{2N}\big(\frac{2\prob}{\sketchCols}\big) \\
\sd &<\sdEq\\
\sdRel& < \sqrt{\frac{2}{\sketchCols\prob}}.
\end{align*}
Recall that $\sdRel = \frac{\sd}{\mu}$ where $\mu$ is defined as $\numWorldsP$ in \eqref{eq:mu}.

Since the sketch has multiple trials, a per-trial probability of exceeding the error bound $\errB$ that is smaller than one half guarantees, with high probability, an estimate whose error is less than or equal to the error bound when taking the median of all trials.  Expressing the error relative to $\mu$ in Chebyshev's Inequality yields
\begin{equation*}
\Pr\left[~|X - \mu|~> \Delta\right] < \frac{1}{3}.
%\cheby.
\end{equation*}
Substituting $\Delta = k\sigma \rightarrow k = \frac{\Delta}{\sigma} \rightarrow k^2 = \frac{\Delta^2}{\sigma^2}$ we have
\begin{equation*}
\Pr\left[~|X - \mu|~> \Delta~\right] < \frac{\sigma^2}{\Delta^2}.
\end{equation*}
%\AR{It would be better to state the deviation as say $\Delta$ instead of $\epsilon\mu$. Then derive the expression for $B$ in terms of $N,p,\Delta$. Then you can state as consequences what values of $B$ you get for the special cases of $\Delta=\epsilon\cdot 2^N$ and $\Delta=\epsilon\mu$.}
%\AH{Done.}
For the case when $\Delta = \mu\epsilon$, taking both Chebyshev bounds, setting them equal to each other, simplifying and solving for $\sketchCols$ results in
\begin{align*}
\frac{\sigma^2}{\Delta^2} &= \frac{1}{3}\\
\frac{ 2^{2N}\big(\frac{2\prob}{\sketchCols}\big)}{\mu^2\epsilon^2} &= \frac{1}{3}\\
\frac{2^{2N + 1}\prob}{\mu^2\epsilon^2\sketchCols} &= \frac{1}{3}\\
\frac{6 \cdot 2^{2N}\prob}{\mu^2\epsilon^2} &= \sketchCols \\
\frac{6}{\prob\epsilon^2} &= \sketchCols.
\end{align*}
In the above, recall that $\mu$, the expectation of an estimate, is $\numWorldsP$ as seen in equations \eqref{eq:mu} and \eqref{eq:allWorlds-est}.

Setting $\Delta = \epsilon\numWorlds$ gives
\begin{align*}
\frac{ 2^{2N}\big(\frac{2\prob}{\sketchCols}\big)}{\epsilon^22^{2N}} &= \frac{1}{3}\\
\frac{2^{2N+ 1}\prob}{\epsilon^22^{2N}\sketchCols} &= \frac{1}{3}\\
\frac{6 \cdot 2^{2N}\prob}{\epsilon^22^{2N}} &= \sketchCols \\
\frac{6\prob}{\epsilon^2} &= \sketchCols.
\end{align*}

Other cases for $\Delta$ can be solved similarly.