% -*- root: main.tex -*-
\section{Bounding the Estimates}
\newcommand{\bMu}{\epsilon\mu_{\sketchCols_{sum}}}
\newcommand{\bBnd}{\sketchCols_{sketch}}
\newcommand{\mBnd}{\sketchRows_{sketch}}
\newcommand{\sBnd}{m_{sketch}}

For a $\sketchCols$ estimate, denoted $\sketchCols_{est}$, we are given the following:
\begin{align*}
&\mu_{\sketchCols_{sum}} \text{ is the expectation of the sum of estimates,}\\
&X = \sum_{i = 1}^{\sketchRows}X_i, \\
&X_i \text{ are i.i.d.\ random variables in } [0, 1],\ i \in \{1, \ldots, \sketchRows\}, \\
&X_i =
\begin{cases}
0 & \sketchCols_{est} > \bMu\\
1 & \sketchCols_{est} \leq \bMu
\end{cases},\\
&\Pr[X_i = 1] \geq \frac{2}{3},\\
&\Pr[X_i = 0] \leq \frac{1}{3},\\
&\mu = \frac{2}{3}\sketchRows,\\
&\epsilon = 0.5.
\end{align*}
Because Chebyshev's inequality tells us that the probability of a bad row estimate is at most $\frac{1}{3}$, each $X_i$ equals $1$ with probability at least $\frac{2}{3}$, giving $\mu = \frac{2}{3}\sketchRows$. We set $\epsilon = \frac{1}{2}$, the value for which $\epsilon \cdot \frac{2}{3} = \frac{1}{3}$, and then derive a bound on $\sketchRows$. Note that because we are only concerned with the lower tail, it suffices to use the generic Chernoff bound applied to the lower tail,
\begin{equation*}
\Pr[X \leq (1 - \epsilon)\mu] \leq e^{-\frac{\epsilon^2}{2 + \epsilon}\mu}.
\end{equation*}
Setting the failure probability to at most $\delta$ and solving for $\sketchRows$,
\begin{align*}
\delta &\geq e^{-\frac{(\frac{1}{2})^2}{2 + \frac{1}{2}}\cdot\frac{2}{3}\sketchRows}\\
\delta &\geq e^{-\frac{1}{10}\cdot\frac{2}{3}\sketchRows} = e^{-\frac{1}{15}\sketchRows}\\
e^{\frac{1}{15}\sketchRows} &\geq \frac{1}{\delta}\\
\sketchRows &\geq 15\ln\!\left(\frac{1}{\delta}\right).
\end{align*}
We are now ready to combine the bounds we have derived for $\sketchCols$ and $\sketchRows$, which we refer to as $\bBnd$ and $\mBnd$, respectively:
\begin{align*}
\mBnd \cdot \bBnd
&= 15\ln\!\left(\frac{1}{\delta}\right) \cdot \frac{3\left(\norm{\genV}_2^2\left(|\pw|\right) + \norm{\genV}_1^2\right)}{\epsilon^2 p^2}\\
&= \frac{45\left(\norm{\genV}_2^2\left(|\pw|\right) + \norm{\genV}_1^2\right)}{\epsilon^2 p^2}\ln\!\left(\frac{1}{\delta}\right).
\end{align*}
The sampling bound, $\sBnd$, is obtained via a Chernoff bound. Given
\begin{align*}
&X = \sum_{i = 1}^{m}X_i,\\
&X_i \text{ are i.i.d.\ random variables in } [0, 1], \\
&p = \frac{\norm{\genV}_1}{|W|},\\
&\bar{X} = \frac{X}{m},
\end{align*}
the two-sided Chernoff bound gives
\begin{equation*}
\Pr\!\left[|\bar{X} - p| \geq \epsilon p\right] \leq 2e^{-\frac{\epsilon^2}{2 + \epsilon}pm}.
\end{equation*}
Setting the failure probability to at most $\delta$ and solving for $m$,
\begin{align*}
\delta &\geq 2e^{-\frac{\epsilon^2}{2 + \epsilon}pm} \\
e^{\frac{\epsilon^2}{2 + \epsilon}pm} &\geq \frac{2}{\delta} \\
\frac{\epsilon^2}{2 + \epsilon}pm &\geq \ln\!\left(\frac{2}{\delta}\right)\\
m &\geq \frac{2 + \epsilon}{\epsilon^2 p}\ln\!\left(\frac{2}{\delta}\right).
\end{align*}
We are particularly interested in when the former, the combined sketch bound $\mBnd \cdot \bBnd$, is a lower bound on the latter, the sampling bound $\sBnd$. That is, we want to know when the following holds:
\begin{equation*}
\frac{2 + \epsilon}{\epsilon^2 p}\ln\!\left(\frac{2}{\delta}\right) > \frac{45\left(\norm{\genV}_2^2\left(|\pw|\right) + \norm{\genV}_1^2\right)}{\epsilon^2 p^2}\ln\!\left(\frac{1}{\delta}\right).
\end{equation*}
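As an equivalent reading of this final comparison (assuming $\epsilon, \delta \in (0, 1)$ and $p > 0$, so that every factor involved is positive), multiplying both sides by $\frac{\epsilon^2 p^2}{\ln(1/\delta)}$ yields
\begin{equation*}
(2 + \epsilon)\,p\,\frac{\ln\!\left(\frac{2}{\delta}\right)}{\ln\!\left(\frac{1}{\delta}\right)} > 45\left(\norm{\genV}_2^2\left(|\pw|\right) + \norm{\genV}_1^2\right).
\end{equation*}
Since $\frac{\ln(2/\delta)}{\ln(1/\delta)} \leq 2$ whenever $\delta \leq \frac{1}{2}$, a necessary condition in that regime is $2(2 + \epsilon)\,p > 45\left(\norm{\genV}_2^2\left(|\pw|\right) + \norm{\genV}_1^2\right)$. This is only a restatement of the comparison above, not an additional result.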
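For concreteness, the row and sample bounds can be instantiated with purely illustrative values. Taking $\epsilon = 0.5$ as in the givens above, and assuming $\delta = 0.01$ and $p = 0.1$ (values chosen only for this example, not derived in this work),
\begin{align*}
\sketchRows &\geq 15\ln\!\left(\tfrac{1}{0.01}\right) = 15\ln(100) \approx 69.1,\\
m &\geq \frac{2 + 0.5}{0.5^2 \cdot 0.1}\ln\!\left(\tfrac{2}{0.01}\right) = 100\ln(200) \approx 529.8,
\end{align*}
so roughly $70$ rows and $530$ samples satisfy the respective bounds under these assumed values.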