Done till corollaries of main thm of Sec 4.

Still need to make pass on the algo boxes and their lemmas
2020-12-14 14:11:46 -05:00 · 2020-12-14 14:11:46 -05:00 · 747404bf06
parent 28bcd103bf
commit 747404bf06
1 changed files with 29 additions and 3 deletions
--- a/approx_alg.tex
+++ b/approx_alg.tex
@ -42,7 +42,7 @@ tree, whose internal nodes are from the set $\{+, \times\}$, with leaf nodes bei

 Note that $\etree$ need not encode an expression in the standard monomial basis.  For instance, $\etree$ could represent a compressed form of the polynomial in~\cref{eq:poly-eg}, such as $(x + 2y)(2x - y)$.

-\begin{Definition}[poly$(\cdot)$]\label{def:poly-func}
+\begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func}
 Denote $\polyf(\etree)$ to be the function that takes as input expression tree $\etree$ and outputs its corresponding polynomial.  $\polyf(\cdot)$ is recursively defined on $\etree$ as follows, where $\etree_\lchild$ and $\etree_\rchild$ denote the left and right child of $\etree$ respectively.

 %	\begin{align*}
@ -154,8 +154,15 @@ Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(
 In the subsequent subsections we will prove the following theorem.

 \begin{Theorem}\label{lem:approx-alg}
-Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB. An estimate $\mathcal{E}$  of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot  k\cdot \log{k} \cdot depth(\etree))}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$, such that
-\[P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.\]
+Let $\etree$ be an expression tree for a UCQ over BIDB, define $\poly(\vct{X})=\polyf(\etree)$, and let $k=\deg(\poly)$.
+%Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB. 
+An estimate $\mathcal{E}$  of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time 
+\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot  k\cdot \log{k} \cdot depth(\etree)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right),\] 
+such that
+\begin{equation}
+\label{eq:approx-algo-bound}
+P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.
+\end{equation}
 %with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
 \end{Theorem}

@ -166,6 +173,25 @@ Given an expression tree $\etree$, define
 \end{Definition}
 \AR{Need to make sure use of indicator variable $\onesymbol$ above is consistent with the rest of the paper.}

+We next present a couple of corollaries of~\Cref{lem:approx-alg}.
+\begin{Corollary}
+\label{cor:approx-algo-const-p}
+Let $\poly(\vct{X})$ be as in~\Cref{lem:approx-alg} and let $\gamma=\gamma(\etree)$. Further let it be the case that $p_i\ge p_0$ for all $i\in[\numvar]$. Then an estimate $\mathcal{E}$  of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ satisfying~\cref{eq:approx-algo-bound} can be computed in time
+\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\etree)}{\error^2\cdot(1-\gamma)^2\cdot p_0^{2k}}\right).\]
+In particular, if $p_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\frac 1{\eps^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$. 
+\end{Corollary}
+We note that the restriction on $\gamma$ is satisfied by TIDB (where $\gamma=0$) and for some BIDB benchmarks (see~\Cref{sec:experiments} for more on this claim).
+\AR{{\bf Boris/Oliver:} Is there a way to claim that all probabilities in practice are actually constants: i.e. they do not increase with the number of  tuples?}
+
+\begin{proof}[Proof of~\Cref{cor:approx-algo-const-p}]
+The result follows by first noting that by definition of $\gamma$, we have 
+\[\rpoly(1,\dots,1)\ge (1-\gamma)\cdot \abs{\etree}(1,\dots,1).\] 
+Further, since each $p_i\ge p_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
+\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge p_0^k\cdot \rpoly(1,\dots,1).\]
+The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge p_0^k\cdot (1-\gamma)\cdot \abs{\etree}(1,\dots,1)$.
+Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $depth(\etree)\le \treesize(\etree)$ and absorbing all factors that just depend on $k$.
+\end{proof}
+
 \subsection{Approximating $\rpoly$}
 We state the approximation algorithm in terms of a $\bi$.
 \subsubsection{Description}