Done till corollaries of main thm of Sec 4.

Still need to make pass on the algo boxes and their lemmas
master
Atri Rudra 2020-12-14 14:11:46 -05:00
parent 28bcd103bf
commit 747404bf06
1 changed files with 29 additions and 3 deletions

View File

@ -42,7 +42,7 @@ tree, whose internal nodes are from the set $\{+, \times\}$, with leaf nodes bei
Note that $\etree$ need not encode an expression in the standard monomial basis. For instance, $\etree$ could represent a compressed form of the polynomial in~\cref{eq:poly-eg}, such as $(x + 2y)(2x - y)$.
\begin{Definition}[poly$(\cdot)$]\label{def:poly-func}
\begin{Definition}[$\polyf(\cdot)$]\label{def:poly-func}
Denote $\polyf(\etree)$ to be the function that takes as input expression tree $\etree$ and outputs its corresponding polynomial. $\polyf(\cdot)$ is recursively defined on $\etree$ as follows, where $\etree_\lchild$ and $\etree_\rchild$ denote the left and right child of $\etree$ respectively.
% \begin{align*}
@ -154,8 +154,15 @@ Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(
In the subsequent subsections we will prove the following theorem.
\begin{Theorem}\label{lem:approx-alg}
Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB. An estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot depth(\etree))}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$, such that
\[P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.\]
Let $\etree$ be an expression tree for a UCQ over BIDB, define $\poly(\vct{X})=\polyf(\etree)$, and let $k=\deg(\poly)$.
%Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB.
An estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot depth(\etree)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right),\]
such that
\begin{equation}
\label{eq:approx-algo-bound}
P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.
\end{equation}
%with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
\end{Theorem}
@ -166,6 +173,25 @@ Given an expression tree $\etree$, define
\end{Definition}
\AR{Need to make sure use of indicator variable $\onesymbol$ above is consistent with the rest of the paper.}
We next present a couple of corollaries of~\Cref{lem:approx-alg}.
\begin{Corollary}
\label{cor:approx-algo-const-p}
Let $\poly(\vct{X})$ be as in~\Cref{lem:approx-alg} and let $\gamma=\gamma(\etree)$. Further let it be the case that $p_i\ge p_0$ for all $i\in[\numvar]$. Then an estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ satisfying~\cref{eq:approx-algo-bound} can be computed in time
\[O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot k\cdot \log{k} \cdot depth(\etree)}{\error^2\cdot(1-\gamma)^2\cdot p_0^{2k}}\right).\]
In particular, if $p_0>0$ and $\gamma<1$ are absolute constants then the above runtime simplifies to $O_k\left(\frac 1{\eps^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$.
\end{Corollary}
We note that the restriction on $\gamma$ is satisfied by TIDB (where $\gamma=0$) and for some BIDB benchmarks (see~\Cref{sec:experiments} for more on this claim).
\AR{{\bf Boris/Oliver:} Is there a way to claim that all probabilities in practice are actually constants: i.e. they do not increase with the number of tuples?}
\begin{proof}[Proof of~\Cref{cor:approx-algo-const-p}]
The result follows by first noting that by definition of $\gamma$, we have
\[\rpoly(1,\dots,1)\ge (1-\gamma)\cdot \abs{\etree}(1,\dots,1).\]
Further, since each $p_i\ge p_0$ and $\poly(\vct{X})$ (and hence $\rpoly(\vct{X})$) has degree at most $k$, we have that
\[ \rpoly(\prob_1,\dots,\prob_\numvar) \ge p_0^k\cdot \rpoly(1,\dots,1).\]
The above two inequalities imply $\rpoly(\prob_1,\dots,\prob_\numvar) \ge p_0^k\cdot (1-\gamma)\cdot \abs{\etree}(1,\dots,1)$.
Applying this bound in the runtime bound in~\Cref{lem:approx-alg} gives the first claimed runtime. The final runtime of $O_k\left(\frac 1{\eps^2}\cdot\treesize(\etree)\cdot \log{\frac{1}{\conf}}\right)$ follows by noting that $depth(\etree)\le \treesize(\etree)$ and absorbing all factors that just depend on $k$.
\end{proof}
\subsection{Approximating $\rpoly$}
We state the approximation algorithm in terms of a $\bi$.
\subsubsection{Description}