Done in Sec 4 till definition of gamma
parent
b7d506364b
commit
28bcd103bf
138
approx_alg.tex
138
approx_alg.tex
|
@ -1,19 +1,49 @@
|
|||
%root: main.tex
|
||||
\section{$1 \pm \epsilon$ Approximation Algorithm}
|
||||
\label{sec:algo}
|
||||
Since it is the case that computing the expected multiplicity of a compressed representation of a bag polynomial is hard, it is then desirable to have an algorithm to approximate the multiplicity in linear time, which is what we describe next.
|
||||
|
||||
First, let us introduce some useful definitions and notation. For illustrative purposes in the definitions below, let us consider when $\poly(\vct{X}) = 2x^2 + 3xy - 2y^2$.
|
||||
In~\cref{sec:hard}, we showed that computing the expected multiplicity of a compressed representation of a bag polynomial for TIDB (even just based on project-join queries) is unlikely to be possible in linear time (\cref{thm:mult-p-hard-result}), even if all tuples have the same probability of being present (\cref{cor:single-p-hard}). Given this, in this section we will design an approximation algorithm for this problem that runs in {\em linear time}. Unlike the results in~\cref{sec:hard}, our approximation algorithm works for BIDB, though our bounds are more meaningful for a non-trivial subclass of BIDB that includes TIDB as well as PDB benchmarks (\cref{sec:experiments}).
|
||||
%it is then desirable to have an algorithm to approximate the multiplicity in linear time, which is what we describe next.
|
||||
|
||||
\subsection{Preliminaries and some more notation}
|
||||
|
||||
First, let us introduce some useful definitions and notation related to polynomials and their representations. For illustrative purposes in the definitions below, we will use the following {\em bivariate} polynomial:
|
||||
\begin{equation}
|
||||
\label{eq:poly-eg}
|
||||
\poly(x,y) = 2x^2 + 3xy - 2y^2.
|
||||
\end{equation}
|
||||
|
||||
\AR{The definition from this and my next comments are "new"-- they might be better off in the prelims section and moved to later in this section. Am keeping all of them in one place for easy lookup for now.}
|
||||
|
||||
\begin{Definition}[Variables in a monomial]\label{def:vars}
|
||||
Given a monomial $v$, we use $\var(v)$ to denote the set of variables in $v$.
|
||||
\end{Definition}
|
||||
For example the monomial $3xy$ in the polynomial in~\cref{eq:poly-eg} has $\var(3xy)=\inset{x,y}$.
|
||||
|
||||
\begin{definition}[Modding with a set]\label{def:mod-set}
|
||||
Let $S$ be a {\em set} of polynomials over $\vct{X}$. Then $\poly(\vct{X})\mod{S}$ is the polynomial obtained by taking the mod of $\poly(\vct{X})$ over {\em all} polynomials in $S$ (the order does not matter).
|
||||
\end{definition}
|
||||
For example when $S_0=\inset{x^2-x,y^2-y}$, taking the polynomial in~\cref{eq:poly-eg} mod $S_0$, we get $2x+3xy-2y$.
|
||||
|
||||
\begin{Definition}\label{def:mod-set-polys}
|
||||
Given the set of BIDB variables $\inset{X_{b,i}}$, define
|
||||
\[\mathcal{B}=\inset{X_{b,i}\cdot X_{b,j}|\text{ for every block } b \text{ and } i\ne j},\]
|
||||
\[\mathcal{T}=\inset{X_{b,i}^2-X_{b,i}|\text{ for every block } b \text{ and } i}.\]
|
||||
\end{Definition}
|
||||
|
||||
\AR{Something to check/square out: we have been using both $X_{b,j}$ and $X_1,\dots,X_n$ for vars in BIDB-- I think this is OK as long as we explicitly talk about these two notations and how we might switch between them. Or we decide not to...}
|
||||
|
||||
\AR{Some of these definitions have been pulled to the prelims section. Another pass is needed to sync up these occurrences. Leaving them in for now.}
|
||||
|
||||
\begin{Definition}[Expression Tree]\label{def:express-tree}
|
||||
An expression tree $\etree$ is a binary %an ADT logically viewed as an n-ary
|
||||
tree, whose internal nodes are from the set $\{+, \times\}$, with leaf nodes being either from the set $\mathbb{R}$ $(\tnum)$ or from the set of monomials $(\var)$. The members of $\etree$ are \type, \val, \vari{partial}, \vari{children}, and \vari{weight}, where \type is the type of value stored in the node $\etree$ (i.e. one of $\{+, \times, \var, \tnum\}$, \val is the value stored, and \vari{children} is the list of $\etree$'s children where $\etree_\lchild$ is the left child and $\etree_\rchild$ the right child. Remaining fields hold values whose semantics we will fix later. When $\etree$ is used as input of ~\cref{alg:mon-sam} and ~\cref{alg:one-pass}, the values of \vari{partial} and \vari{weight} will not be set. %SEMANTICS FOR \etree: \vari{partial} is the sum of $\etree$'s coefficients , n, and \vari{weight} is the probability of $\etree$ being sampled.
|
||||
tree, whose internal nodes are from the set $\{+, \times\}$, with leaf nodes being either from the set $\mathbb{R}$ $(\tnum)$ or from the set of monomials $(\var)$. The members of $\etree$ are \type, \val, \vari{partial}, \vari{children}, and \vari{weight}, where \type is the type of value stored in the node $\etree$ (i.e., one of $\{+, \times, \var, \tnum\}$), \val is the value stored, and \vari{children} is the list of $\etree$'s children where $\etree_\lchild$ is the left child and $\etree_\rchild$ the right child. Remaining fields hold values whose semantics we will fix later. When $\etree$ is used as input of~\Cref{alg:mon-sam} and~\Cref{alg:one-pass}, the values of \vari{partial} and \vari{weight} will not be set. %SEMANTICS FOR \etree: \vari{partial} is the sum of $\etree$'s coefficients , n, and \vari{weight} is the probability of $\etree$ being sampled.
|
||||
\end{Definition}
|
||||
|
||||
Note that $\etree$ need not encode an expression in the standard monomial basis. For instance, $\etree$ could represent a compressed form of the running example, such as $(x + 2y)(2x - y)$.
|
||||
Note that $\etree$ need not encode an expression in the standard monomial basis. For instance, $\etree$ could represent a compressed form of the polynomial in~\cref{eq:poly-eg}, such as $(x + 2y)(2x - y)$.
|
||||
|
||||
\begin{Definition}[poly$(\cdot)$]\label{def:poly-func}
|
||||
Denote $poly(\etree)$ to be the function that takes as input expression tree $\etree$ and outputs its corresponding polynomial. $poly(\cdot)$ is recursively defined on $\etree$ as follows, where $\etree_\lchild$ and $\etree_\rchild$ denote the left and right child of $\etree$ respectively.
|
||||
Denote $\polyf(\etree)$ to be the function that takes as input expression tree $\etree$ and outputs its corresponding polynomial. $\polyf(\cdot)$ is recursively defined on $\etree$ as follows, where $\etree_\lchild$ and $\etree_\rchild$ denote the left and right child of $\etree$ respectively.
|
||||
|
||||
% \begin{align*}
|
||||
% &\etree.\type = +\mapsto&& \polyf(\etree_\lchild) + \polyf(\etree_\rchild)\\
|
||||
|
@ -37,11 +67,11 @@ Note that addition and multiplication above follow the standard interpretation o
|
|||
\begin{Definition}[Expression Tree Set]\label{def:express-tree-set}$\etreeset{\smb}$ is the set of all possible expression trees $\etree$, such that $poly(\etree) = \poly(\vct{X})$.
|
||||
\end{Definition}
|
||||
|
||||
For our running example, $\etreeset{\smb} = \{2x^2 + 3xy - 2y^2, (x + 2y)(2x - y), x(2x - y) + 2y(2x - y), 2x(x + 2y) - y(x + 2y)\}$. Note that \cref{def:express-tree-set} implies that $\etree \in \etreeset{poly(\etree)}$.
|
||||
For the polynomial in~\cref{eq:poly-eg}, $\etreeset{\smb}$ would include the following (represented as their corresponding expression trees): $2x^2 + 3xy - 2y^2, (x + 2y)(2x - y), x(2x - y) + 2y(2x - y), 2x(x + 2y) - y(x + 2y)$. Note that \cref{def:express-tree-set} implies that for any expression tree $\etree$, we have $\etree \in \etreeset{poly(\etree)}$.
|
||||
|
||||
|
||||
\begin{Definition}[Expanded T]\label{def:expand-tree}
|
||||
$\expandtree{\etree}$ is the pure sum of products expansion of $\etree$. The logical view of \expandtree{\etree} ~is a list of tuples $(\monom, \coef)$, where $\monom$ is of type monomial and $\coef$ is in $\mathbb{R}$. \expandtree{\etree} has the following recursive definition.
|
||||
$\expandtree{\etree}$ is the (pure) sum of products expansion of $\etree$, which we formally define next. The logical view of \expandtree{\etree} ~is a list of tuples $(\monom, \coef)$, where $\monom$ is a monomial and $\coef$ is in $\mathbb{R}$. \expandtree{\etree} has the following recursive definition (where $\circ$ is list concatenation).
|
||||
\end{Definition}
|
||||
|
||||
% recursively defined as
|
||||
|
@ -65,7 +95,7 @@ $\expandtree{\etree}$ is the pure sum of products expansion of $\etree$. The lo
|
|||
|
||||
|
||||
\begin{Example}\label{example:expr-tree-T}
|
||||
To illustrate, consider the factorized representation $(x + 2y)(2x - y)$ of the running example. Its expression tree $\etree$ is illustrated in Figure ~\ref{fig:expr-tree-T}. The pure expansion of the product is $2x^2 - xy + 4xy - 2y^2 = \expandtree{\etree}$, logically viewed as $[(2, x^2), (-1, xy), (4, xy), (-2, y^2)]$.
|
||||
Consider the factorized representation $(x + 2y)(2x - y)$ of the polynomial in~\cref{eq:poly-eg}. Its expression tree $\etree$ is illustrated in Figure~\ref{fig:expr-tree-T}. The pure expansion of the product is $2x^2 - xy + 4xy - 2y^2$ and the $\expandtree{\etree}$ is $[(x^2, 2), (xy, -1), (xy, 4), (y^2, -2)]$.
|
||||
\end{Example}
|
||||
|
||||
|
||||
|
@ -108,29 +138,34 @@ To illustrate, consider the factorized representation $(x + 2y)(2x - y)$ of the
|
|||
|
||||
|
||||
\begin{Definition}[Positive T]\label{def:positive-tree}
|
||||
Let the positive tree, denoted $\abs{\etree}$ be the resulting expression tree such that, for each leaf node $\etree'$ of $\etree$ where $\etree'.\type$ is $\tnum$, $\etree'.\vari{value} = |\etree'.\vari{value}|$. %value $\coef$ of each coefficient leaf node in $\etree$ is set to %$\coef_i$ in $\etree$ is exchanged with its absolute value$|\coef|$.
|
||||
For any expression tree $\etree$, the corresponding
|
||||
{\em positive tree}, denoted $\abs{\etree}$, is obtained from $\etree$ as follows. For each leaf node $\ell$ of $\etree$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$. %value $\coef$ of each coefficient leaf node in $\etree$ is set to %$\coef_i$ in $\etree$ is exchanged with its absolute value$|\coef|$.
|
||||
\end{Definition}
|
||||
|
||||
Using the same factorization from ~\cref{example:expr-tree-T}, $poly(\abs{\etree}) = (x + 2y)(2x + y) = 2x^2 +xy +4xy + 2y^2 = 2x^2 + 5xy + 2y^2$. Note that this \textit{is not} the same as $\poly(\vct{X})$.
|
||||
Using the same factorization from ~\cref{example:expr-tree-T}, $poly(\abs{\etree}) = (x + 2y)(2x + y) = 2x^2 +xy +4xy + 2y^2 = 2x^2 + 5xy + 2y^2$. Note that this \textit{is not} the same as the polynomial from~\cref{eq:poly-eg}.
|
||||
|
||||
\begin{Definition}[Evaluation]\label{def:exp-poly-eval}
|
||||
Given an expression tree $\etree$ and $\vct{v} \in \mathbb{R}^\numvar$, $\etree(\vct{v}) = poly(\etree)(\vct{v})$.
|
||||
\end{Definition}
|
||||
|
||||
\begin{Definition}[Probability $\gamma$]
|
||||
Define $\gamma$ to be the probability that a monomial with variables from the same block $\block$ is sampled.
|
||||
\end{Definition}
|
||||
Algorithm ~\ref{alg:est-gamma} estimates $\gamma$.
|
||||
|
||||
When a monomial with cross terms from the same block $\block$ is sampled, our algorithm will drop the sample and produce a new sample.
|
||||
\subsection{Our main result}
|
||||
|
||||
|
||||
In the subsequent subsections we lay the groundwork to prove the following theorem.
|
||||
In the subsequent subsections we will prove the following theorem.
|
||||
|
||||
\begin{Theorem}\label{lem:approx-alg}
|
||||
For any query polynomial $\poly(\vct{X})$, an approximation of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)\cdot(1 - \gamma)^2}\right)$, with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
|
||||
Let $\poly(\vct{X})$ be a query polynomial corresponding to the output of a UCQ in a BIDB. An estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time $O\left(\treesize(\etree) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\etree}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot depth(\etree)}{\error^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)$, such that
|
||||
\[P\left(\left|\mathcal{E} - \rpoly(\prob_1,\dots,\prob_\numvar)\right|> \error \cdot \rpoly(\prob_1,\dots,\prob_\numvar)\right) \leq \conf.\]
|
||||
%with multiplicative $(\error,\delta)$-bounds, where $k$ denotes the degree of $\poly$.
|
||||
\end{Theorem}
|
||||
|
||||
It turns out that to get linear runtime results from~\cref{lem:approx-alg}, we will need to define another parameter (which roughly counts the (weighted) number of monomials in $\expandtree{\etree}$ that get `canceled' when modded with $\mathcal{B}$):
|
||||
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
|
||||
Given an expression tree $\etree$, define
|
||||
\[\gamma(\etree)=\frac{\sum_{(\monom, \coef)\in \expandtree{\etree}} \abs{\coef}\cdot \onesymbol\inparen{\monom\mod{\mathcal{B}}\equiv 0}}{\abs{\etree}(1,\ldots, 1)}\]
|
||||
\end{Definition}
|
||||
\AR{Need to make sure use of indicator variable $\onesymbol$ above is consistent with the rest of the paper.}
|
||||
|
||||
\subsection{Approximating $\rpoly$}
|
||||
We state the approximation algorithm in terms of a $\bi$.
|
||||
\subsubsection{Description}
|
||||
|
@ -225,36 +260,36 @@ Recall that the notation $[x, y]$ denotes the range of values between $x$ and $y
|
|||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\caption{$\algname{Estimate}$ $\gamma(\etree, \numsamp, \abs{\block})$}
|
||||
\label{alg:est-gamma}
|
||||
\begin{algorithmic}[1]
|
||||
\Require \etree: Binary Expression Tree
|
||||
\Require $\numsamp \in \mathbb{N}$
|
||||
\Require $\abs{\block} \in \mathbb{N}$
|
||||
\Ensure \vari{cTerms} $\in \mathbb{R}$
|
||||
|
||||
\State $\vari{cTerms} \gets 0$
|
||||
\State $\vari{isCross} \gets 0$
|
||||
\For{$\vari{i} \text{ in } 1 \text{ to } \numsamp$}
|
||||
\State $\bivec \gets [0]^{\abs{\block}}$
|
||||
\State $(\vari{M}, \vari{sgn}) \gets $ \sampmon($\etree_\vari{mod}$)
|
||||
\For{$\vari{x}_{\vari{b}, \vari{j}} \text{ in } \vari{M}$}
|
||||
\If{$\bivec[b] = 1$}
|
||||
\State $\vari{isCross} \gets 1$
|
||||
\State Break
|
||||
\Else
|
||||
\State $\bivec[b] \gets 1$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\If{$\vari{isCross} = 1$}
|
||||
\State $\vari{cTerms} \gets \vari{cTerms} + 1$
|
||||
\State $\vari{isCross} \gets 0$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return $\frac{\vari{cTerms}}{\numsamp}$
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
%\begin{algorithm}[H]
|
||||
% \caption{$\algname{Estimate}$ $\gamma(\etree, \numsamp, \abs{\block})$}
|
||||
% \label{alg:est-gamma}
|
||||
% \begin{algorithmic}[1]
|
||||
% \Require \etree: Binary Expression Tree
|
||||
% \Require $\numsamp \in \mathbb{N}$
|
||||
% \Require $\abs{\block} \in \mathbb{N}$
|
||||
% \Ensure \vari{cTerms} $]in \mathbb{R}$
|
||||
%
|
||||
% \State $\vari{cTerms} \gets 0$
|
||||
% \State $\vari{isCross} \gets 0$
|
||||
% \For{$\vari{i} \text{ in } 1 \text{ to } \numsamp$}
|
||||
% \State $\bivec \gets [0]^{\abs{\block}}$
|
||||
% \State $(\vari{M}, \vari{sgn}) \gets $ \sampmon($\etree_\vari{mod}$)
|
||||
% \For{$\vari{x}_{\vari{b}, \vari{j}} \text{ in } \vari{M}$}
|
||||
% \If{$\bivec[b] = 1$}
|
||||
% \State $\vari{isCross} \gets 1$
|
||||
% \State Break
|
||||
% \Else
|
||||
% \State $\bivec[b] \gets 1$
|
||||
% \EndIf
|
||||
% \EndFor
|
||||
% \If{$\vari{isCross} = 1$}
|
||||
% \State $\vari{cTerms} \gets \vari{cTerms} + 1$
|
||||
% \State $\vari{isCross} \gets 0$
|
||||
% \EndIf
|
||||
% \EndFor
|
||||
% \State \Return $\frac{\vari{cTerms}}{\numsamp}$
|
||||
% \end{algorithmic}
|
||||
%\end{algorithm}
|
||||
|
||||
|
||||
\subsubsection{Correctness}
|
||||
|
@ -653,4 +688,11 @@ Finally, line ~\ref{alg:sample-times-product} is in $O(1)$ for a product and an
|
|||
|
||||
\end{proof}
|
||||
\qed
|
||||
|
||||
\subsection{Experimental results}
|
||||
\label{sec:experiments}
|
||||
|
||||
\input{experiments}
|
||||
|
||||
%\AR{Experimental stuff about BIDB should go in here}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
\AR{Experimental stuff about BIDB should go in here}
|
|
@ -85,7 +85,7 @@
|
|||
\newcommand{\type}{\vari{type}\xspace}
|
||||
\newcommand{\wght}{\vari{weight}\xspace}
|
||||
%types of T
|
||||
\newcommand{\var}{var}
|
||||
\newcommand{\var}{\textsc{var}}
|
||||
\newcommand{\tnum}{num}
|
||||
%%%%%%%
|
||||
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
|
||||
|
|
Loading…
Reference in New Issue