Merge branch 'master' of gitlab.odin.cse.buffalo.edu:ahuber/SketchingWorlds
This commit is contained in:
commit
54ff2ef002
|
@ -16,7 +16,7 @@
|
||||||
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
|
\For{$\vari{i} \in 1 \text{ to }\numsamp$}\label{alg:sampling-loop}\Comment{Perform the required number of samples}
|
||||||
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\circuit_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon is \Cref{alg:sample}. Note that $\vari{sgn}_\vari{i}$ is the \emph{sign} of the monomial's coefficient and \emph{not} the coefficient itself}
|
\State $(\vari{M}, \vari{sgn}_\vari{i}) \gets $ \sampmon($\circuit_\vari{mod}$)\label{alg:mon-sam-sample}\Comment{\sampmon is \Cref{alg:sample}. Note that $\vari{sgn}_\vari{i}$ is the \emph{sign} of the monomial's coefficient and \emph{not} the coefficient itself}
|
||||||
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
|
\If{$\vari{M}$ has at most one variable from each block}\label{alg:check-duplicate-block}
|
||||||
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\var\inparen{\vari{M}}}p_j$\label{alg:mon-sam-assign1}
|
\State $\vari{Y}_\vari{i} \gets \prod_{X_j\in\vari{M}}p_j$\label{alg:mon-sam-assign1}\Comment{\vari{M} is the sampled monomial's set of variables (cf.~\cref{subsec:sampmon-remarks})}
|
||||||
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times\; \vari{sgn}_\vari{i}$\label{alg:mon-sam-product}
|
\State $\vari{Y}_\vari{i} \gets \vari{Y}_\vari{i} \times\; \vari{sgn}_\vari{i}$\label{alg:mon-sam-product}
|
||||||
\State $\accum \gets \accum + \vari{Y}_\vari{i}$\Comment{Store the sum over all samples}\label{alg:mon-sam-add}
|
\State $\accum \gets \accum + \vari{Y}_\vari{i}$\Comment{Store the sum over all samples}\label{alg:mon-sam-add}
|
||||||
\EndIf
|
\EndIf
|
||||||
|
|
|
@ -6,19 +6,20 @@
|
||||||
In \Cref{sec:hard}, we showed that computing the expected multiplicity of a compressed lineage polynomial for \ti (even just based on project-join queries), and by extension \bi (or more general \abbrPDB models) %any $\semNX$-PDB)
|
In \Cref{sec:hard}, we showed that computing the expected multiplicity of a compressed lineage polynomial for \ti (even just based on project-join queries), and by extension \bi (or more general \abbrPDB models) %any $\semNX$-PDB)
|
||||||
is unlikely to be possible in linear time (\Cref{thm:mult-p-hard-result}), even if all tuples have the same probability (\Cref{th:single-p-hard}).
|
is unlikely to be possible in linear time (\Cref{thm:mult-p-hard-result}), even if all tuples have the same probability (\Cref{th:single-p-hard}).
|
||||||
Given this, we now design an approximation algorithm for our problem that runs in {\em linear time}.\footnote{For a very broad class of circuits: please see the discussion after \Cref{lem:val-ub} for more.}
|
Given this, we now design an approximation algorithm for our problem that runs in {\em linear time}.\footnote{For a very broad class of circuits: please see the discussion after \Cref{lem:val-ub} for more.}
|
||||||
The following approximation algorithm applies to \bi, though our bounds are more meaningful for a non-trivial subclass of \bis that contains both \tis, as well as the PDBench benchmark~\cite{pdbench}.
|
The following approximation algorithm applies to \bi, though our bounds are more meaningful for a non-trivial subclass of \bis that contains both \tis, as well as the PDBench benchmark~\cite{pdbench}. As before, all proofs and pseudocode can be found in \Cref{sec:proofs-approx-alg}.
|
||||||
%it is then desirable to have an algorithm to approximate the multiplicity in linear time, which is what we describe next.
|
%it is then desirable to have an algorithm to approximate the multiplicity in linear time, which is what we describe next.
|
||||||
|
|
||||||
\subsection{Preliminaries and some more notation}
|
\subsection{Preliminaries and some more notation}
|
||||||
|
|
||||||
We now introduce useful definitions and notation related to circuits and polynomials. All proofs and missing pseudocode can be found in \Cref{sec:proofs-approx-alg}.
|
We now introduce useful definitions and notation related to circuits and polynomials.
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
%\begin{Definition}[Variables in a monomial]\label{def:vars}
|
%\begin{Definition}[Variables in a monomial]\label{def:vars}
|
||||||
% Given a monomial $v$, we use $\var(v)$ to denote the set of variables in $v$.
|
% Given a monomial $v$, we use $\var(v)$ to denote the set of variables in $v$.
|
||||||
%\end{Definition}
|
%\end{Definition}
|
||||||
|
%\noindent For example the monomial $XY$ has $\var(XY)=\inset{X,Y}$.
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\noindent For example the monomial $XY$ has $\var(XY)=\inset{X,Y}$.
|
|
||||||
|
|
||||||
|
|
||||||
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
|
\begin{Definition}[$\expansion{\circuit}$]\label{def:expand-circuit}
|
||||||
|
@ -34,33 +35,42 @@ $\expansion{\circuit} =
|
||||||
\end{cases}
|
\end{cases}
|
||||||
$
|
$
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
For further explanation, please refer to \Cref{example:expr-tree-T}.
|
Consider the circuit $\circuit$ illustrated in \Cref{fig:circuit}. Its expansion $\expansion{\circuit}$ is then $[(X, 2), (XY, -1), (XY, 4), (Y, -2)]$.
|
||||||
|
|
||||||
\begin{Definition}[$\abs{\circuit}(\vct{X})$]\label{def:positive-circuit}
|
\begin{Definition}[$\abs{\circuit}(\vct{X})$]\label{def:positive-circuit}
|
||||||
For any circuit $\circuit$, the corresponding
|
For any circuit $\circuit$, the corresponding
|
||||||
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
|
{\em positive circuit}, denoted $\abs{\circuit}$, is obtained from $\circuit$ as follows. For each leaf node $\ell$ of $\circuit$ where $\ell.\type$ is $\tnum$, update $\ell.\vari{value}$ to $|\ell.\vari{value}|$.
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
Please see \Cref{ex:def-pos-circ} for an illustration.
|
Conveniently, $\abs{\circuit}\inparen{1,\ldots,1}$ gives us the number of terms represented in $\expansion{\circuit}$, i.e.,\ $\sum\limits_{\inparen{\monom, \coef} \in \expansion{\circuit}}\abs{\coef}$.
|
||||||
|
|
||||||
\begin{Definition}[\size($\cdot$)]\label{def:size}
|
\begin{Definition}[\size($\cdot$), \depth$\inparen{\cdot}$]\label{def:size-depth}
|
||||||
The function \size~ takes a circuit $\circuit$ as input and outputs the number of gates (nodes) in \circuit.
|
The functions \size and \depth output the number of gates and levels respectively for input \circuit.
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
|
|
||||||
\begin{Definition}[\depth($\cdot$)]
|
%\begin{Definition}[\depth($\cdot$)]
|
||||||
The function \depth~ has circuit $\circuit$ as input and outputs the number of levels in \circuit.
|
%The function \depth has circuit $\circuit$ as input and outputs the number of levels in \circuit.
|
||||||
\end{Definition}
|
%\end{Definition}
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%NEEDS to be moved to appendix
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
%\begin{Definition}[$\degree(\cdot)$]\label{def:degree}\footnote{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\degree(\circuit)$ and the latter can be strictly larger (e.g. consider the case when $\circuit$ multiplies two copies of the constant $1$-- here we have $\deg(\circuit)=1$ but degree of $\polyf(\abs{\circuit})$ is $0$).}
|
||||||
|
%$\degree(\circuit)$ is defined recursively as follows:
|
||||||
|
%\[\degree(\circuit)=
|
||||||
|
%\begin{cases}
|
||||||
|
%\max(\degree(\circuit_\linput),\degree(\circuit_\rinput)) & \text{ if }\circuit.\type=+\\
|
||||||
|
%\degree(\circuit_\linput) + \degree(\circuit_\rinput)+1 &\text{ if }\circuit.\type=\times\\
|
||||||
|
%1 & \text{ if }\circuit.\type = \var\\
|
||||||
|
%0 & \text{otherwise}.
|
||||||
|
%\end{cases}
|
||||||
|
%\]
|
||||||
|
%\end{Definition}
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%END move to appendix
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
\begin{Definition}[$\degree(\cdot)$]\label{def:degree}\footnote{Note that the degree of $\polyf(\abs{\circuit})$ is always upper bounded by $\degree(\circuit)$ and the latter can be strictly larger (e.g.,\ consider the case when $\circuit$ multiplies two copies of the constant $1$---here we have $\degree(\circuit)=1$ but the degree of $\polyf(\abs{\circuit})$ is $0$).}
|
|
||||||
$\degree(\circuit)$ is defined recursively as follows:
|
|
||||||
\[\degree(\circuit)=
|
|
||||||
\begin{cases}
|
|
||||||
\max(\degree(\circuit_\linput),\degree(\circuit_\rinput)) & \text{ if }\circuit.\type=+\\
|
|
||||||
\degree(\circuit_\linput) + \degree(\circuit_\rinput)+1 &\text{ if }\circuit.\type=\times\\
|
|
||||||
1 & \text{ if }\circuit.\type = \var\\
|
|
||||||
0 & \text{otherwise}.
|
|
||||||
\end{cases}
|
|
||||||
\]
|
|
||||||
\end{Definition}
|
|
||||||
Finally, we will need the following notation for the complexity of multiplying large integers:
|
Finally, we will need the following notation for the complexity of multiplying large integers:
|
||||||
\begin{Definition}[$\multc{\cdot}{\cdot}$]\footnote{We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.}
|
\begin{Definition}[$\multc{\cdot}{\cdot}$]\footnote{We note that when doing arithmetic operations on the RAM model for input of size $N$, we have that $\multc{O(\log{N})}{O(\log{N})}=O(1)$. More generally we have $\multc{N}{O(\log{N})}=O(N\log{N}\log\log{N})$.}
|
||||||
In a RAM model of word size of $W$-bits, $\multc{M}{W}$ denotes the complexity of multiplying two integers represented with $M$-bits. (We will assume that for input of size $N$, $W=O(\log{N})$.
|
In a RAM model of word size of $W$-bits, $\multc{M}{W}$ denotes the complexity of multiplying two integers represented with $M$-bits. (We will assume that for input of size $N$, $W=O(\log{N})$.
|
||||||
|
@ -82,12 +92,16 @@ such that
|
||||||
\end{equation}
|
\end{equation}
|
||||||
\end{Theorem}
|
\end{Theorem}
|
||||||
|
|
||||||
To get linear runtime results from \Cref{lem:approx-alg}, we will need to define another parameter modeling the (weighted) number of monomials in $\expansion{\circuit}$ to be `canceled' when it is modded with $\mathcal{B}$ (\Cref{def:mod-set-polys}).
|
To get linear runtime results from \Cref{lem:approx-alg}, we will need to define another parameter modeling the (weighted) number of monomials in %$\poly\inparen{\vct{X}}$
|
||||||
|
$\expansion{\circuit}$
|
||||||
|
to be `canceled' when monomials with dependent variables are removed (\cref{def:reduced-bi-poly}). %when it is modded with $\mathcal{B}$ (\Cref{def:mod-set-polys}).
|
||||||
|
Let $\isInd{\cdot}$ be a Boolean function that returns true if its argument, a monomial $\encMon$, is composed of independent variables, and false otherwise.
|
||||||
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
|
\begin{Definition}[Parameter $\gamma$]\label{def:param-gamma}
|
||||||
Given an expression tree $\circuit$, define
|
Given an expression tree $\circuit$, define
|
||||||
\AH{Technically, $\monom$ is a set of variables rather than a monomial. Perhaps we don't need the $\var(\cdot)$ function and can replace it with a function that returns the monomial represented by a set of variables.}
|
\AH{Technically, $\monom$ is a set of variables rather than a monomial. Perhaps we don't need the $\var(\cdot)$ function and can replace it with a function that returns the monomial represented by a set of variables. FIXED: need to propagate this to the appendix ($\encMon$)}
|
||||||
\AH{To add, this is an issue on line 1073, 1117 of app C.}
|
\AH{To add, this is an issue on line 1073, 1117 of app C.}
|
||||||
\[\gamma(\circuit)=\frac{\sum_{(\monom, \coef)\in \expansion{\circuit}} \abs{\coef}\cdot \indicator{\encMon\mod{\mathcal{B}}\equiv 0}}{\abs{\circuit}(1,\ldots, 1)}\]
|
\[\gamma(\circuit)=\frac{\sum_{(\monom, \coef)\in \expansion{\circuit}} \abs{\coef}\cdot \indicator{\neg\isInd{\encMon}} }%\encMon\mod{\mathcal{B}}\equiv 0}}
|
||||||
|
{\abs{\circuit}(1,\ldots, 1)}.\]
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
|
|
||||||
\noindent We next present a few corollaries of \Cref{lem:approx-alg}.
|
\noindent We next present a few corollaries of \Cref{lem:approx-alg}.
|
||||||
|
@ -108,7 +122,7 @@ $\abs{\circuit}(1,\ldots, 1)\le 2^{2^k\cdot \size(\circuit)}.$
|
||||||
Further, under either of the following conditions:
|
Further, under either of the following conditions:
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item $\circuit$ is a tree,
|
\item $\circuit$ is a tree,
|
||||||
\item $\circuit$ encodes the run of the algorithm in~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ query,
|
\item $\circuit$ encodes the run of the algorithm in~\cite{DBLP:conf/pods/KhamisNR16} on an FAQ\AH{citation would help here, as a reviewer complaint on this was ``What is FAQ?'', though we do cite (I think) in the appendix.} query,
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
we have $\abs{\circuit}(1,\ldots, 1)\le \size(\circuit)^{O(k)}.$
|
we have $\abs{\circuit}(1,\ldots, 1)\le \size(\circuit)^{O(k)}.$
|
||||||
\end{Lemma}
|
\end{Lemma}
|
||||||
|
@ -124,14 +138,25 @@ Given a query polynomial $\poly(\vct{X})=\polyf(\circuit)$ for circuit \circuit
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\label{eq:tilde-Q-bi}
|
\label{eq:tilde-Q-bi}
|
||||||
\rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} \hspace*{-2mm} \indicator{\encMon\mod{\mathcal{B}}\not\equiv 0}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \var\inparen{\monom}}\hspace*{-2mm} X_i
|
\rpoly\inparen{X_1,\dots,X_\numvar}=\hspace*{-1mm}\sum_{(\monom,\coef)\in \expansion{\circuit}} %\hspace*{-2mm}
|
||||||
|
\indicator{\isInd{\encMon}%\mod{\mathcal{B}}\not\equiv 0
|
||||||
|
}\cdot \coef\cdot\hspace*{-2mm}\prod_{X_i\in \monom}\hspace*{-2mm} X_i
|
||||||
\end{equation}
|
\end{equation}
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
\input{app_approx-alg-pseudo-code}
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%NEED to move to appendix
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%\input{app_approx-alg-pseudo-code}
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
%END move to appendix
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
|
||||||
Given the above, the algorithm is a sampling based algorithm for the above sum: we sample (via \sampmon) $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional %\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.}
|
Given the above, the algorithm is a sampling based algorithm for the above sum: we sample (via \sampmon) $(\monom,\coef)\in \expansion{\circuit}$ with probability proportional %\footnote{We could have also uniformly sampled from $\expansion{\circuit}$ but this gives better parameters.}
|
||||||
to $\abs{\coef}$ and compute $Y=\indicator{\monom\mod{\mathcal{B}}\not\equiv 0}\cdot \prod_{X_i\in \var\inparen{\monom}} p_i$. Taking $\numsamp$ samples and computing the average of $Y$ gives us our final estimate. \onepass is used to compute the sampling probabilities needed in \sampmon (details are in \Cref{sec:proofs-approx-alg}).
|
to $\abs{\coef}$ and compute $\vari{Y}=\indicator{\isInd{\encMon}}%\monom\mod{\mathcal{B}}\not\equiv 0}
|
||||||
|
\cdot \prod_{X_i\in \monom} p_i$. Taking $\numsamp$ samples and computing the average of $\vari{Y}$ gives us our final estimate. \onepass is used to compute the sampling probabilities needed in \sampmon (details are in \Cref{sec:proofs-approx-alg}).
|
||||||
%\approxq (\Cref{alg:mon-sam}) modifies \circuit with a call to \onepass. It then samples from $\circuit_{\vari{mod}}\numsamp$ times and uses that information to approximate $\rpoly$.
|
%\approxq (\Cref{alg:mon-sam}) modifies \circuit with a call to \onepass. It then samples from $\circuit_{\vari{mod}}\numsamp$ times and uses that information to approximate $\rpoly$.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -72,14 +72,14 @@
|
||||||
%Function Names and Typesetting %
|
%Function Names and Typesetting %
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
% Function-name macros: each one typesets a fixed name through \func.
% Every definition below appears as two diff rows (two variants of the same line).
\newcommand{\domain}{\func{Dom}}
|
\newcommand{\domain}{\func{Dom}}
|
||||||
% \func sets its argument in small caps (\textsc); the second variant additionally appends \xspace.
% NOTE(review): with \xspace inside \func, a text-mode use such as `\size(` may gain a space before `(` -- confirm rendering.
\newcommand{\func}[1]{\textsc{#1}}
|
\newcommand{\func}[1]{\textsc{#1}\xspace}
|
||||||
% \isInd{m}: the name `isInd' followed by its argument wrapped in \inparen.
\newcommand{\isInd}[1]{\func{isInd}\inparen{#1}}
|
\newcommand{\isInd}[1]{\func{isInd}\inparen{#1}}
|
||||||
\newcommand{\polyf}{\func{poly}}
|
\newcommand{\polyf}{\func{poly}}
|
||||||
\newcommand{\evalmp}{\func{eval}}
|
\newcommand{\evalmp}{\func{eval}}
|
||||||
\newcommand{\degree}{\func{deg}}
|
\newcommand{\degree}{\func{deg}}
|
||||||
\newcommand{\size}{\func{size}}
|
\newcommand{\size}{\func{size}}
|
||||||
\newcommand{\depth}{\func{depth}}
|
\newcommand{\depth}{\func{depth}}
|
||||||
% \topord: the first variant adds \xspace explicitly; the second variant omits it.
\newcommand{\topord}{\func{TopOrd}\xspace}
|
\newcommand{\topord}{\func{TopOrd}}
|
||||||
% \smbOf{x}: the name expanded from \abbrSMB followed by its argument wrapped in \inparen.
\newcommand{\smbOf}[1]{\func{\abbrSMB}\inparen{#1}}
|
\newcommand{\smbOf}[1]{\func{\abbrSMB}\inparen{#1}}
|
||||||
%Verify if we need the above...
|
%Verify if we need the above...
|
||||||
%saving \treesize for now to keep latex from breaking
|
%saving \treesize for now to keep latex from breaking
|
||||||
|
@ -235,7 +235,7 @@
|
||||||
% Math notation helpers; every definition below appears as two diff rows (two variants of the same line).
% \mtrix{i}: the letter M with the argument as a subscript.
\newcommand{\mtrix}[1]{M_{#1}}
|
\newcommand{\mtrix}[1]{M_{#1}}
|
||||||
% \dtrm{A}: the letters `Det' followed by the argument in auto-sized parentheses (\left( ... \right)).
\newcommand{\dtrm}[1]{Det\left(#1\right)}
|
\newcommand{\dtrm}[1]{Det\left(#1\right)}
|
||||||
% \tuple{...}: the argument between auto-sized angle delimiters (\left< ... \right>).
\newcommand{\tuple}[1]{\left<#1\right>}
|
\newcommand{\tuple}[1]{\left<#1\right>}
|
||||||
% \indicator{cond}: the first variant writes \onesymbol followed by the condition wrapped in \inparen;
% the second variant places the condition beneath \onesymbol via \underset.
\newcommand{\indicator}[1]{\onesymbol\inparen{#1}}
|
\newcommand{\indicator}[1]{\underset{#1}{\onesymbol}}
|
||||||
%----------------------------------------------
|
%----------------------------------------------
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
Loading…
Reference in a new issue