Changes addressing reviewer comments.

master
Aaron Huber 2021-08-30 22:50:21 -04:00
parent d466b2eaa8
commit c384d5f21b
8 changed files with 31 additions and 23 deletions

Binary file not shown.

View File

@ -10,7 +10,7 @@ For the sake of contradiction, let us assume we can solve our problem in $f(\kEl
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot m^{c+1} \\
&\le \inparen{O(\kElem^3) + f(\kElem)}\cdot n^{2c+2},
\end{align*}
which contradicts \Cref{thm:k-match-hard}.
which, together with \Cref{thm:k-match-hard}, contradicts the conjecture that $\sharpwone$-hard problems cannot be solved in $f(k)\cdot \numvar^c$ time.
\qed
\end{proof}

View File

@ -34,7 +34,7 @@ Note that an assignment $\assign: \vct{X} \to \{0,1\}^\numvar$ can be represente
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For instance, consider a $\pxdb$ consisting of a single tuple $\tup_1 = (1)$ annotated with $X_1 + X_2$ with probability distribution $\probOf([0,0]) = 0$, $\probOf([0,1]) = 0$, $\probOf([1,0]) = 0.3$ and $\probOf([1,1]) = 0.7$. This $\semNX$-PDB encodes two possible worlds (with non-zero) probability that we denote using their world vectors.
For instance, consider a $\pxdb$ consisting of a single tuple $\tup_1 = (1)$ annotated with $X_1 + X_2$ with probability distribution $\probOf([0,0]) = 0$, $\probOf([0,1]) = 0$, $\probOf([1,0]) = 0.3$ and $\probOf([1,1]) = 0.7$. This $\semNX$-PDB encodes two possible worlds (with non-zero probability) that we denote using their world vectors.
%
\[
D_{[0,1]}(\tup_1) = 1 \hspace{0.3cm} \mathbf{and} \hspace{0.3cm} D_{[1,1]}(\tup_1) = 2
@ -91,7 +91,7 @@ As already noted above, in this work, we define \tis and \bis as subclasses of $
In this work, we consider one further deviation from the standard: We use bag semantics for queries.
Even though tuples cannot occur more than once in the input \ti or \bi, they can occur with a multiplicity larger than one in the result of a query.
Since in \tis and \bis, there is a one-to-one correspondence between tuples in the database and variables, we can interpret a vector $\vct{w} \in \{0,1\}^n$ as denoting which tuples exist in the possible world $\assign_{\vct{w}}(\pxdb)$ (the ones where $\vct{w}[j] = 1$).
For BIDBs specifically, note that that at most one of the bits corresponding to tuples in each block will be set (i.e., for any pair of bits $w_j$, $w_{j'}$ that are part of the same block $b_i \supseteq \{t_{i,j}, t_{i,j'}\}$, at most one of them will be set).
For BIDBs specifically, note that at most one of the bits corresponding to tuples in each block will be set (i.e., for any pair of bits $w_j$, $w_{j'}$ that are part of the same block $b_i \supseteq \{t_{i,j}, t_{i,j'}\}$, at most one of them will be set).
Let $\vct{p}$ denote the vector whose elements are the individual probabilities $\prob_i$ of the tuples $\tup_i$. Let $\pd^{(\vct{p})}$ denote the distribution induced by $\vct{p}$.
%
\begin{align}\label{eq:tidb-expectation}
@ -160,17 +160,18 @@ Note that any $\poly$ in factorized form is equivalent to its \abbrSMB expansion
\begin{proof}
Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numvar$ variables with highest degree $= B$: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numvar) = \sum_{\vct{d} \in \{0,\ldots, B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar X_i^{d_i}.\]
Then, denoting the corresponding exponent vector $\vct{d}$ for a world $\vct{\wElem}$ over the set of valid worlds $\valworlds$ as $\vct{d} \in \valworlds$, in expectation we have
Then, %denoting the corresponding exponent vector $\vct{d}$ for a world $\vct{\wElem}$ over the set of valid worlds $\valworlds$ as $\vct{d} \in \valworlds$,
in expectation we have
\begin{align}
\expct_{\vct{W}}\pbox{\poly(\vct{W})} &= \sum_{\vct{d} \in \eta}c_{\vct{d}}\cdot \expct_{\vct{w}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \eta}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \eta}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{w}}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \eta}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar)\label{p1-s5}
\expct_{\vct{W}}\pbox{\poly(\vct{W})} &= \sum_{\vct{d} \in \{0,\ldots,B\}^\numvar}c_{\vct{d}}\cdot \expct_{\vct{W}}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \{0,\ldots,B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \{0,\ldots,B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \expct_{\vct{W}}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \{0,\ldots,B\}^\numvar}c_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numvar \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numvar).\label{p1-s5}
\end{align}
In steps \cref{p1-s1} and \cref{p1-s2}, by linearity of expectation (recall that by \bi constraints, the variables are independent, otherwise the monomial expectation is 0), the expecation can be pushed all the way inside of the product. In \cref{p1-s3}, note that $w_i \in \{0, 1\}$ which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.
\Cref{p1-s1} is the result of the following facts. First, the only worlds contributing to the expectation are the valid worlds, i.e., those worlds each of which produces a polynomial whose monomials are all made up of independent variables. Second, linearity of expectation combined with the fact that any non-random variable can be pulled out of the expectation allows for the expectation to be pushed through the sum and coefficient. \Cref{p1-s2} is obtained by the independence property of \abbrBIDB\xplural, where any valid possible world is made up of independent tuples, and this allows for the expectation to be pushed through the product. In \cref{p1-s3}, note that $w_i \in \{0, 1\}$, which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$. Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.
Finally, observe \Cref{p1-s5} by construction in \Cref{lem:pre-poly-rpoly}, that $\rpoly(\prob_1,\ldots, \prob_\numvar)$ is exactly the product of probabilities of each variable in each monomial across the entire sum.
Finally, observe that \Cref{p1-s5} holds since, by construction in \Cref{lem:pre-poly-rpoly}, $\rpoly(\prob_1,\ldots, \prob_\numvar)$ is exactly the product of probabilities of each variable in each monomial and its corresponding coefficient, across the entire sum.
\qed
\end{proof}

View File

@ -42,9 +42,7 @@ Since $e\ne e'$, this case produces the following edge patterns: $\twopath, \two
Since $\prob$ is fixed, \Cref{lem:qE3-exp} gives us one linear equation in $\numocc{G}{\tri}$ and $\numocc{G}{\threedis}$ (we can handle the other counts due to equations~\eqref{eq:1e}--\eqref{eq:3p-3tri}). However, we need to generate one more independent linear equation in these two variables. Towards this end, we generate another graph related to $G$:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}\label{def:Gk}
For $\ell > 1$, let graph $\graph{\ell}$ be a graph generated from an arbitrary graph $\graph{1}$, by replacing every edge $e$ of $\graph{1}$ with a $\ell$-path, such that all inner vertexes of an $\ell$-path replacement edge are disjoint from all other vertexes of any other $\ell$-path replacement edge. % in the sense that they only intersect at the original intersection endpoints as seen in $\graph{1}$.
\end{Definition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Next, we relate the various sub-graph counts in $\graph{2}$ to $\graph{1}$ ($G$).

View File

@ -66,9 +66,10 @@ In a RAM model of word size of $W$-bits, $\multc{M}{W}$ denotes the complexity o
\end{Definition}
\subsection{Our main result}
\AH{Verify that the proof for \cref{lem:approx-alg} doesn't rely on properties of $\raPlus$ or \abbrBIDB.}
\begin{Theorem}\label{lem:approx-alg}
Let \circuit be a circuit for a UCQ over \bi and define $\poly(\vct{X})=\polyf(\circuit)$ and let $k=\degree(\circuit)$.
Let \circuit be an arbitrary arithmetic circuit %for a UCQ over \bi
and define $\poly(\vct{X})=\polyf(\circuit)$ and let $k=\degree(\circuit)$.
Then an estimate $\mathcal{E}$ of $\rpoly(\prob_1,\ldots, \prob_\numvar)$ can be computed in time
{\small
\[O\left(\left(\size(\circuit) + \frac{\log{\frac{1}{\conf}}\cdot \abs{\circuit}^2(1,\ldots, 1)\cdot k\cdot \log{k} \cdot \depth(\circuit)}{\inparen{\error}^2\cdot\rpoly^2(\prob_1,\ldots, \prob_\numvar)}\right)\cdot\multc{\log\left(\abs{\circuit}(1,\ldots, 1)\right)}{\log\left(\size(\circuit)\right)}\right)\]

View File

@ -122,7 +122,7 @@ Note that \Cref{def:circuit-set} implies that $\circuit \in \circuitset{\polyf(\
\noindent We are now ready to formally state our \textbf{main problem}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{Definition}[The Expected Result Multiplicity Problem]\label{def:the-expected-multipl}
Let $\vct{X} = (X_1, \ldots, X_n)$, and $\pxdb$ be an $\semNX$-PDB over $\vct{X}$ with probability distribution $\pd$ over assignments $\vct{X} \to \{0,1\}$, $\query$ an n-ary query, and $t$ an n-ary tuple.
Let $\vct{X} = (X_1, \ldots, X_n)$ and $\pxdb$ be an arbitrary $\semNX$-PDB over $\vct{X}$ with probability distribution $\pd$ over assignments $\vct{X} \to \{0,1\}$. Fix a query $\query$ and an output tuple $\tup$.
The \expectProblem is defined as follows:\\[-7mm]
\begin{center}
\textbf{Input}: A circuit $\circuit \in \circuitset{\polyX}$ for $\polyX = \query(\pxdb)(\tup)$

View File

@ -16,15 +16,16 @@ For a probabilistic database $\pdb = (\idb, \pd)$, the result of a query is th
Let $\semNX$ denote the set of polynomials over variables $\vct{X}=(X_1,\dots,X_\numvar)$ with natural number coefficients and exponents.
We model incomplete relations using Green et al.'s $\semNX$-databases~\cite{DBLP:conf/pods/GreenKT07}, discussed in detail in \Cref{subsec:supp-mat-krelations}.
$\semNX$-relations are functions from tuples to elements of $\semNX$, typically called annotations.
We write $R(t)$ to denote the polynomial annotating tuple $t$ in relation $R$. Note that $R(t)$ is the lineage polynomial for $t$.
Each possible world is defined by an assignment of $\numvar$ binary values $\vct{\wElem} \in \{0, 1\}^{\numvar}$ to $\vct{X}$.
The multiplicity of $t \in R$ in this possible world, denoted $R(t)(\vct{\wElem})$, is obtained by evaluating the polynomial annotating $t$ on $\vct{\wElem}$.
$\semNX$-databases are functions from tuples to elements of $\semNX$, typically called annotations.
Given an $\semNX$-database $\db$, it is common to use $\db(\tup)$ to denote the polynomial annotating tuple $\tup$ in $\db$.
%Note that based on this definition of $\rel$, $\rel(\tup)$ is the lineage polynomial for $\tup$.
Let $\numvar$ be the number of tuples in $\pdb$. Then, each possible world is defined by an assignment of $\numvar$ binary values $\vct{\wElem} \in \{0, 1\}^{\numvar}$ to $\vct{X}$.
The multiplicity of $\tup \in \db$ in this possible world, denoted $\db(\tup)(\vct{\wElem})$, is obtained by evaluating the polynomial annotating $\tup$ on $\vct{\wElem}$.
$\semNX$-relations are closed under $\raPlus$ (\Cref{fig:nxDBSemantics}).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We will use $\semNX$-\abbrPDB $\pxdb$, defined as the tuple $(\idb_{\semNX}, \pd)$, where $\semNX$-database $\idb_{\semNX}$ is paired with probability distribution $\pd$.
We will use $\semNX$-\abbrPDB $\pxdb$, defined as the tuple $(\idb_{\semNX}, \pd)$, where $\semNX$-database $\idb_{\semNX}$ is paired with probability distribution $\pd$ over the assignments to $\vct{X}$.
We denote by $\polyForTuple$ the annotation of tuple $t$ in the result of $\query$ on an implicit $\semNX$-\abbrPDB (i.e., $\polyForTuple = \query(\pxdb)(t)$ for some $\pxdb$) and as before, interpret it as a function $\polyForTuple: \{0,1\}^{\numvar} \rightarrow \semN$ from vectors of variable assignments to the corresponding value of the annotating polynomial.
$\semNX$-\abbrPDB\xplural and a function $\rmod$ (which transforms an $\semNX$-\abbrPDB to a classical bag-\abbrPDB, or $\semN$-\abbrPDB~\cite{DBLP:conf/pods/GreenKT07,feng:2019:sigmod:uncertainty}) are both formalized in \Cref{subsec:supp-mat-background}.
\begin{Proposition}[Expectation of polynomials]\label{prop:expection-of-polynom}

View File

@ -36,9 +36,16 @@ in $O\inparen{T(\numedge) + \numedge}$ time.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
To prove \cref{th:single-p}, we use the following notion.
\begin{Definition}\label{def:Gk}
For $\ell \geq 1$, let graph $\graph{\ell}$ be a graph generated from an arbitrary graph $G$, by replacing every edge $e$ of $G$ with an $\ell$-path, such that all inner vertexes of an $\ell$-path replacement edge are disjoint from all other vertexes.\footnote{Note that $G\equiv \graph{1}$.}% of any other $\ell$-path replacement edge. % in the sense that they only intersect at the original intersection endpoints as seen in $\graph{1}$.
\end{Definition}
The following result immediately implies \Cref{th:single-p}:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\AH{We need to introduce the meaning of the new notation $\graph{\ell}$.}
\begin{Lemma}\label{lem:lin-sys}
Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^3$ such that
\[ \begin{pmatrix}