diff --git a/binarybidb.tex b/binarybidb.tex index 9abdc6c..662e232 100644 --- a/binarybidb.tex +++ b/binarybidb.tex @@ -29,7 +29,7 @@ We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynom \noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and all $\block_i$ are independent random events. $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events. We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}. We define next a specific construction of \abbrBIDB that is useful for our work. \begin{Definition}[\abbrOneBIDB]\label{def:one-bidb} -Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases} +Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. 
$\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases} 1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\ 0 & \text{if there exists } \tup \neq \tup'\in\block_i; W_\tup, W_{\tup'}\neq 0\\ \prob_\tup & \text{if } W_\tup \ne 0 \text{ for the unique } \tup\in\block_i.\\ diff --git a/introduction.tex b/introduction.tex index ff0a100..51f602e 100644 --- a/introduction.tex +++ b/introduction.tex @@ -122,7 +122,7 @@ Further, our approximation algorithm works for a more general notion of bag \abb (see \Cref{subsec:tidbs-and-bidbs}). \subsection{Polynomial Equivalence}\label{sec:intro-poly-equiv} -A common encoding of probabilistic databases (e.g., in \cite{IL84a,Imielinski1989IncompleteII,4497507,DBLP:conf/vldb/AgrawalBSHNSW06} and many others) annotates tuples with lineages, propositional formulas that describe the set of possible worlds that the tuple appears in. The bag semantics analog is a provenance/lineage polynomial (see~\Cref{fig:nxDBSemantics}) $\apolyqdt$~\cite{DBLP:conf/pods/GreenKT07}, a polynomial with non-zero integer coefficients and exponents, over variables $\vct{X}$ encoding input tuple multiplicities. The lineage polynomial for result tuple $t_{out}$ evaluates to $t_{out}$'s multiplicity in a given possible world when each $X_{t_{in}}$ is replaced by the multiplicity of $t_{in}$ in the possible world. +A common encoding of probabilistic databases (e.g., in \cite{IL84a,4497507,DBLP:conf/vldb/AgrawalBSHNSW06} and many others) annotates tuples with lineages, propositional formulas that describe the set of possible worlds that the tuple appears in. 
The bag semantics analog is a provenance/lineage polynomial (see~\Cref{fig:nxDBSemantics}) $\apolyqdt$~\cite{DBLP:conf/pods/GreenKT07}, a polynomial with non-zero integer coefficients and exponents, over variables $\vct{X}$ encoding input tuple multiplicities. The lineage polynomial for result tuple $t_{out}$ evaluates to $t_{out}$'s multiplicity in a given possible world when each $X_{t_{in}}$ is replaced by the multiplicity of $t_{in}$ in the possible world. We drop $\query$, $\tupset$, and $\tup$ from $\apolyqdt$ when they are clear from the context or irrelevant to the discussion. We now specify the problem of computing the expectation of tuple multiplicity in the language of lineage polynomials: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -187,7 +187,7 @@ We have argued that for our specific example the expectation that we want is $\r For any \abbrCTIDB $\pdb$, $\raPlus$ query $\query$, and lineage polynomial $\poly\inparen{\vct{X}}=\poly\pbox{\query,\tupset,\tup}\inparen{\vct{X}}$, it holds that $ \expct_{\vct{W} \sim \pdassign}\pbox{\poly\inparen{\vct{W}}} = \rpoly\inparen{\probAllTup} -$, where $\probAllTup = \inparen{\prob_{\tup}}_{\tup\in\tupset}.$ +$, where $\probAllTup = \inparen{\prob_{\tup, j}}_{\tup\in\tupset, j\in\pbox{\bound}}.$ \end{Lemma} \noindent diff --git a/main.bbl b/main.bbl index d9a82df..8d93cdb 100644 --- a/main.bbl +++ b/main.bbl @@ -2,7 +2,7 @@ %%% Do NOT edit. File created by BibTeX with style %%% ACM-Reference-Format-Journals [18-Jan-2012]. -\begin{thebibliography}{51} +\begin{thebibliography}{50} %%% ==================================================================== %%% NOTE TO THE USER: you can override these defaults by providing @@ -104,8 +104,8 @@ Frohm}, \bibinfo{person}{Charles~M. Gaona}, \bibinfo{person}{Gary~D. Hachtel}, \bibinfo{person}{Enrico Macii}, \bibinfo{person}{Abelardo Pardo}, {and} \bibinfo{person}{Fabio Somenzi}.} \bibinfo{year}{1993}\natexlab{}. 
-\newblock \showarticletitle{Algebraic decision diagrams and their - applications}. In \bibinfo{booktitle}{\emph{IEEE CAD}}. +\newblock \showarticletitle{Algebraic Decision Diagrams and Their + Applications}. In \bibinfo{booktitle}{\emph{IEEE CAD}}. \newblock @@ -171,19 +171,20 @@ \bibitem[Curticapean and Marx(2014)]% - {DBLP:journals/corr/CurticapeanM14} + {10.1109/FOCS.2014.22} \bibfield{author}{\bibinfo{person}{Radu Curticapean} {and} - \bibinfo{person}{D{\'{a}}niel Marx}.} \bibinfo{year}{2014}\natexlab{}. -\newblock \showarticletitle{Complexity of counting subgraphs: only the - boundedness of the vertex-cover number counts}. -\newblock \bibinfo{journal}{\emph{CoRR}} \bibinfo{volume}{abs/1407.2929} - (\bibinfo{year}{2014}). + \bibinfo{person}{D\'{a}niel Marx}.} \bibinfo{year}{2014}\natexlab{}. +\newblock \showarticletitle{Complexity of Counting Subgraphs: Only the + Boundedness of the Vertex-Cover Number Counts}. In + \bibinfo{booktitle}{\emph{Proceedings of the 2014 IEEE 55th Annual Symposium + on Foundations of Computer Science}} \emph{(\bibinfo{series}{FOCS '14})}. + \bibinfo{publisher}{IEEE Computer Society}, \bibinfo{address}{USA}, + \bibinfo{pages}{130–139}. \newblock -\showeprint[arXiv]{1407.2929} +\showISBNx{9781479965175} \urldef\tempurl% -\url{http://arxiv.org/abs/1407.2929} -\showURL{% -\tempurl} +\url{https://doi.org/10.1109/FOCS.2014.22} +\showDOI{\tempurl} \bibitem[Dalvi and Suciu(2007a)]% @@ -309,15 +310,12 @@ \bibitem[Flum and Grohe(2006)]% {param-comp} -\bibfield{author}{\bibinfo{person}{J{\"{o}}rg Flum} {and} - \bibinfo{person}{Martin Grohe}.} \bibinfo{year}{2006}\natexlab{}. -\newblock \bibinfo{booktitle}{\emph{Parameterized Complexity Theory}}. -\newblock \bibinfo{publisher}{Springer}. +\bibfield{author}{\bibinfo{person}{J{\"o}rg Flum} {and} \bibinfo{person}{Martin + Grohe}.} \bibinfo{year}{2006}\natexlab{}. +\newblock \showarticletitle{Parameterized Complexity Theory}. In + \bibinfo{booktitle}{\emph{Texts in Theoretical Computer Science. 
An EATCS + Series}}. \newblock -\showISBNx{978-3-540-29952-3} -\urldef\tempurl% -\url{https://doi.org/10.1007/3-540-29953-X} -\showDOI{\tempurl} \bibitem[Garcia{-}Molina et~al\mbox{.}(2009)]% @@ -325,7 +323,7 @@ \bibfield{author}{\bibinfo{person}{Hector Garcia{-}Molina}, \bibinfo{person}{Jeffrey~D. Ullman}, {and} \bibinfo{person}{Jennifer Widom}.} \bibinfo{year}{2009}\natexlab{}. -\newblock \bibinfo{booktitle}{\emph{Database systems - the complete book {(2.} +\newblock \bibinfo{booktitle}{\emph{Database Systems - The Complete Book {(2.} ed.)}}. \newblock \bibinfo{publisher}{Pearson Education}. \newblock @@ -364,14 +362,6 @@ \showDOI{\tempurl} -\bibitem[Imielinski and Lipski(1989)]% - {Imielinski1989IncompleteII} -\bibfield{author}{\bibinfo{person}{T. Imielinski} {and} \bibinfo{person}{W. - Lipski}.} \bibinfo{year}{1989}\natexlab{}. -\newblock \showarticletitle{Incomplete Information in Relational Databases}. -\newblock - - \bibitem[Imieli\'nski and Lipski~Jr(1984)]% {IL84a} \bibfield{author}{\bibinfo{person}{Tomasz Imieli\'nski} {and} diff --git a/main.bib b/main.bib index 5a81474..ef84026 100644 --- a/main.bib +++ b/main.bib @@ -1,17 +1,17 @@ -@article{DBLP:journals/corr/CurticapeanM14, - author = {Radu Curticapean and - D{\'{a}}niel Marx}, - title = {Complexity of counting subgraphs: only the boundedness of the vertex-cover - number counts}, - journal = {CoRR}, - volume = {abs/1407.2929}, - year = {2014}, - url = {http://arxiv.org/abs/1407.2929}, - eprinttype = {arXiv}, - eprint = {1407.2929}, - timestamp = {Mon, 13 Aug 2018 16:48:39 +0200}, - biburl = {https://dblp.org/rec/journals/corr/CurticapeanM14.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} +@inproceedings{10.1109/FOCS.2014.22, +author = {Curticapean, Radu and Marx, D\'{a}niel}, +title = {Complexity of Counting Subgraphs: Only the Boundedness of the Vertex-Cover Number Counts}, +year = {2014}, +isbn = {9781479965175}, +publisher = {IEEE Computer Society}, +address = {USA}, +url 
= {https://doi.org/10.1109/FOCS.2014.22}, +doi = {10.1109/FOCS.2014.22}, +abstract = {For a class C of graphs, #Sub(C) is the counting problem that, given a graph H from C and an arbitrary graph G, asks for the number of subgraphs of G isomorphic to H. It is known that if C has bounded vertex-cover number (equivalently, the size of the maximum matching in C is bounded), then #Sub(C) is polynomial-time solvable. We complement this result with a corresponding lower bound: if C is any recursively enumerable class of graphs with unbounded vertex-cover number, then #Sub(C) is #W[1]-hard parameterized by the size of H and hence not polynomial-time solvable and not even fixed-parameter tractable, unless FPT is equal to #W[1]. As a first step of the proof, we show that counting k-matchings in bipartite graphs is #W[1]-hard. Recently, Curticapean [ICALP 2013] proved the #W[1]-hardness of counting k-matchings in general graphs, our result strengthens this statement to bipartite graphs with a considerably simpler proof and even shows that, assuming the Exponential Time Hypothesis (ETH), there is no $f(k)\cdot n^{o(k/\log k)}$ time algorithm for counting k-matchings in bipartite graphs for any computable function f. As a consequence, we obtain an independent and somewhat simpler proof of the classical result of Flum and Grohe [SICOMP 2004] stating that counting paths of length k is #W[1]-hard, as well as a similar almost-tight ETH-based lower bound on the exponent.}, +booktitle = {Proceedings of the 2014 IEEE 55th Annual Symposium on Foundations of Computer Science}, +pages = {130--139}, +numpages = {10}, +series = {FOCS '14} } @misc{https://doi.org/10.48550/arxiv.2201.11524, doi = {10.48550/ARXIV.2201.11524}, @@ -79,12 +79,6 @@ series = {FOCS '02} year = {2018} } -@inproceedings{Imielinski1989IncompleteII, - title={Incomplete Information in Relational Databases}, - author={T. Imielinski and W. 
Lipski}, - year={1989} -} - @inproceedings{10.1145/1265530.1265571, author = {Dalvi, Nilesh and Suciu, Dan}, booktitle = {PODS}, @@ -561,7 +555,7 @@ Virginia Vassilevska Williams}, D. Hachtel and Enrico Macii and Abelardo Pardo and Fabio Somenzi}, booktitle = {IEEE CAD}, - title = {Algebraic decision diagrams and their applications}, + title = {Algebraic Decision Diagrams and Their Applications}, year = {1993} } @@ -635,25 +629,19 @@ Maximilian Schleich}, bibsource = {dblp computer science bibliography, https://dblp.org} } -@book{param-comp, - author = {J{\"{o}}rg Flum and - Martin Grohe}, - title = {Parameterized Complexity Theory}, - series = {Texts in Theoretical Computer Science. An {EATCS} Series}, - publisher = {Springer}, - year = {2006}, - url = {https://doi.org/10.1007/3-540-29953-X}, - doi = {10.1007/3-540-29953-X}, - isbn = {978-3-540-29952-3}, - timestamp = {Tue, 16 May 2017 14:24:38 +0200}, - biburl = {https://dblp.org/rec/series/txtcs/FlumG06.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} +@inproceedings{param-comp, + title={Parameterized Complexity Theory}, + author={J{\"o}rg Flum and Martin Grohe}, + booktitle={Texts in Theoretical Computer Science. An EATCS Series}, + year={2006} } + + @book{DBLP:books/daglib/0020812, author = {Hector Garcia{-}Molina and Jeffrey D. Ullman and Jennifer Widom}, - title = {Database systems - the complete book {(2.} ed.)}, + title = {Database Systems - The Complete Book {(2.} ed.)}, publisher = {Pearson Education}, year = {2009} } diff --git a/mult_distinct_p.tex b/mult_distinct_p.tex index d9ffb43..90ce08e 100644 --- a/mult_distinct_p.tex +++ b/mult_distinct_p.tex @@ -19,7 +19,7 @@ Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$ with no sel %There exists an absolute constant $c_0>0$ such that for every $G=(\vset,\edgeSet)$, we have $\kmatchtime \ge \Omega\inparen{|E|^{c_0\cdot k}}$ for large enough $k$. 
%\end{hypo} -\begin{hypo}[~\cite{DBLP:journals/corr/CurticapeanM14}]\label{conj:known-algo-kmatch} +\begin{hypo}[\cite{10.1109/FOCS.2014.22}]\label{conj:known-algo-kmatch} For every $G=\inparen{\vset, \edgeSet}$, $\kmatchtime\ge n^{\Omega\inparen{k/\log{k}}}$. \end{hypo}