From ff97b625692659178d9215a7d5c44ee05ef87796 Mon Sep 17 00:00:00 2001 From: Atri Rudra Date: Wed, 8 Jun 2022 03:26:24 +0000 Subject: [PATCH] Update on Overleaf. --- approx_alg.tex | 9 +++++---- binarybidb.tex | 12 +++++++----- circuits-model-runtime.tex | 4 ++-- introduction.tex | 14 +++++++------- mult_distinct_p.tex | 12 ++++++------ prob-def.tex | 4 ++-- pwsem.tex | 5 +++-- single_p.tex | 2 +- 8 files changed, 33 insertions(+), 29 deletions(-) diff --git a/approx_alg.tex b/approx_alg.tex index 0f8f187..8c0a580 100644 --- a/approx_alg.tex +++ b/approx_alg.tex @@ -9,11 +9,12 @@ The following approximation algorithm applies to bag query semantics over both Our experimental results (see~\Cref{app:subsec:experiment}), which use queries from the PDBench benchmark~\cite{pdbench} support the notion that our bounds hold for general \abbrBIDB in practice. % % -Corresponding proofs and pseudocode for all formal statements and algorithms - can be found in \Cref{sec:proofs-approx-alg}. +Proofs and pseudocode for all formal statements and algorithms + are in \Cref{sec:proofs-approx-alg}. \subsection{Preliminaries and some more notation} +For notational convenience, in this section we will assume that \dbbaseName $\tupset'=[n]$. We now introduce definitions and notation related to circuits and polynomials that we will need to state our upper bound results. First we introduce the expansion $\expansion{\circuit}$ of circuit $\circuit$ which is used in our auxiliary algorithm \sampmon for sampling monomials when computing the approximation. @@ -129,7 +130,7 @@ $1$-\abbrTIDB (where $\gamma=0$ in the equivalent $1$-\abbrBIDB of~\Cref{prop:ct as well as for all three queries of the PDBench \abbrBIDB benchmark (\Cref{app:subsec:experiment}). %We prove \Cref{cor:approx-algo-punchline-ctidb} from \Cref{eq:approx-algo-runtime} via the following sequence of arguments. 
-Next, by \Cref{prop:circuit-depth} and \Cref{lem:circ-model-runtime} for any $\raPlus$ query $\query$, there exists a circuit $\circuit^*$ for $\apolyqdt$ such that $\depth(\circuit^*)\le O_{|Q|}(\log{n})$ and $\size(\circuit^*)\le O_k\inparen{\qruntime{\query, \tupset, \bound}}$. Then, we note that \Cref{prop:ctidb-reduct} gives us an equivalent $\circuit$ from $\circuit^*$ is essentially the same size and has $\gamma(\circuit)\le 1-c^{-\Omega(k)}$ (\Cref{lem:ctidb-gamma}). Finally, we argue (using the fact $\circuit^*$ has low depth) that $\abs{\circuit^*}(1,\dots,1)\le \size(\circuit^*)^{O_k(1)}$ (\Cref{lem:val-ub}). +Next, by \Cref{prop:circuit-depth} and \Cref{lem:circ-model-runtime} for any $\raPlus$ query $\query$, there exists a circuit $\circuit^*$ for $\apolyqdt$ such that $\depth(\circuit^*)\le O_{|Q|}(\log{n})$ and $\size(\circuit^*)\le O_k\inparen{\qruntime{\query, \tupset, \bound}}$. Then, we note that \Cref{prop:ctidb-reduct} gives us an equivalent $\circuit$ from $\circuit^*$ that has essentially the same size/depth and $\gamma(\circuit)\le 1-c^{-\Omega(k)}$ (\Cref{lem:ctidb-gamma}). Finally, we argue (using the fact that $\circuit$ has low depth) that $\abs{\circuit}(1,\dots,1)\le \size(\circuit)^{O_k(1)}$ (\Cref{lem:val-ub}). %Next, we note that the above result %along with \Cref{lem:ctidb-gamma} The above sequence of arguments results in the following result (which answers \Cref{prob:big-o-joint-steps} in the affirmative): \begin{Corollary} @@ -137,7 +138,7 @@ The above sequence of arguments results in the following result (which answers \ Let $\query$ be an $\raPlus$ query and $\pdb$ be a \abbrCTIDB with $p_0>0$, where $p_0$ as in \Cref{cor:approx-algo-const-p}, is an absolute constant. Let $\poly(\vct{X})=\apolyqdt$ for any result tuple $\tup$ with $\deg(\poly)=k$.
Then one can compute an approximation satisfying \Cref{eq:approx-algo-bound-main} in time $O_{k,|Q|,\error',\conf,\bound}\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}$ (given $\query,\tupset$ and $\prob_{\tup, j}$ for each $\tup\in\tupset,~j\in\pbox{\bound}$ that defines $\bpd$). \end{Corollary} -If we want to approximate the expected multiplicities of all $Z=O(n^k)$ result tuples $\tup$ simultaneously, we just need to run the above result with $\conf$ replaced by $\frac \conf Z$. Note this increases the runtime by only a logarithmic factor. +If we want to approximate the expected multiplicities of all $Z=O(n^k)$ result tuples $\tup$ simultaneously, we just need to run the above result with $\conf$ replaced by $\frac \conf Z$, which increases the runtime by a factor of $O_k(\log{n})$. diff --git a/binarybidb.tex b/binarybidb.tex index ad70ded..c73b8bf 100644 --- a/binarybidb.tex +++ b/binarybidb.tex @@ -8,7 +8,7 @@ Given an index set $S$ and variables $X_\tup$ for $\tup\in S$, a (general) polyn is formally defined as: \begin{align} \label{eq:sop-form} -\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}=\inparen{d_\tup}_{\tup\in S}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN. +\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}=\inparen{d_\tup}_{\tup\in S}\in[0,\hideg]^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN. 
\end{align} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -26,23 +26,25 @@ We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynom \subsection{\abbrOneBIDB}\label{subsec:one-bidb} \label{subsec:tidbs-and-bidbs} -\noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and the events $\tup\in\block_i$ and $\tup\in\block_j$ are independent for $i\ne j$. $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events. We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}. We define next a specific construction of \abbrBIDB that is useful for our work. +\noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the \dbbaseName $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and the events $\tup\in\block_i$ and $\tup\in\block_j$ are independent for $i\ne j$. $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events. +%We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}. +We define next a specific construction of \abbrBIDB that is useful for our work. \begin{Definition}[\abbrOneBIDB]\label{def:one-bidb} -Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. 
$\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_\tup(W) = \begin{cases} +Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$ where $\tupset'$ is the \dbbaseName such that each $\tup \in \tupset'$ has a multiplicity in $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$. $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples. $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$. For $W\in\onebidbworlds{\tupset'}$ and $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases} 1 - \sum_{\tup\in\block_i}\prob_\tup & \text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\ 0 & \text{if there exists } \tup \neq \tup'\in\block_i; W_\tup, W_{\tup'}\neq 0\\ \prob_\tup & W_\tup \ne 0 \text{ for one unique } t\in B_i.\\ \end{cases}$ -\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{\tup\in\tupset'}\prob_{\tup}(W)$. +\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}(W)$. %\footnote{We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance.
When there is no ambiguity, we will denote a world vector as $\worldvec$.} \end{Definition} Lineage polynomials for arbitrary \dbbaseName $\gentupset'$ are constructed in a manner analogous to $1$-\abbrTIDB\xplural (see \Cref{fig:nxDBSemantics}), differing only in the base case. In a $1$-\abbrTIDB, each tuple contributes a multiplicity of 0 or 1, and $\polyqdt{\rel}{\gentupset}{\tup} = X_\tup$. %\textcolor{red}{CHANGE} In a \abbrOneBIDB, each tuple $\tup\in\tupset'$ contributes its corresponding multiplicity: %\textcolor{red}{CHANGE} -$\polyqdt{\rel}{\gentupset}{\tup} = c_\tup\cdot X_\tup$. These semantics are fully detailed in \Cref{fig:lin-poly-bidb}. +$\polyqdt{\rel}{\gentupset}{\tup} = c_\tup\cdot X_\tup$. See \Cref{fig:lin-poly-bidb} for details. \abbrOneBIDB are powerful enough to encode \abbrCTIDB: \begin{Proposition}[\abbrCTIDB reduction]\label{prop:ctidb-reduct} diff --git a/circuits-model-runtime.tex b/circuits-model-runtime.tex index 156e007..394d93c 100644 --- a/circuits-model-runtime.tex +++ b/circuits-model-runtime.tex @@ -24,7 +24,7 @@ % In practice there is often a limited number of alternatives for each block (e.g., which of five conflicting data sources to trust). Note that all \tis trivially fulfill this condition (i.e., $c = 1$).} %That is for \bis that fulfill this restriction approximating the expectation of results of SPJU queries is only has a constant factor overhead over deterministic query processing (using one of the algorithms for which we prove the claim). % with the same complexity as it would take to evaluate the query on a deterministic \emph{bag} database of the same size as the input PDB. -In~\Cref{sec:intro}, we introduced the structure $T_{det}\inparen{\cdot}$ to analyze the runtime complexity of~\Cref{prob:expect-mult}. +In~\Cref{sec:intro}, we introduced the function $T_{det}\inparen{\cdot}$ to analyze the runtime complexity of~\Cref{prob:expect-mult}. 
To decouple our results from any specific join algorithm, we first lower bound the cost of a join. \begin{Definition}[Join Cost] @@ -69,7 +69,7 @@ We assume that full table scans are used for every base relation access. We can %Observe that % () .\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} It can be verified that the above cost model on the corresponding $\raPlus$ join queries correctly captures the runtime of current best known . -\Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\tupset$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(Q,\tupset,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntimenoopt{\optquery{\query}, \tupset,\bound})$, as we assumed when moving from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}. Lastly, we can handle FAQs and factorized databases by allowing for optimization. %, i.e. $\qruntimenoopt{\optquery{\query}, \gentupset, \bound}$. +\Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\tupset$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(Q,\tupset,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntimenoopt{\optquery{\query}, \tupset,\bound})$, as we assumed when moving from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}. Lastly, we can handle FAQs/AJAR queries and factorized databases by allowing for optimization. %, i.e. $\qruntimenoopt{\optquery{\query}, \gentupset, \bound}$. 
% %We now make a simple observation on the above cost model: %\begin{proposition} diff --git a/introduction.tex b/introduction.tex index 8f4bc69..17f7ed7 100644 --- a/introduction.tex +++ b/introduction.tex @@ -256,7 +256,7 @@ For example, if we insist that $\circuit$ represent the lineage polynomial in \a Hence, just $\timeOf{\abbrStepOne}(\query,\tupset,\circuit)$ is too large. However, systems can directly emit compact, factorized representations of $\poly(\vct{X})$ (e.g., as a consequence of the standard projection push-down optimization~\cite{DBLP:books/daglib/0020812}). Accordingly, this work uses (arithmetic) circuits\footnote{ - An arithmetic circuit is a DAG with variable/numeric source gates and multiplication/addition internal/sink gates. + An arithmetic circuit is a DAG with variable/numeric source gates and multiplication/addition internal gates. } as the representation system of $\poly(\vct{X})$, and we show in \Cref{sec:circuit-depth} an $\bigO{\qruntime{\optquery{\query}, \tupset, \bound}}$ algorithm for constructing the lineage polynomial for all result tuples of an $\raPlus$ query $\query$ (or more precisely, a circuit $\circuit$ with $\numvar$ sinks, one per output tuple).% representing the tuple's lineage). % @@ -270,24 +270,23 @@ Given one circuit $\circuit$ that encodes $\Phi\inparen{\vct{X}}$ for all result \end{Problem} We will formalize the notions of circuits and hence, \Cref{prob:intro-stmt} in \Cref{sec:expression-trees}. For an upper bound on approximating the expected count, it is easy to check that if all the probabilties are constant then (with an additive adjustment) $\poly\left(\prob_1,\dots, \prob_n\right)$ is a constant factor approximation of $\rpoly$ (recall \Cref{def:reduced-poly}). -This is illustrated in the following example using $\query_1^2$ from earlier. To aid in presentation we again limit our focus to $\refpoly{1, }^{\inparen{ABU}^2}$, assume $\bound = 2$ for variable $U$ and $\bound = 1$ for all other variables. 
Let $\prob_A$ denote $\probOf\pbox{A = 1}$. +This is illustrated in the following example using $\query_1^2$ from earlier. To aid in presentation we again limit our focus to $\monomial{1,R}$, assume $\bound = 2$ for variable $U$ and $\bound = 1$ for all other variables. Let $\prob_A$ denote $\probOf\pbox{A = 1}$. %In computing $\rpoly$, we have some cancellations to deal with: Then we have: % %\begin{footnotesize} %\begin{equation*} -$\refpoly{1, }^{\inparen{ABU}^2}\inparen{\vct{X}} = A^2\inparen{U_1^2 + 4U_1U_2 + 4U_2^2}B^2 =A^2U_1^2B^2 + 4A^2U_1U_2B^2+4A^2U_2^2B^2$ +$\monomial{1,R}\inparen{\vct{X}} = A^2\inparen{U_1^2 + 4U_1U_2 + 4U_2^2}B^2 =A^2U_1^2B^2 + 4A^2U_1U_2B^2+4A^2U_2^2B^2$, which in turn implies: %&\qquad+ 2AX_2B^2YE + 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC\\ %\end{equation*} %\end{footnotesize} %Recall that %\begin{footnotesize} %\begin{equation*} -$\rpoly_1^{\inparen{ABU}^2}\inparen{\vct{X}} = AU_1B+4AU_2B$ + %\end{equation*} %\end{footnotesize} -implies: -\[ \refpoly{1, }^{\inparen{ABU}^2}\inparen{\probAllTup} -4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2=\prob_A^2\prob_{U_1}^2\prob_B^2 + 4\prob_A^2\prob_{U_2}^2\prob_B^2.\] +\[ \monomial{1,R}\inparen{\probAllTup} -4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2=\prob_A^2\prob_{U_1}^2\prob_B^2 + 4\prob_A^2\prob_{U_2}^2\prob_B^2.\] %Substituting $\vct{\prob}$ for $\vct{X}$, %\begin{footnotesize} %\begin{align*} @@ -299,9 +298,10 @@ implies: % &= \rpoly_1^{\inparen{ABX}^2}\inparen{\vct{p}} + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2. 
%\end{align*} %\end{footnotesize} +Note that $\rmonomial{1}\inparen{\vct{X}} = AU_1B+4AU_2B$. If we assume that all probability values are in $[p_0,1]$ for some $p_0>0$, %then given access to $\refpoly{1, }^{\inparen{ABX}^2}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2$ -we get that $\refpoly{1, }^{\inparen{ABU}^2}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2$ is in the range $\pbox{p_0^3\cdot\rpoly^{\inparen{ABU}^2}_1\inparen{\vct{\prob}}, \rpoly_1^{\inparen{ABU}^2}\inparen{\vct{\prob}}}$. +we get that $\monomial{1,R}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2$ is in the range $\pbox{p_0^3\cdot\rmonomial{1}\inparen{\vct{\prob}}, \rmonomial{1}\inparen{\vct{\prob}}}$. %We can simulate sampling from $\refpoly{1, }^2\inparen{\vct{X}}$ by sampling monomials from $\refpoly{1, }^2$ while ignoring any samples $A^2X_1X_2B^2$. Note however, that this is \emph{not a tight approximation}. In~\Cref{sec:algo} we demonstrate that a $(1\pm\epsilon)$ (multiplicative) approximation with competitive performance is achievable. diff --git a/mult_distinct_p.tex b/mult_distinct_p.tex index cbfa10f..2f32dc3 100644 --- a/mult_distinct_p.tex +++ b/mult_distinct_p.tex @@ -2,13 +2,13 @@ %!TEX root=./main.tex \section{Hardness of Exact Computation} \label{sec:hard} -In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific (family) of hard instances $(\qhard,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a $1$-\abbrTIDB. +In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific (family) of hard instances $(\qhard^k,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a $1$-\abbrTIDB. Note that this implies hardness for \abbrCTIDB\xplural $\inparen{\bound\geq1}$ %; \Cref{prob:bag-pdb-poly-expected} cannot be done in $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$ runtime. The results also apply to as well as \abbrOneBIDB.
% and other \abbrPDB\xplural. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\subsection{Preliminaries}\label{sec:hard:sub:pre} -Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We can think of $H$ as being of constant size and $G$ as growing. +Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity. We think of $H$ as being of constant size and $G$ as growing. In particular, we will consider computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings). We use $\kmatchtime$ to denote the optimal runtime of computing $\numocc{G}{\kmatch}$ exactly. Our results in \Cref{sec:multiple-p} are based on the following known (conditional) hardness results: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -33,7 +33,7 @@ Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$, $\kmatcht %We note that the above conjecture is somewhat non-standard. In particular, the best known algorithm to compute $\numocc{G}{\kmatch}$ takes time $\Omega\inparen{|V|^{k/2}}$ %(i.e. if this is the best algorithm then $c_0=\frac 14$) %~\cite{k-match}. -The above result is saying is that (assuming ETH) one can only hope for a slightly super-polynomial improvement over the trivial algorithm to compute $\numocc{G}{\kmatch}$. +The above result says that, assuming the Exponential Time Hypothesis (ETH), one can only hope for a slightly super-polynomial improvement over the trivial algorithm to compute $\numocc{G}{\kmatch}$.
% Our hardness result in Section~\ref{sec:single-p} is based on the following conjectured hardness result: @@ -73,11 +73,11 @@ For any graph $G=(V,\edgeSet)$ and $\kElem\ge 1$, define SELECT COUNT(*) FROM $\underbrace{Q_1\text{ JOIN }Q_1\text{ JOIN}\cdots\text{JOIN }Q_1}_{k\rm\ times}$ \end{lstlisting} \end{mdframed} -In the above, $\query_1$ is defined in \Cref{sec:intro}, which is the same as $\qhard^1$. +In the above, $\query_1$ is as defined in \Cref{sec:intro}, which is the same as $\qhard^1$. % %\noindent %Consider again the \abbrCTIDB instance $\pdb$ of~\Cref{fig:two-step} and, for our hard instance, let $\bound = 1$. $\pdb$ generalizes to one compatible We next define the instances for $T$ and $R$ that lead to the lineage polynomial in~\Cref{def:qk} as follows. Relation $T$ has $n$ tuples corresponding to each vertex for $i$ in $[n]$, each with probability $\prob$ and $R$ has tuples corresponding to the edges $\edgeSet$ (each with a probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $R$ as well, but since they always are present with probability $1$, we drop those. Our argument also works when all the tuples in $R$ also are present with probability $\prob$ but to simplify notation we assign probability $1$ to edges.} -In other words, this instance $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$). +In other words, the \dbbaseName $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$). Note that this implies that $\poly_{G}^\kElem$ is indeed a $1$-\abbrTIDB lineage polynomial. Next, we note that the runtime for answering $\qhard^k$ on deterministic database $\tupset$, as defined above, is $O_k\inparen{\numedge}$ (i.e. 
deterministic query processing is `easy' for this query): @@ -93,7 +93,7 @@ We are now ready to present one of our main hardness result. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Theorem}\label{thm:mult-p-hard-result} -Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$. Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (over all $i\in [2k+1]$) for arbitrary $G=(\vset,\edgeSet)$ +Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$. Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (for all $i\in\inset{0,\ldots,2k}$) for arbitrary $G=(\vset,\edgeSet)$ needs time $\bigOmega{\kmatchtime}$, if $\kmatchtime\ge \omega\inparen{\abs{\edgeSet}}$. \end{Theorem} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/prob-def.tex b/prob-def.tex index 9e49c89..031957e 100644 --- a/prob-def.tex +++ b/prob-def.tex @@ -19,7 +19,7 @@ Each gate has the following members: \type, \vari{input}, %\val, \colorlet{figray}{black!65} \colorlet{fillred}{red!45} \colorlet{fillblue}{blue!45} -\colorlet{fillbrown}{brown!45} +\colorlet{fillbrown}{green!45} \begin{wrapfigure}{r}{0.2\textwidth} %\begin{figure}[t!] \centering @@ -101,7 +101,7 @@ $\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\Cref{fig:circuit} depicts a circuit \circuit in $\circuitset{2X^2+3XY-2Y^2}$. Light-text annotations +\Cref{fig:circuit} depicts a circuit \circuit in $\circuitset{2X^2+3XY-2Y^2}$. Light-text annotations and the colors %denote the computation of $\abs{\circuit}\inparen{1, \ldots, 1}$ which we introduce can be ignored until~\Cref{sec:algo}. %One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.
% diff --git a/pwsem.tex b/pwsem.tex index 8f66335..46427df 100644 --- a/pwsem.tex +++ b/pwsem.tex @@ -13,11 +13,12 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo \fi %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -Let $\abs{\poly}$ be the number of operators in $\poly$. Then: +Let $\abs{\poly'}$ be the number of operators in $\poly'$. Then: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{Corollary}\label{cor:expct-sop} -If $\poly$ is a \abbrOneBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $\bigO{\abs{\poly}}$ time. +If $\poly'$ is a \abbrOneBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly'$, i.e., $\expct\pbox{\poly'}$ % = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ +can be computed in $\bigO{\abs{\poly'}}$ time. \end{Corollary} % \subsubsection{Possible World Semantics}\label{subsub:possible-world-sem} diff --git a/single_p.tex b/single_p.tex index 9fd048d..eeb3e08 100644 --- a/single_p.tex +++ b/single_p.tex @@ -33,7 +33,7 @@ Since $p$ is fixed, the earlier polynomial interpolation based argument does not %\end{Definition} \begin{Lemma}\label{lem:lin-sys} -Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^3$ such that +Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^2$ such that \begin{equation} \label{eq:lin-eqs-single-p} \begin{pmatrix}