Added two lemmas to S3.

2021-09-14 08:21:57 -04:00 · 2021-09-14 08:21:57 -04:00 · 74d67ae4b0
parent 65f7b1bddc
commit 74d67ae4b0
1 changed files with 12 additions and 5 deletions
--- a/mult_distinct_p.tex
+++ b/mult_distinct_p.tex
@@ -51,7 +51,7 @@ For any graph $G=(V,\edgeSet)$ and $\kElem\ge 1$, define
 SELECT 1 FROM OnTime a, Route r, OnTime b
 WHERE a.city = r.city1 AND b.city = r.city2
 \end{lstlisting}
-as $R_i$ for each $i \in [k]$.  The query then becomes
+as $R_i$ for each $i \in [k]$.  The query $\query^k$ then becomes
 \begin{lstlisting}
 SELECT 1 FROM $R_1$ JOIN $R_2$ JOIN$\cdots$JOIN $R_k$
 \end{lstlisting}          
@@ -66,14 +66,21 @@ SELECT 1 FROM $R_1$ JOIN $R_2$ JOIN$\cdots$JOIN $R_k$
 %\[\poly^k_G\dlImp OnTime(C_1),Route(C_1, C_1'),OnTime(C_1'),\dots,OnTime(C_\kElem),Route(C_\kElem,C_\kElem'),OnTime(C_\kElem')\]
 %\end{minipage}
 %}
-where adapting the PDB instance in \Cref{fig:two-step}, relation $OnTime$ has $4$ tuples corresponding to each vertex for $i$ in $[4]$, each with probability $\prob_i$ and $Route$ has tuples corresponding to the edges $\edgeSet$ (each with probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $Route$ as well, but since they always are present with probability $1$, we drop those. Our argument also works when all the tuples in $Route$ also are present with probability $\prob$ but to simplify notation we assign probability $1$ to edges.}
+\noindent where, adapting the PDB instance in \Cref{fig:two-step}, relation $OnTime$ has $4$ tuples, one for each vertex $i \in [4]$, each with probability $\prob_i$, and $Route$ has tuples corresponding to the edges $\edgeSet$ (each with probability $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $Route$ as well, but since they are always present with probability $1$, we drop those. Our argument also works when all the tuples in $Route$ are present with probability $\prob$, but to simplify notation we assign probability $1$ to edges.}
 Note that this implies that our hard lineage polynomial can be represented as an expression tree produced by a project-join query with the same probability value for each input tuple $\prob_i$, and hence is indeed a lineage polynomial for a \abbrTIDB \abbrPDB.

 \begin{Lemma}\label{lem:pdb-for-def-qk}
-The relations encoding the edges for the hard query of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
+Assuming that each $v \in \vset$ has degree $\geq 1$,\footnote{We argue that this is a reasonable assumption, since any vertex with degree $= 0$ can be dropped without affecting the result of our hard query.} the \abbrPDB relations encoding the edges for the hard query of \Cref{def:qk} can be computed in $\bigO{\numedge}$ time.
 \end{Lemma}
-\begin{proof}
-Only two relations need be constructed, one for the vertexes and one for the edges.  By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$.  If we assume a constant factor of edges in the number of vertexes, then we have $\bigO{\numedge}$ time..
+\begin{proof}[Proof of \Cref{lem:pdb-for-def-qk}]
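+Only two relations need be constructed, one for the set $\vset$ and one for the set $\edgeSet$.  By a simple linear scan, each can be constructed in time $\bigO{\numedge + \numvar}$.  Given that the degree of each $v \in \vset$ is at least $1$, every vertex is incident to at least one edge, and since each edge is incident to at most two vertices we have $\abs{\vset} \le 2\abs{\edgeSet}$.  Hence $\abs{\vset}$ is within a constant factor of $\abs{\edgeSet}$, so the $\numvar$ term is dominated by the $\numedge$ term, and this yields the claimed runtime.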
+\end{proof}
+
+\begin{Lemma}\label{lem:tdet-om}
+For $\query$ defined above, the runtime $\qruntime{\query^k, \dbbase}$ is $O_k(\numedge)$.
+\end{Lemma}
+\begin{proof}[Proof of \Cref{lem:tdet-om}]
+Since, by definition, $\dbbase = \bigcup_{\db \in \idb}\db$, it follows that $\dbbase$ consists of the relations that contain all possible $v \in \vset$ and $e \in \edgeSet$.  Because the size of the result of $\query^1$ cannot be any larger than the size of the relation encoding $\edgeSet$ (i.e., $\abs{\edgeSet}$), it follows that (using an efficient query evaluation strategy such as indexing) the runtime $\qruntime{\query^1, \dbbase}$ is indeed $O(\numedge)$.  When $k > 1$, since by \Cref{def:qk} $\query^k$ is simply a cross product of $k$ copies of the original query $\query^1$, we arrive at the desired runtime of $O_k(\numedge)$.
 \end{proof}

 \subsection{Multiple Distinct $\prob$ Values}