From ff97b625692659178d9215a7d5c44ee05ef87796 Mon Sep 17 00:00:00 2001
From: Atri Rudra <atri@buffalo.edu>
Date: Wed, 8 Jun 2022 03:26:24 +0000
Subject: [PATCH] Update on Overleaf.

---
 approx_alg.tex             |  9 +++++----
 binarybidb.tex             | 12 +++++++-----
 circuits-model-runtime.tex |  4 ++--
 introduction.tex           | 14 +++++++-------
 mult_distinct_p.tex        | 12 ++++++------
 prob-def.tex               |  4 ++--
 pwsem.tex                  |  5 +++--
 single_p.tex               |  2 +-
 8 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/approx_alg.tex b/approx_alg.tex
index 0f8f187..8c0a580 100644
--- a/approx_alg.tex
+++ b/approx_alg.tex
@@ -9,11 +9,12 @@ The following approximation algorithm applies to bag query semantics over both
 Our experimental results (see~\Cref{app:subsec:experiment}), which use queries from the PDBench benchmark~\cite{pdbench} support the notion that our bounds hold for general \abbrBIDB in practice.
 %
 %
-Corresponding proofs and pseudocode for all formal statements and algorithms
-  can be found in \Cref{sec:proofs-approx-alg}.
+Proofs and pseudocode for all formal statements and algorithms
+  are in \Cref{sec:proofs-approx-alg}.
 
 \subsection{Preliminaries and some more notation}
 
+For notational convenience, in this section we will assume that the \dbbaseName $\tupset'=[n]$.
 We now introduce definitions and notation related to circuits and polynomials that we will need to state our upper bound results. First we introduce the expansion $\expansion{\circuit}$ of circuit $\circuit$ which 
 is used in our auxiliary algorithm \sampmon for sampling monomials when computing the approximation.  
 
@@ -129,7 +130,7 @@ $1$-\abbrTIDB (where $\gamma=0$ in the equivalent $1$-\abbrBIDB of~\Cref{prop:ct
 as well as for all three queries of the PDBench \abbrBIDB benchmark (\Cref{app:subsec:experiment}). 
 
 %We prove \Cref{cor:approx-algo-punchline-ctidb} from \Cref{eq:approx-algo-runtime} via the following sequence of arguments.
-Next,  by \Cref{prop:circuit-depth} and \Cref{lem:circ-model-runtime} for any $\raPlus$ query $\query$, there exists a circuit $\circuit^*$ for $\apolyqdt$ such that $\depth(\circuit^*)\le O_{|Q|}(\log{n})$ and $\size(\circuit^*)\le O_k\inparen{\qruntime{\query, \tupset, \bound}}$. Then, we note that \Cref{prop:ctidb-reduct} gives us an equivalent $\circuit$ from $\circuit^*$ is essentially the same size and has $\gamma(\circuit)\le 1-c^{-\Omega(k)}$ (\Cref{lem:ctidb-gamma}). Finally, we argue (using the fact $\circuit^*$ has low depth) that $\abs{\circuit^*}(1,\dots,1)\le \size(\circuit^*)^{O_k(1)}$ (\Cref{lem:val-ub}).
+Next, by \Cref{prop:circuit-depth} and \Cref{lem:circ-model-runtime} for any $\raPlus$ query $\query$, there exists a circuit $\circuit^*$ for $\apolyqdt$ such that $\depth(\circuit^*)\le O_{|Q|}(\log{n})$ and $\size(\circuit^*)\le O_k\inparen{\qruntime{\query, \tupset, \bound}}$. Then, we note that \Cref{prop:ctidb-reduct} gives us an equivalent circuit $\circuit$ from $\circuit^*$ that has essentially the same size/depth and satisfies $\gamma(\circuit)\le 1-c^{-\Omega(k)}$ (\Cref{lem:ctidb-gamma}). Finally, we argue (using the fact that $\circuit$ has low depth) that $\abs{\circuit}(1,\dots,1)\le \size(\circuit)^{O_k(1)}$ (\Cref{lem:val-ub}).
 %Next, we note that the above result %along with \Cref{lem:ctidb-gamma}
 The above sequence of arguments results in the following result (which answers \Cref{prob:big-o-joint-steps} in the affirmative):
 \begin{Corollary}
@@ -137,7 +138,7 @@ The above sequence of arguments results in the following result (which answers \
 Let $\query$ be an $\raPlus$ query and $\pdb$ be a \abbrCTIDB with $p_0>0$, where $p_0$ as in \Cref{cor:approx-algo-const-p}, is an absolute constant. Let $\poly(\vct{X})=\apolyqdt$ for any result tuple $\tup$ with $\deg(\poly)=k$. Then one can compute an approximation satisfying \Cref{eq:approx-algo-bound-main} in time $O_{k,|Q|,\error',\conf,\bound}\inparen{\qruntime{\optquery{\query}, \tupset, \bound}}$ (given $\query,\tupset$ and $\prob_{\tup, j}$ for each $\tup\in\tupset,~j\in\pbox{\bound}$ that defines $\bpd$).
 \end{Corollary}
 
-If we want to approximate the expected multiplicities of all $Z=O(n^k)$ result tuples $\tup$ simultaneously, we just need to run the above result with $\conf$ replaced by $\frac \conf Z$. Note this increases the runtime by only a logarithmic factor.
+If we want to approximate the expected multiplicities of all $Z=O(n^k)$ result tuples $\tup$ simultaneously, we just need to run the above result with $\conf$ replaced by $\frac \conf Z$, which increases the runtime by a  factor of $O_k(\log{n})$.
 
 
 
diff --git a/binarybidb.tex b/binarybidb.tex
index ad70ded..c73b8bf 100644
--- a/binarybidb.tex
+++ b/binarybidb.tex
@@ -8,7 +8,7 @@ Given an index set $S$ and variables $X_\tup$ for $\tup\in S$, a (general) polyn
 is formally defined as: 
 \begin{align}
   \label{eq:sop-form}
-\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}=\inparen{d_\tup}_{\tup\in S}\in\{0,\ldots,\hideg\}^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN.
+\genpoly\inparen{\inparen{X_\tup}_{\tup\in S}}=\sum_{\vct{d}=\inparen{d_\tup}_{\tup\in S}\in[0,\hideg]^{S}} c_{\vct{d}}\cdot \prod_{\tup\in S}X_\tup^{d_\tup}&&\text{ where } c_{\vct{d}}\in \semN.
 \end{align}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -26,23 +26,25 @@ We call a polynomial $\poly\inparen{\vct{X}}$ a \emph{\abbrCTIDB-lineage polynom
 \subsection{\abbrOneBIDB}\label{subsec:one-bidb}
 \label{subsec:tidbs-and-bidbs}
 
-\noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the possible tuples $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and the events $\tup\in\block_i$ and $\tup\in\block_j$ are independent  for $i\ne j$.  $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events.  We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}.  We define next a specific construction of \abbrBIDB that is useful for our work.
+\noindent A block independent database \abbrBIDB $\pdb'$ models a set of worlds each of which consists of a subset of the \dbbaseName $\tupset'$, where $\tupset'$ is partitioned into $\numblock$ blocks $\block_i$ and the events $\tup\in\block_i$ and $\tup\in\block_j$ are independent  for $i\ne j$.  $\pdb'$ further constrains that all $\tup\in\block_i$ for all $i\in\pbox{\numblock}$ of $\tupset'$ be disjoint events.  
+%We refer to any monomial that includes $X_\tup X_{\tup'}$ for $\tup\neq\tup'\in\block_i$ as a \emph{cancellation}.
+We define next a specific construction of \abbrBIDB that is useful for our work.
 
 \begin{Definition}[\abbrOneBIDB]\label{def:one-bidb}
-Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$  where $\tupset'$ is the set of possible tuples such that each $\tup \in \tupset'$ has a multiplicity domain of $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$.  $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples.  $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$.  Given $W\in\onebidbworlds{\tupset'}$ and for $i\in\pbox{\numblock}$, let $\prob_\tup(W) = \begin{cases}
+Define a \emph{\abbrOneBIDB} to be the pair $\pdb' = \inparen{\bigtimes_{\tup\in\tupset'}\inset{0, \bound_\tup}, \bpd'},$  where $\tupset'$ is the \dbbaseName such that each $\tup \in \tupset'$ has a multiplicity in $\inset{0, \bound_\tup}$, with $\bound_\tup\in\mathbb{N}$.  $\tupset'$ is partitioned into $\numblock$ independent blocks $\block_i,$ for $i\in\pbox{\numblock}$, of disjoint tuples.  $\bpd'$ is characterized by the vector $\inparen{\prob_\tup}_{\tup\in\tupset'}$ where for every block $\block_i$, $\sum_{\tup \in \block_i}\prob_\tup \leq 1$.  For $W\in\onebidbworlds{\tupset'}$ and  $i\in\pbox{\numblock}$, let $\prob_i(W) = \begin{cases}
 	1 - \sum_{\tup\in\block_i}\prob_\tup	&	\text{if }W_\tup = 0\text{ for all }\tup\in\block_i\\
 	0									&	\text{if there exists } \tup \neq \tup'\in\block_i; W_\tup, W_{\tup'}\neq 0\\
 	\prob_\tup							&	W_\tup \ne 0 \text{ for one unique } t\in B_i.\\
 	\end{cases}$
 	
-\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{\tup\in\tupset'}\prob_{\tup}(W)$.
+\noindent$\bpd'$ is the probability distribution across all worlds such that, given $W\in\bigtimes_{\tup \in \tupset'}\inset{0,\bound_\tup}$, $\probOf\pbox{\worldvec = W} = \prod_{i\in\pbox{\numblock}}\prob_{i}(W)$.
 %\footnote{We slightly abuse notation here, denoting a world vector as $W$ rather than $\worldvec$ to distinguish between the random variable and the world instance.  When there is no ambiguity, we will denote a world vector as $\worldvec$.}
 \end{Definition}
 
 Lineage polynomials for arbitrary \dbbaseName $\gentupset'$ are constructed in a manner analogous to $1$-\abbrTIDB\xplural (see \Cref{fig:nxDBSemantics}), differing only in the base case.  
 In a $1$-\abbrTIDB, each tuple contributes a multiplicity of 0 or 1, and $\polyqdt{\rel}{\gentupset}{\tup} = X_\tup$. %\textcolor{red}{CHANGE}
 In a \abbrOneBIDB, each tuple $\tup\in\tupset'$ contributes its corresponding multiplicity: %\textcolor{red}{CHANGE}
-$\polyqdt{\rel}{\gentupset}{\tup} = c_\tup\cdot X_\tup$.  These semantics are fully detailed in \Cref{fig:lin-poly-bidb}.
+$\polyqdt{\rel}{\gentupset}{\tup} = c_\tup\cdot X_\tup$.  See \Cref{fig:lin-poly-bidb} for details.
 
 \abbrOneBIDB are powerful enough to encode \abbrCTIDB:
 \begin{Proposition}[\abbrCTIDB reduction]\label{prop:ctidb-reduct}
diff --git a/circuits-model-runtime.tex b/circuits-model-runtime.tex
index 156e007..394d93c 100644
--- a/circuits-model-runtime.tex
+++ b/circuits-model-runtime.tex
@@ -24,7 +24,7 @@
 % In practice there is often a limited number of alternatives for each block (e.g., which of five conflicting data sources to trust). Note that all \tis trivially fulfill this condition (i.e., $c = 1$).}
 %That is for \bis that fulfill this restriction approximating the expectation of results of SPJU queries is only has a constant factor overhead over deterministic query processing (using one of the algorithms for which we prove the claim).
 % with the same complexity as it would take to evaluate the query on a deterministic \emph{bag} database of the same size as the input PDB.
-In~\Cref{sec:intro}, we introduced the structure $T_{det}\inparen{\cdot}$ to analyze the runtime complexity of~\Cref{prob:expect-mult}.
+In~\Cref{sec:intro}, we introduced the function $T_{det}\inparen{\cdot}$ to analyze the runtime complexity of~\Cref{prob:expect-mult}.
 To decouple our results from any specific join algorithm, we first lower bound the cost of a join.
 
 \begin{Definition}[Join Cost]
@@ -69,7 +69,7 @@ We assume that full table scans are used for every base relation access. We can
 %Observe that 
 % () .\footnote{This claim can be verified by e.g. simply looking at the {\em Generic-Join} algorithm in~\cite{skew} and {\em factorize} algorithm in~\cite{factorized-db}.} It can be verified that the above cost model on the corresponding $\raPlus$ join queries correctly captures the runtime of current best known .
 
-\Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\tupset$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(Q,\tupset,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntimenoopt{\optquery{\query}, \tupset,\bound})$, as we assumed when moving from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.  Lastly, we can handle FAQs and factorized databases by allowing for optimization. %, i.e. $\qruntimenoopt{\optquery{\query}, \gentupset, \bound}$.
+\Cref{lem:circ-model-runtime} and \Cref{lem:tlc-is-the-same-as-det} show that for any $\raPlus$ query $\query$ and $\tupset$, there exists a circuit $\circuit^*$ such that $\timeOf{\abbrStepOne}(Q,\tupset,\circuit^*)$ and $|\circuit^*|$ are both $O(\qruntimenoopt{\optquery{\query}, \tupset,\bound})$, as we assumed when moving from \Cref{prob:big-o-joint-steps} to \Cref{prob:intro-stmt}.  Lastly, we can handle FAQs/AJAR queries and factorized databases by allowing for optimization. %, i.e. $\qruntimenoopt{\optquery{\query}, \gentupset, \bound}$.
 %
 %We now make a simple observation on the above cost model:
 %\begin{proposition}
diff --git a/introduction.tex b/introduction.tex
index 8f4bc69..17f7ed7 100644
--- a/introduction.tex
+++ b/introduction.tex
@@ -256,7 +256,7 @@ For example, if we insist that $\circuit$ represent the lineage polynomial in \a
 Hence, just $\timeOf{\abbrStepOne}(\query,\tupset,\circuit)$ is too large.
 However, systems can directly emit compact, factorized representations of $\poly(\vct{X})$ (e.g., as a consequence of the standard projection push-down optimization~\cite{DBLP:books/daglib/0020812}).
 Accordingly, this work uses (arithmetic) circuits\footnote{
-  An arithmetic circuit is a DAG with variable/numeric source gates and multiplication/addition internal/sink gates.
+  An arithmetic circuit is a DAG with variable/numeric source gates and multiplication/addition internal gates.
 }
 as the representation system of $\poly(\vct{X})$, and we show in \Cref{sec:circuit-depth} an $\bigO{\qruntime{\optquery{\query}, \tupset, \bound}}$ algorithm for constructing the lineage polynomial for all result tuples of an $\raPlus$ query $\query$ (or more precisely, a circuit $\circuit$ with $\numvar$ sinks, one per output tuple).% representing the tuple's lineage).
 %
@@ -270,24 +270,23 @@ Given one circuit $\circuit$ that encodes $\Phi\inparen{\vct{X}}$ for all result
 \end{Problem}
 
 We will formalize the notions of circuits and hence, \Cref{prob:intro-stmt} in \Cref{sec:expression-trees}. For an upper bound on approximating the expected count, it is easy to check that if all the probabilties are constant then (with an additive adjustment) $\poly\left(\prob_1,\dots, \prob_n\right)$ is a constant factor approximation of $\rpoly$ (recall \Cref{def:reduced-poly}).
-This is illustrated in the following example using $\query_1^2$ from earlier.  To aid in presentation we again limit our focus to $\refpoly{1, }^{\inparen{ABU}^2}$, assume $\bound = 2$ for variable $U$ and $\bound = 1$ for all other variables.  Let $\prob_A$ denote $\probOf\pbox{A = 1}$.
+This is illustrated in the following example using $\query_1^2$ from earlier.  To aid in presentation we again limit our focus to $\monomial{1,R}$, assume $\bound = 2$ for variable $U$ and $\bound = 1$ for all other variables.  Let $\prob_A$ denote $\probOf\pbox{A = 1}$.
 %In computing $\rpoly$, we have some cancellations to deal with:
 Then we have:
 %
 %\begin{footnotesize}
 %\begin{equation*}
-$\refpoly{1, }^{\inparen{ABU}^2}\inparen{\vct{X}} = A^2\inparen{U_1^2 + 4U_1U_2 + 4U_2^2}B^2 =A^2U_1^2B^2 + 4A^2U_1U_2B^2+4A^2U_2^2B^2$
+$\monomial{1,R}\inparen{\vct{X}} = A^2\inparen{U_1^2 + 4U_1U_2 + 4U_2^2}B^2 =A^2U_1^2B^2 + 4A^2U_1U_2B^2+4A^2U_2^2B^2$, which in turn implies:
 %&\qquad+ 2AX_2B^2YE + 2AX_1B^2ZC + 2AX_2B^2ZC + 2B^2YEZC\\
 %\end{equation*}
 %\end{footnotesize}
 %Recall that
 %\begin{footnotesize}
 %\begin{equation*}
-$\rpoly_1^{\inparen{ABU}^2}\inparen{\vct{X}} = AU_1B+4AU_2B$
+%
 %\end{equation*}
 %\end{footnotesize}
-implies:
-\[	 \refpoly{1, }^{\inparen{ABU}^2}\inparen{\probAllTup} -4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2=\prob_A^2\prob_{U_1}^2\prob_B^2 +   4\prob_A^2\prob_{U_2}^2\prob_B^2.\]
+\[	 \monomial{1,R}\inparen{\probAllTup} -4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2=\prob_A^2\prob_{U_1}^2\prob_B^2 +   4\prob_A^2\prob_{U_2}^2\prob_B^2.\]
 %Substituting $\vct{\prob}$ for $\vct{X}$,
 %\begin{footnotesize}
 %\begin{align*}
@@ -299,9 +298,10 @@ implies:
 %	&= \rpoly_1^{\inparen{ABX}^2}\inparen{\vct{p}} + 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2.
 %\end{align*}
 %\end{footnotesize}
+Note that $\rmonomial{1}\inparen{\vct{X}} = AU_1B+4AU_2B$.
 If we assume that all probability values are in $[p_0,1]$ for some $p_0>0$, 
 %then given access to $\refpoly{1, }^{\inparen{ABX}^2}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{X_1}\prob_{X_2}\prob_B^2$
-we get that $\refpoly{1, }^{\inparen{ABU}^2}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2$ is in the range $\pbox{p_0^3\cdot\rpoly^{\inparen{ABU}^2}_1\inparen{\vct{\prob}}, \rpoly_1^{\inparen{ABU}^2}\inparen{\vct{\prob}}}$.
+we get that $\monomial{1,R}\inparen{\vct{\prob}} - 4\prob_A^2\prob_{U_1}\prob_{U_2}\prob_B^2$ is in the range $\pbox{p_0^3\cdot\rmonomial{1}\inparen{\vct{\prob}}, \rmonomial{1}\inparen{\vct{\prob}}}$.
 %We can simulate sampling from $\refpoly{1, }^2\inparen{\vct{X}}$ by sampling monomials from $\refpoly{1, }^2$ while ignoring any samples $A^2X_1X_2B^2$.
 Note however, that this is \emph{not a tight approximation}.
 In~\Cref{sec:algo} we demonstrate that a $(1\pm\epsilon)$ (multiplicative) approximation with competitive performance is achievable.
diff --git a/mult_distinct_p.tex b/mult_distinct_p.tex
index cbfa10f..2f32dc3 100644
--- a/mult_distinct_p.tex
+++ b/mult_distinct_p.tex
@@ -2,13 +2,13 @@
 %!TEX root=./main.tex
 \section{Hardness of Exact Computation}
 \label{sec:hard}
-In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific (family) of hard instances $(\qhard,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a $1$-\abbrTIDB.
+In this section, we will prove the hardness results claimed in Table~\ref{tab:lbs} for a specific (family of) hard instances $(\qhard^k,\pdb)$ for \Cref{prob:bag-pdb-poly-expected} where $\pdb$ is a $1$-\abbrTIDB.
 Note that this implies hardness for \abbrCTIDB\xplural $\inparen{\bound\geq1}$
 %; \Cref{prob:bag-pdb-poly-expected} cannot be done in $\bigO{\qruntime{\optquery{\query},\tupset,\bound}}$ runtime.  The results also apply to 
 as well as \abbrOneBIDB. % and other \abbrPDB\xplural.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %\subsection{Preliminaries}\label{sec:hard:sub:pre}
-Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity.  We can think of $H$ as being of constant size and $G$ as growing.  
+Our hardness results are based on (exactly) counting the number of (not necessarily induced) subgraphs in $G$ isomorphic to $H$. Let $\numocc{G}{H}$ denote this quantity.  We think of $H$ as being of constant size and $G$ as growing.  
 In particular, we will consider computing the following counts (given $G$ in its adjacency list representation): $\numocc{G}{\tri}$ (the number of triangles), $\numocc{G}{\threedis}$ (the number of $3$-matchings), and the latter's generalization $\numocc{G}{\kmatch}$ (the number of $k$-matchings).  We use $\kmatchtime$ to denote the optimal runtime of computing $\numocc{G}{\kmatch}$ exactly.  Our results in \Cref{sec:multiple-p} are based on the following known (conditional) hardness results:
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -33,7 +33,7 @@ Given positive integer $k$ and undirected graph $G=(\vset,\edgeSet)$,  $\kmatcht
 %We note that the above conjecture is somewhat non-standard. In particular, the best known algorithm to compute $\numocc{G}{\kmatch}$ takes time $\Omega\inparen{|V|^{k/2}}$
 %(i.e. if this is the best algorithm then $c_0=\frac 14$)
 %~\cite{k-match}. 
-The above result is saying is that (assuming ETH) one can only hope for a slightly super-polynomial improvement over the trivial algorithm to compute $\numocc{G}{\kmatch}$.
+The above result says that, assuming the Exponential Time Hypothesis (ETH), one can only hope for a slightly super-polynomial improvement over the trivial algorithm to compute $\numocc{G}{\kmatch}$.
 %
 
 Our hardness result in Section~\ref{sec:single-p} is based on the following conjectured hardness result:
@@ -73,11 +73,11 @@ For any graph $G=(V,\edgeSet)$ and $\kElem\ge 1$, define
 SELECT COUNT(*) FROM $\underbrace{Q_1\text{ JOIN }Q_1\text{ JOIN}\cdots\text{JOIN }Q_1}_{k\rm\ times}$
 \end{lstlisting}          
 \end{mdframed}
-In the above, $\query_1$ is defined in  \Cref{sec:intro}, which is the same as $\qhard^1$. 
+In the above, $\query_1$ is as defined in  \Cref{sec:intro}, which is the same as $\qhard^1$. 
 %
 %\noindent %Consider again the \abbrCTIDB instance $\pdb$ of~\Cref{fig:two-step} and, for our hard instance, let $\bound = 1$.  $\pdb$ generalizes to one compatible 
 We next define the instances for $T$ and $R$ that lead to the lineage polynomial in~\Cref{def:qk} as follows. Relation $T$ has $n$ tuples corresponding to each vertex for $i$ in $[n]$, each with probability $\prob$ and $R$ has tuples corresponding to the edges $\edgeSet$ (each with a probability of $1$).\footnote{Technically, $\poly_{G}^\kElem(\vct{X})$ should have variables corresponding to tuples in $R$ as well, but since they always are present with probability $1$, we drop those. Our argument also works when all the tuples in $R$ also are present with probability $\prob$ but to simplify notation we assign probability $1$ to edges.}
-In other words, this instance $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$).
+In other words, the \dbbaseName $\tupset$ contains the set of $\numvar$ unary tuples in $T$ (which corresponds to $\vset$) and $\numedge$ binary tuples in $R$ (which corresponds to $\edgeSet$).
 Note that this implies that $\poly_{G}^\kElem$ is indeed a $1$-\abbrTIDB lineage polynomial. 
 
 Next, we note that the runtime for answering $\qhard^k$ on deterministic database $\tupset$, as defined above, is $O_k\inparen{\numedge}$ (i.e. deterministic query processing is `easy' for this query):
@@ -93,7 +93,7 @@ We are now ready to present one of our main hardness result.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \begin{Theorem}\label{thm:mult-p-hard-result}
-Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$.  Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (over all $i\in [2k+1]$) for arbitrary $G=(\vset,\edgeSet)$
+Let $\prob_0,\ldots,\prob_{2k}$ be $2k + 1$ distinct values in $(0, 1]$.  Then computing $\rpoly_G^\kElem(\prob_i,\dots,\prob_i)$ (for all $i\in\{0,\ldots,2k\}$) for arbitrary $G=(\vset,\edgeSet)$
 needs time $\bigOmega{\kmatchtime}$, if $\kmatchtime\ge \omega\inparen{\abs{\edgeSet}}$.
 \end{Theorem}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/prob-def.tex b/prob-def.tex
index 9e49c89..031957e 100644
--- a/prob-def.tex
+++ b/prob-def.tex
@@ -19,7 +19,7 @@ Each gate has the following members: \type, \vari{input}, %\val,
 \colorlet{figray}{black!65}
 \colorlet{fillred}{red!45}
 \colorlet{fillblue}{blue!45}
-\colorlet{fillbrown}{brown!45}
+\colorlet{fillbrown}{green!45}
 \begin{wrapfigure}{r}{0.2\textwidth}
 	%\begin{figure}[t!]
 		\centering
@@ -101,7 +101,7 @@ $\circuitset{\polyX}$ is the set of all possible circuits $\circuit$ such that $
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-\Cref{fig:circuit} depicts a circuit \circuit in $\circuitset{2X^2+3XY-2Y^2}$.   Light-text annotations 
+\Cref{fig:circuit} depicts a circuit \circuit in $\circuitset{2X^2+3XY-2Y^2}$.   Light-text annotations and the colors
 %denote the computation of $\abs{\circuit}\inparen{1, \ldots, 1}$ which we introduce 
 can be ignored until~\Cref{sec:algo}. %One can think of $\circuitset{\polyX}$ as the infinite set of circuits where for each element \circuit, $\polyf\inparen{\circuit} = \polyX$.
 %
diff --git a/pwsem.tex b/pwsem.tex
index 8f66335..46427df 100644
--- a/pwsem.tex
+++ b/pwsem.tex
@@ -13,11 +13,12 @@ Consider $\poly(X, Y) = (X + Y)(X + Y)$ where $X$ and $Y$ are from different blo
 \fi
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-Let $\abs{\poly}$ be the number of operators in $\poly$. Then:
+Let $\abs{\poly'}$ be the number of operators in $\poly'$. Then:
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{Corollary}\label{cor:expct-sop}
-If $\poly$ is a \abbrOneBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly$, i.e., $\expct\pbox{\poly} = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ can be computed in $\bigO{\abs{\poly}}$ time.
+If $\poly'$ is a \abbrOneBIDB lineage polynomial already in \abbrSMB, then the expectation of $\poly'$, i.e., $\expct\pbox{\poly'}$ % = \rpoly\left(\prob_1,\ldots, \prob_\numvar\right)$ 
+can be computed in $\bigO{\abs{\poly'}}$ time.
 \end{Corollary}
 
 % \subsubsection{Possible World Semantics}\label{subsub:possible-world-sem}
diff --git a/single_p.tex b/single_p.tex
index 9fd048d..eeb3e08 100644
--- a/single_p.tex
+++ b/single_p.tex
@@ -33,7 +33,7 @@ Since $p$ is fixed, the earlier polynomial interpolation based argument does not
 %\end{Definition}
 
 \begin{Lemma}\label{lem:lin-sys}
-Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^3$ such that
+Fix $\prob\in (0,1)$. Given $\rpoly_{\graph{\ell}}^3(\prob,\dots,\prob)$ for $\ell\in [2]$, we can compute in $O(m)$ time a vector $\vct{b}\in\mathbb{R}^2$ such that
 \begin{equation}
 \label{eq:lin-eqs-single-p}
 \begin{pmatrix}