paper-BagRelationalPDBsAreHard/poly-form.tex

%root: main.tex
%!TEX root = ./main.tex
%\onecolumn
\section{Polynomial Formulation}

We can think of $\poly(\vct{w})$ as a function whose inputs are the variables $X_1,\ldots, X_\numTup$ as in $\poly(X_1,\ldots, X_\numTup)$.  Denote the sum of products expansion of  $\poly(X_1,\ldots, X_\numTup)$ as  $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$.

\begin{Definition}\label{def:qtilde}
Define $\rpoly(X_1,\ldots, X_\numTup)$ as the reduced version of $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$, of the form
$\rpoly(X_1,\ldots, X_\numTup) = $

\[\poly(X_1,\ldots, X_\numTup)_{\Sigma} \mod X_1^2-X_1\cdots\mod X_\numTup^2 - X_\numTup.\]
\end{Definition}


Intuitively, $\rpoly(\textbf{X})$ is the expanded sum of products form of $\poly(\textbf{X})$ such that if any $X_j$ term  has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.  
Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting sum of product expansion of $\poly$ when $\poly$ is in a factorized form such that none of its terms have an exponent $e > 1$, with an idempotent product operator.  

The usefulness of this reduction will be seen shortly.

\begin{Lemma}\label{lem:pre-poly-rpoly}
When $\poly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i^{d_i}$, we have then that $\rpoly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i$.
\end{Lemma}
\begin{proof}
Follows by the construction of $\rpoly$ in \cref{def:qtilde}.
\end{proof}

\qed

Note the following fact:
\begin{Proposition}
\[\text{For all } (\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}^\numTup, \poly(\wbit_1,\ldots, \wbit_\numTup) = \rpoly(\wbit_1,\ldots, \wbit_\numTup).\]
\end{Proposition}

\begin{proof}
Note that any $\poly$ in factorized form is equivalent to its sum of product expansion.  For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
\end{proof}

\qed

Define all variables $X_i$ in $\poly$ to be independent.
\begin{Lemma}\label{lem:exp-poly-rpoly}
The expectation of a possible world in $\poly$ is equal to $\rpoly(\prob_1,\ldots, \prob_\numTup)$.
\begin{equation*}
\expct_{\wVec}\pbox{\poly(\wVec)}  = \rpoly(\prob_1,\ldots, \prob_\numTup).
\end{equation*}
\end{Lemma}

\begin{proof}
%Using the fact above, we need to compute \[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup)\].  We therefore argue that 
%\[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup) = 2^\numTup \cdot \rpoly(\frac{1}{2},\ldots, \frac{1}{2}).\]

Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numTup$ variables with highest degree $= D$: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numTup) = \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup X_i^{d_i}.\]


Then for expectation we have
\begin{align}
\expct_{\wVec}\pbox{\poly(\wVec)} &= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \expct_{\wVec}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numTup)\label{p1-s5}
\end{align}

In \cref{p1-s1}, linearity of expectation moves the expectation inside the sum, and in \cref{p1-s2}, independence of the variables allows the expectation to be pushed all the way inside of the product.  In \cref{p1-s3}, note that $w_i \in \{0, 1\}$ which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$.  Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.

%\OK{
%	You don't need to tie this to TI-DBs if you define the variables ($X_i$) to be independent.  
%	Annotations 
%	Boolean expressions over uncorrelated boolean variables are sufficient to model TI-, BI-, and
%	PC-Tables.  This should still hold for arithmetic over the naturals.
%}


Finally, observe \cref{p1-s5} by construction in \cref{lem:pre-poly-rpoly}, that $\rpoly(\prob_1,\ldots, \prob_\numTup)$ is exactly the product of probabilities of each variable in each monomial across the entire sum.

\qed
\end{proof}

\begin{Corollary}
If $\poly$ is given to us in a sum of monomials form, the expectation of $\poly$ ($\ex{\poly}$) can be computed in $O(|\poly|)$ time, where $|\poly|$ denotes the total number of multiplication/addition operators.
\end{Corollary}

\begin{proof}
Note that \cref{lem:exp-poly-rpoly} shows that $\ex{\poly} = \rpoly(\prob_1,\ldots, \prob_\numTup)$.  Therefore, if $\poly$ is already in sum of products form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numTup)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numTup)$), which indeed takes $O(|\poly|)$ computations.\qed
\end{proof}

\subsection{When $\poly$ is not in sum of monomials form}


We would like to argue that, in the general case, the expectation cannot be computed in linear time.

To this end, consider the following graph $G(V, E)$, where $|E| = m$, $|V| = \numTup$, and $i, j \in [\numTup]$.  Consider the query $q_E(X_1,\ldots, X_\numTup) = \sum\limits_{(i, j) \in E} X_i \cdot X_j$.
\AR{The two lemmas need to be re-written once notation for representing a query is finalized in Section 1.}
\AH{I think that we are okay with this now.  We can use both polynomial and query notation interchangably.  Does it matter which we use in the lemmas, i.e. $\poly(\vct{w})$ vs. $\poly(\wElem_1,\ldots, \wElem_N)$.  Please let me know.}
\begin{Lemma}\label{lem:const-p}
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\dots, \wElem_\numTup)^3$ in T(m) time for $\wElem_1 =\cdots= \wElem_\numTup = \prob$, then we can count the number of 3-matchings in $G$ in $T(m) + O(m)$ time.
\end{Lemma}

\begin{Lemma}\label{lem:gen-p}
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ in T(m) time for O(1) distinct values of $\prob$ then we can count the number of triangles (and the number of 3-paths, the number of 3-matchings) in $G$ in O(T(m) + m) time.
\end{Lemma}

\begin{Lemma}\label{lem:qE3-exp}
When we expand $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ out and assign all exponents $e \geq 1$ a value of $1$, we have the following,
	\begin{align}
		&\rpoly(\prob,\ldots, \prob) = \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3 +\nonumber\\ 	
		&\qquad\qquad6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6.\label{claim:four-one}
	\end{align}
\end{Lemma}
\AH{\cref{lem:qE3-exp} needs to be proven.  I think I might need a gentle nudge on this, I can understand intuitively, but I think there is a combinatorics argument to prove this formally, I'm just a bit unsure.}
\AH{The warm-up below is fine for now, but will need to be removed for the final draft}
First, let us do a warm-up by computing $\rpoly(\wElem_1,\dots, \wElem_\numTup)$ when $\poly = q_E(\wElem_1,\ldots, \wElem_\numTup)$.  Before doing so, we introduce a notation.  Let $\numocc{G}{H}$ denote the number of occurrences of $H$ in $G$.  So, e.g., $\numocc{G}{\ed}$ is the number of edges ($m$) in $G$.

\AH{We need to make a decision on subgraph notation, and number of occurrences notation.  Waiting to hear back from Oliver before making a decision.}
\AH{UPDATE: I did a quick google, and it \textit{appears} that there is a bit of a learning curve to implement node/edge symbols in LaTeX.  So, maybe, if time is of the essence, we go with another notation.}

\begin{Claim}
We can compute $\rpoly(\prob,\ldots, \prob)^2$ in O(m) time.
\end{Claim}
	\begin{proof}
		The proof basically follows by definition.  When we expand $\poly^2$, and make all exponents $e = 1$, substituting $\prob$ for all $\wElem_i$ we get $\rpoly_2(\prob,\ldots, \prob) = \numocc{G}{\ed} \cdot \prob^2 + 2\cdot \numocc{G}{\twopath}\cdot \prob^3 + 2\cdot \numocc{G}{\twodis}\cdot \prob^4$.
		\begin{enumerate}
			\item First note that 
				\begin{align*}
					\poly^2(\wVec) &= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{(i, j), (k, \ell) \in E s.t. (i, j) \neq (k, \ell)} \wElem_i\wElem_j\wElem_k\wElem_\ell\\
					&= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i
					\wElem_j^2\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell\\
				\end{align*}
				By definition of $\rpoly$, 
				\begin{equation}
					\rpoly^2(\wVec) = \sum_{(i, j) \in E} \wElem_i\wElem_j + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i\wElem_j\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell\label{eq:part-1}
				\end{equation}
				Notice that the first term is $\numocc{G}{\ed}\cdot \prob^2$, the second $\numocc{G}{\twopath}\cdot \prob^3$, and the third $\numocc{G}{\twodis}\cdot \prob^4.$
			\item Note that 
\AH{We need the correct formula for two-matchings below.}
				\begin{align*}
					&\numocc{G}{\ed} = m,\\
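					&\numocc{G}{\twopath} = \sum_{u \in V} \binom{d_u}{2} \text{ where $d_u$ is the degree of vertex $u$},\\
					% In a simple graph, every pair of distinct edges either shares an endpoint (a 2-path) or is disjoint (a 2-matching).
					&\numocc{G}{\twodis} = \binom{m}{2} - \sum_{u \in V} \binom{d_u}{2}.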
				\end{align*}
		\end{enumerate}
		Thus, since each of the summations can be computed in O(m) time, this implies that by \cref{eq:part-1} $\rpoly(\prob,\ldots, \prob)$ can be computed in O(m) time.\qed
	\end{proof}
\AH{END of the 'warm-up'}
We are now ready to state the claim we need to prove \cref{lem:const-p} and \cref{lem:gen-p}.

Let $\poly(\wVec) = q_E(\wVec)^3$.
\begin{Claim}\label{claim:four-two}
 If one can compute $\rpoly(\prob,\ldots, \prob)$ in time T(m), then we can compute the following in O(T(m) + m):
\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(\prob^2 - \prob^3).\]
\end{Claim}

\begin{proof}

We have either shown or will show that the following subgraph cardinalities can be computed in $O(m)$ time:
\[\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis}, \numocc{G}{\oneint}, \numocc{G}{\twopathdis} + \numocc{G}{\threedis}.\]

		By definition we have that
		\[\poly(\wElem_1,\ldots, \wElem_\numTup) = \sum_{\substack{(i_1, j_1),\\ (i_2, j_2),\\ (i_3, j_3) \in E}} \prod_{\ell = 1}^{3}\wElem_{i_\ell}\wElem_{j_\ell}.\]
		Rather than list all the expressions in full detail, let us make some observations regarding the sum.  Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2), e_3 = (i_3, j_3)$.  Notice that each expression in the sum consists of a triple $(e_1, e_2, e_3)$.  There are three forms the triple $(e_1, e_2, e_3)$ can take.

\underline{case 1:} $e_1 = e_2 = e_3$, where all edges are the same.  There are exactly $m$ such triples, each with a $\prob^2$ factor.

\underline{case 2:}  This case occurs when there are two distinct edges of the three.  All 6 combinations of two distinct values consist of the same monomial in $\rpoly$, i.e. $(e_1, e_1, e_2)$ is the same as $(e_2, e_1, e_2)$.  This case produces the following edge patterns: $\twodis, \twopath$.

\underline{case 3:} $e_1 \neq e_2 \neq e_3$, i.e., when all edges are distinct.  This case consists of the following edge patterns: $\threedis, \twopathdis, \threepath, \oneint, \tri$.

\AH{This proof I think could use some reorganization.  We really don't need the warm-up anymore, but we can use the formulas for case 1 and case 2.}

It has already been shown previously that $\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis}$ can be computed in O(m) time.  Here are the arguments for the rest.
\[\numocc{G}{\oneint} = \sum_{u \in V} \binom{d_u}{3}\]
$\numocc{G}{\twopathdis} + \numocc{G}{\threedis} = $ the number of occurrences of three distinct edges with five or six vertices.  This can be counted in the following manner.  For every edge $(u, v) \in E$, throw away all neighbors of $u$ and $v$ and pick two more distinct edges.
\[\numocc{G}{\twopathdis} + \numocc{G}{\threedis} = \sum_{(u, v) \in E} \binom{m - d_u - d_v + 1}{2}\]  The implication in \cref{claim:four-two} follows by the above and \cref{lem:qE3-exp}.

\AH{Justify the last sentence.}
	\end{proof}

\qed

\begin{proof}[Proof of \cref{lem:gen-p}]

%\AR{Also you can modify the text of \textsc{Proof} by using the following latex command \texttt{\\begin\{proof\}[Proof of Lemma 2]} and Latex will typeset this as \textsc{Proof of Lemma 2}, which is what you really want.}

\cref{claim:four-two} says that if we know $\rpoly_3(\prob,\ldots, \prob)$, then we can know in O(m) additional time
\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(\prob^2 - \prob^3).\]  We can think of each term in the above equation as a variable, where one can solve a linear system given 3 distinct $\prob$ values, assuming independence of the three linear equations.  In the worst case, without independence, 4 distinct values of $\prob$ would suffice...because Atri said so, and I need to ask him for understanding why this is the case, of which I suspect that it has to do with basic result(s) in linear algebra.\AR{Follows from the fact that the corresponding coefficient matrix is the so called Vandermonde matrix, which has full rank.}
\AH{This Vandermonde matrix I need to research.}
\end{proof}

\qed

\AH{Below is only a transcription of the notes.  The claims need to be verified and further worked out.}

\begin{proof}[Proof of \cref{lem:const-p}]

The argument for \cref{lem:gen-p} cannot be applied to \cref{lem:const-p} since we have that $\prob$ is fixed.  We have hope in the following:  we assume that we can solve this problem for all graphs, and the hope would be to solve the problem for say $G_1, G_2, G_3$, where $G_1$ is arbitrary, and relate the values of $\numocc{G}{H}$, where $H$ is a placeholder for the relevant edge combination.  The hope is that these relations would result in three independent linear equations, and then we would be done.

The following is an option.
\begin{enumerate}
	\item Let $G_1$ be an arbitrary graph
	\item Build $G_2$ from $G_1$, where each edge in $G_1$ gets replaced by a 2 path.
\end{enumerate}

Then $\numocc{G_2}{\tri} = 0$, and if we can prove that
\begin{itemize}
	\item $\numocc{G_2}{\threepath} = 2 \cdot \numocc{G_1}{\twopath}$
	\item $\numocc{G_2}{\threedis} = 8 \cdot \numocc{G_1}{\threedis} + 6 \cdot \numocc{G_1}{\twopathdis} + 4 \cdot \numocc{G_1}{\oneint} + 4 \cdot \numocc{G_1}{\threepath} + 2 \cdot \numocc{G_1}{\tri}$
\end{itemize}
we solve our problem for $q_E^3$ based on $G_2$ and we can compute $\numocc{G}{\threedis}$, a hard problem.
\end{proof}

\AH{Proving the above linear combination for 3-matchings in $G_2$ always holds for an arbitrary $G_1$.}

Consider graph $G_2$, constructed from an arbitrary graph $G_1$.  We wish to show that the number of 3-matchings in $G_2$ will always be the linear combination above, regardless of the construction of $G_1$.

\AR{I did not make a pass on the above since it looks incomplete and does not seem to have changed for a while. Also it would be good to define $G_2$ and $G_3$ outside of the proofs below.}

\AH{Changes start here.}

\subsubsection{$f_k$ and $G_k$}

\begin{Definition}\label{def:Gk}
For $k > 1$, let graph $G_k$ be a graph generated from an arbitrary graph $G_1$, by replacing every edge $e$ of $G_1$ with a $k$-path, such that all $k$-path replacement edges are disjoint in the sense that they only intersect at the original intersection endpoints as seen in $G_1$.
\end{Definition}

For any graph $G_k$, we denote each of its edges by a pair $(e, b)$, where $e$ is an edge of $G_1$ and $b \in \{0,\ldots, k - 1\}$ indexes the edges of the $k$-path that replaces $e$.

\begin{Definition}\label{def:fk}
Define $f_k\colon \binom{E_k}{3} \to \binom{E_1}{\leq3}$ such that $f_k\left(\{(e_1, b_1), (e_2, b_2), (e_3, b_3)\}\right) = \{e_1, e_2, e_3\}$.
\end{Definition}

The function $f_k$ is a mapping from every $3$-edge shape in $G_k$ to its generating subgraph in $G_1$.  The notation $\binom{S}{t}$ is standard and is used to denote the set of subsets in $S$ with exactly $t$ edges; analogously, $\binom{S}{\leq t}$ denotes the set of subsets in $S$ with at most $t$ edges.  The set of edges in $G_k$ is written as $E_k$.

\begin{Lemma}
$f_k$ is a function.
\end{Lemma}
\begin{proof}
Note that $f_k$ is properly defined.  For any $S \in \binom{E_k}{3}$, $|f_k(S)| \leq 3$, since any subset of $3$ edges in $E_k$ maps to at most $3$ edges in $G_1$, so all images are in the required range.  Moreover, for any $b \in \{0,\ldots, k - 1\}$, the mapping $(e, b) \mapsto e$ sends $(e, b)$ to no edge other than $e$, which implies that $f_k$ is a function.
\end{proof}
\qed   

\subsubsection{Subgraph patterns with 3 edges}
We wish to briefly state the possible subgraphs $S$ containing exactly three edges.
\begin{itemize}
	\item Triangle ($\tri$)
	\item 3-path ($\threepath$)
	\item 3-star ($\oneint$)--this is the graph that results when all three edges share exactly one common endpoint.  The remaining endpoint for each edge is disconnected from any endpoint of the three edges.
	\item Disjoint Two-Path ($\twopathdis$)--this subgraph consists of a two path and a remaining disjoint edge.
	\item 3-matching ($\threedis$)--this subgraph is composed of three disjoint edges.
\end{itemize}
\subsection{Three Matchings in $G_2$}

\begin{Lemma}\label{lem:3m-G_2}
The number of $3$-matchings in graph $G_2$ is computed by the following identity,
\[\numocc{G_2}{\threedis} = 8 \cdot \numocc{G_1}{\threedis} + 6 \cdot \numocc{G_1}{\twopathdis} + 4 \cdot \numocc{G_1}{\oneint} + 4 \cdot \numocc{G_1}{\threepath} + 2 \cdot \numocc{G_1}{\tri}.\]
\end{Lemma}

\begin{proof}

Given any $S \in \binom{E_1}{\leq3}$, we consider $f_2^{-1}(S)$, which is the set of all $3$-edge subsets of $S \times \{0, 1\}$ that $f_2$ maps to $S$.  Then we count the number of $3$-matchings among the $3$-edge subgraphs of $G_2$ in $f_2^{-1}(S)$.  We start with $S \in \binom{E_1}{3}$, where $S$ is composed of the edges $e_1, e_2, e_3$ and $f_2^{-1}(S)$ is the set of all $3$-edge subsets of the set $\{(e_1, 0), (e_1, 1), (e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)\}$ that contain an edge $(e_i, b)$ for each $i \in \{1, 2, 3\}$.

Consider the $S = \threedis$ pattern.  Note that edges in $f_2^{-1}$ intersect only at $(e_i, 0), (e_i, 1)$.  All subsets for $h \neq i \neq j$, $b_1, b_2, b_3 \in \{0, 1\}$, $(e_h, b_1), (e_i, b_2), (e_j, b_3)$ will compose a 3-matching.  One can see that we have a total of two possible choices for each edge $e$ in $G_1$ yielding $2^3 = 8$ possible 3-matchings in $G_2$.
%\AH{The comment below is an important comment.}
%\AR{I think your argument seems to implicitly assume that $G_1$ is the subset $S$ and $G_2$ is the corresponding mapping under $f^{-1}$. This is {\bf not} correct. You should present the argument as in the outline above above. I.e. fix an $S\in\binom{E_1}{\le 3}$ in $G_1$ and then consider all possible subgraphs in $G_2$ in $f^{-1}(S)$. {\bf Propagate} this change to the rest of the proof.}  

For $S = \twopathdis$ edges $e_2, e_3$ form a $2$-path with $e_1$ being disjoint.  This means that $(e_2, 0), (e_2, 1), (e_3, 0), (e_3, 1)$ form a $4$-path while $(e_1, 0), (e_1, 1)$ is its own disjoint $2$-path.  We can only pick $(e_1, 0)$ or $(e_1, 1)$ from $f_2^{-1}$, and then we need to pick a $2$-matching from the mapping of $e_2, e_3$ under $f_2^{-1}.$  Note that a four path allows there to be 3 possible 2 matchings, specifically, $\pbrace{(e_2, 0), (e_3, 0)}, \pbrace{(e_2, 0), (e_3, 1)}, \pbrace{(e_2, 1), (e_3, 1)}$.  Since these two selections can be made independently, there are $2 \cdot 3 = 6$ choices.  Edge $e_1$ alone cannot produce a $2$-matching, and so we are done with $\twopathdis$.

When $S = \oneint$, in $f_2^{-1}$, the inner edges $(e_i, 1)$ are all connected, and the outer edges $(e_i, 0)$ are all disjoint.  Note that for a valid 3 matching it must be the case that at most one inner edge can be part of the set of disjoint edges.  When exactly one inner edge is chosen, there are 3 such possibilities.  The remaining possible 3-matching occurs when all 3 outer edges are chosen.  Thus, there are $3 + 1 = 4$ 3-matchings for a 3-star subgraph. 

When $S =\threepath$ it is the case that all edges beginning with $e_1$ and ending with $e_3$ are successively connected.  This translates to a $6$-path in the edges of $f_2^{-1}$, where all edges from $(e_1, 0)$ to $(e_3, 1)$ are successively connected.  For a $3$-matching to exist, there must be at least one edge separating edges picked from a sequence.  A sequence of size $6$ produces $4$ such possibilities.  The following edge combinations, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}, \pbrace{(e_1, 0), (e_2, 0), (e_3, 1)},  \pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}, \pbrace{(e_1, 1), (e_2, 1),  (e_3, 1)}$ produce four possible 3-matchings.

For $S = \tri$, note that it is the case that the edges in $f_2^{-1}$ are connected in a successive manner, but this time in a cycle, such that $(e_1, 0)$ and $(e_3, 1)$ are also connected.  While this is similar to the discussion of the six path above, one must use caution not to consider the first and last edges as disjoint, since they are connected.  This rules out both $(e_1, 0), (e_2, 0), (e_3, 1)$ and $(e_1, 0), (e_2, 1), (e_3, 1)$ leaving us with two remaining edge combinations that produce a 3 matching.

Let us also consider when $S \in \binom{E_1}{\leq 2}$.  When $|S| = 2$, we have one of two possible constraints on the output of $f_2^{-1}$.  First, we could have that $e_1$ and $e_2$ are connected, forming a $2$-path, and thus all edges from $(e_1, 0)$ to $(e_2, 1)$ are connected successively, forming a $4$-path.  As alluded to previously, a 3-matching requires alternating edges to ensure disjointness, which requires a $\geq 5$-path.  Second, it could be that $e_1$ is disjoint from $e_2$, and thus $(e_i, b)$ is disjoint to $(e_j, b)$ for $i \neq j$ and $b \in \{0, 1\}$.  For a $3$-matching to exist at least one of the disjoint $2$-paths must be a $3$-path in order to produce a $2$-matching, and this is not the case.  When $|S| = 1$, by construction of $G_2$, it is the case there does not exist an $M \in \binom{E_2}{3}$ such that $f_2(M) = S$.  Therefore only subgraphs $S$ of $3$ edges need to be considered.

Observe that all of the arguments above focused solely on the shape/pattern of $S$.  In other words, all $S$ of a given shape yield the same number of $3$-matchings, and this is why we get the required identity. 
\end{proof}
\qed

\subsection{Three matchings in $G_3$}

\begin{Lemma}\label{lem:3m-G3}
The number of 3-matchings in $G_3$ is computed by the following identity, 
\begin{align*}
\numocc{G_3}{\threedis} = &4\pbrace{\numocc{G_1}{\twopath}} + 6\pbrace{\numocc{G_1}{\twodis}} + 18\pbrace{\numocc{G_1}{\tri}} + 21\pbrace{\numocc{G_1}{\threepath}}\\
&+ 24\pbrace{\numocc{G_1}{\twopathdis}} + 20\pbrace{\numocc{G_1}{\oneint}} + 27\pbrace{\numocc{G_1}{\threedis}}.
\end{align*}

\end{Lemma}

\begin{proof}
For any $S \in \binom{E_1}{\leq3}$, we now consider $f_3^{-1}(S)$, which is the set of all subsets of $3$ edges in $S \times \{0, 1, 2\}$ that $f_3$ maps to $S$.  We again then count the number of $3$-matchings in $f_3^{-1}(S)$.  Note, that for $G_1$, we represent edges as $e_1, e_2, e_3$, and their corresponding $3$-paths in $G_3$ as $(e_1, 0), (e_1, 1), (e_1, 2),\ldots, (e_3, 2)$.

When $S = \ed$, $f_3^{-1}(S)$ gives one result, $(e_1, 0), (e_1, 1), (e_1, 2)$.  All edges in the subset are a $3$-path, and it is the case as alluded in $G_2$ discussion that no 3-matching can exist in a single $3$-path.

Fix then $S = \twopath$ and now we have $f_3^{-1}(S)$ yielding all $3$-edged subsets of $S' = \{(e_1, 0),\ldots, (e_1, 2), (e_2, 0),\ldots, (e_2, 2)\}.$  All edges in $S'$ form a $6$-path, and as stated above, this forms $4$ $3$-matchings.

For $S = \twodis$, then all subsets in the output of $f_3^{-1}(S)$ are predicated on the fact that $(e_i, b)$ is disjoint with $(e_j, b)$ for $i \neq j$ and $b \in \{0, 1, 2\}$.  Pick an arbitrary $e_i$ and note, that $(e_i, 0), (e_i, 2)$ is a $2$-matching, which can combine with any of the $3$ edges in $(e_j, 0),\ldots, (e_j, 2)$ again for $i \neq j$.  Since the selections are independent, it follows that there exist $2 \cdot 3 = 6$ $3$-matchings.

Now for the 3-edge subgraphs of $G_1$, starting with a $S = \tri$.   As discussed in the case of $G_2$, $f_3^{-1}(S)$ subsets are conditioned on the fact that all the edges in $S'$ are a cyclic sequence, and we must be careful not to pair $(e_1, 0)$ with $(e_3, 2)$ in a $3$-matching.  For any $s \in S'$, $s$ is a $3$-matching when we have that $j \geq i + 2, k \geq j + 2$ for the edges $(e_i, b_1), (e_j, b_2), (e_k, b_3)$ where $b_1, b_2, b_3 \in \{0, 1, 2\}$ for all $i \in \{1, 2\}$ it is the case that if $b_i = 2$ then $b_{i + 1} \neq 0$ and if $b_1 = 0$ then $b_3 \neq 2$.  Iterating through all possible combinations producing 3-matchings, i.e. $\pbox{(e_1, 0), (e_2, 0), (e_3, 0)},  \pbox{(e_1, 0), (e_2, 0), (e_3, 1)}, \pbox{(e_1, 0), (e_2, 1), (e_3, 0)},\ldots, \pbox{(e_1, 0), (e_2, 2), (e_3, 1)}, \pbox{(e_1, 1), (e_2, 0), (e_3, 0)},\ldots, \pbox{(e_1, 2), (e_2, 2), (e_3, 2)}$ gives a total of 18 3-matchings.

Consider when $S = \threepath$ and $f_3^{-1}(S)$ has the constraint that all edges are successively connected to form a $9$-path.  Since $(e_1, 0)$ is disjoint to $(e_3, 2)$, both of these edges can exist in a $3$-matching.  The relaxation yields 3 other 3-matchings that couldn't be counted in the case of the $S = \tri$, namely $\pbox{(e_1, 0), (e_2, 0), (e_3, 2)},\pbox{(e_1, 0), (e_2, 1), (e_3, 2)}, \pbox{(e_1, 0), (e_2, 2), (e_3, 2)}$.  There are therefore $18 + 3 = 21$ three-matchings.

Assume $S = \twopathdis$, then $f_3^{-1}$ has successive connectivity from $(e_1, 0)$ through $(e_1, 2)$, and successive connectivity from $(e_2, 0)$ through $(e_3, 2)$.  It is the case that the edges in $S'$ form a 6-path with a disjoint 3-path.  There exist 8 distinct two matchings in the $6$-path $(e_2, 0),\ldots, (e_3, 2)$ of the form $\pbox{(e_2, 0), (e_3, 0)}, \pbox{(e_2, 0), (e_3, 1)}, \pbox{(e_2, 0), (e_3, 2)},\pbox{(e_2, 1), (e_3, 0)},\ldots, \pbox{(e_2, 1), (e_3, 2)}, \pbox{(e_2, 2), (e_3, 1)}, \pbox{(e_2, 2), (e_3, 2)}$.  These matchings can be paired independently with either of the $3$ remaining edges of $(e_1, b)$, for a total of $8 \cdot 3 = 24$ 3-matchings.

Given $S = \oneint$, the subsets of $f_3^{-1}(S)$ are restricted such that the outer edges $(e_i, 0)$ are disjoint from one another, the middle edges $(e_i, 1)$ are also disjoint to each other, and only the inner edges $(e_i, 2)$ intersect with one another at exactly one common endpoint.  To be precise, any outer edge $(e_i, 0)$ is disjoint to every middle edge $(e_j, 1)$ for $i \neq j$.  As discussed previously, at most one inner edge may appear in a $3$-matching.  For arbitrary inner edge $(e_i, 2)$, we have $4$ combinations of the middle and outer edges of $e_j, e_k$, where $i \neq j \neq k$.  These choices are independent and we have $4 \cdot 3 = 12$ 3-matchings.  We are not done yet, as we need to consider the middle and outer edge combinations.  Notice that for each $e_i$, we have $2$ choices, i.e. a middle or outer edge, contributing $2^3 = 8$ additional $3$-matchings, for a total of $8 + 12 = 20$.

Given $S = \threedis$ subgraph, we have the case that all subsets in $f_3^{-1}(S)$ have the property that $(e_i, b)$ is disjoint to $(e_j, b)$ for $i \neq j$.  For each $e_i$, there are then $3$ choices, independent of each other, and it results that there are $3^3 = 27$ 3-matchings.

All of the observations above focused only on the shape of $S$, and since we see that for fixed $S$, we have a fixed number of $3$-matchings, this implies the identity.
\end{proof}
\qed

\subsection{Three Paths}
Computing the number of 3-paths in $G_2$ and $G_3$ consists of much simpler linear combinations.
\subsubsection{$G_2$}
\begin{Lemma}
The number of $3$-paths in $G_2$ is computed by the following linear combination,
\[\numocc{G_2}{\threepath} = 2 \cdot \numocc{G_1}{\twopath}.\]
\end{Lemma}

\begin{proof}
For a $M = \threepath \in G_2$, it \textit{must} be the case that there is successive connectivity for $3$ edges across $f_2(M) = S$.  This constraint rules out every pattern $S \in G_1$ consisting of $3$ edges, as well as when $S = \twodis$ and for $S = \ed$.  The only surviving pattern is $S = \twopath$, where it can be seen in $f_2^{-1}(S)$ that each subset has successive connectivity from $(e_1, 0)$ to $(e_2, 1)$.  There are then $2$ $3$-paths sharing edges $e_1$ and $e_2$.
\end{proof}
\qed
%we have two 3-paths generated: $\pbox{(e_1, 0), (e_1, 1), (e_2, 0)}$ and $\pbox{(e_1, 1), (e_2, 0), (e_2, 1)}$.  Thus,


\AH{Changes propagated up to this point.}

\subsubsection{$G_3$}
In a similar fashion, enumerate through the various subgraphs in $G_1$ with $\leq 3$ edges, starting with the smallest.  Note that one edge in $G_1$ generates one 3-path in $G_3$.  Moving on to a 2-path, again we see that we have 2 3-paths that consist of both $G_1$ generating edges.  For the subgraph of 2 disjoint edges, as in the case of $G_2$, there is no way to make a 3-path out of disjoint edges, and this rolls over into the subgraph consisting of 3 disjoint edges, the subgraph made of a 2 path and disjoint edge, 3-star, triangle, and 3-path.  All of these subgraphs provide no way to create a 3-path from all edges $e_1, e_2, e_3$ in $G_1$.  The combination is then

\[\numocc{G_3}{\threepath} = \numocc{G_1}{\ed} + 2 \times \numocc{G_1}{\twopath}.\]

\subsection{Triangle}
The number of triangles in both $G_2$ and $G_3$ will always be $0$ for the simple fact that when we replace a single edge with $\geq 2$-path, the possibility of a triangle of single edge sides disappears, since the only way a single edged triangle could exist is if it existed in $G_1$ and then was passed to $G_2$ or $G_3$ without replacing each single edge with $\geq 2$-paths.