paper-BagRelationalPDBsAreHard/poly-form.tex

%root: main.tex
%!TEX root = ./main.tex
%\onecolumn
\section{Polynomial Formulation}

We can think of $\poly(\vct{w})$ as a function whose inputs are the variables $X_1,\ldots, X_\numTup$, as in $\poly(X_1,\ldots, X_\numTup)$.  Denote the sum of products expansion of $\poly(X_1,\ldots, X_\numTup)$ as $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$.

\begin{Definition}\label{def:qtilde}
Define $\rpoly(X_1,\ldots, X_\numTup)$ as the reduced version of $\poly(X_1,\ldots, X_\numTup)_{\Sigma}$, of the form
\[\rpoly(X_1,\ldots, X_\numTup) = \poly(X_1,\ldots, X_\numTup)_{\Sigma} \mod X_1^2-X_1\cdots\mod X_\numTup^2 - X_\numTup.\]
\end{Definition}


Intuitively, $\rpoly(\textbf{X})$ is the expanded sum of products form of $\poly(\textbf{X})$ such that if any $X_j$ term  has an exponent $e > 1$, it is reduced to $1$, i.e. $X_j^e\mapsto X_j$ for any $e > 1$.  
Alternatively, one can gain intuition for $\rpoly$ by thinking of $\rpoly$ as the resulting sum of product expansion of $\poly$ when $\poly$ is in a factorized form such that none of its terms have an exponent $e > 1$, with an idempotent product operator.  

The usefulness of this reduction will be seen shortly.

\begin{Lemma}\label{lem:pre-poly-rpoly}
When $\poly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}} \cdot \prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i^{d_i}$, we have then that $\rpoly(X_1,\ldots, X_\numTup) = \sum\limits_{\vct{d} \in \{0,\ldots, D\}^\numTup} q_{\vct{d}}\cdot\prod\limits_{\substack{i = 1\\s.t. d_i\geq 1}}^{\numTup}X_i$.
\end{Lemma}
\begin{proof}
Follows by the construction of $\rpoly$ in \cref{def:qtilde}.
\end{proof}

\qed

Note the following fact:
\begin{Proposition}
\[\text{For all } (\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}^\numTup, \poly(\wbit_1,\ldots, \wbit_\numTup) = \rpoly(\wbit_1,\ldots, \wbit_\numTup).\]
\end{Proposition}

\begin{proof}
Note that any $\poly$ in factorized form is equivalent to its sum of product expansion.  For each term in the expanded form, further note that for all $b \in \{0, 1\}$ and all $e \geq 1$, $b^e = b$.
\end{proof}

\qed

Define all variables $X_i$ in $\poly$ to be independent.
\begin{Lemma}\label{lem:exp-poly-rpoly}
The expectation of $\poly$ over possible worlds is equal to $\rpoly(\prob_1,\ldots, \prob_\numTup)$:
\begin{equation*}
\expct_{\wVec}\pbox{\poly(\wVec)}  = \rpoly(\prob_1,\ldots, \prob_\numTup).
\end{equation*}
\end{Lemma}

\begin{proof}
%Using the fact above, we need to compute \[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup)\].  We therefore argue that 
%\[\sum_{(\wbit_1,\ldots, \wbit_\numTup) \in \{0, 1\}}\rpoly(\wbit_1,\ldots, \wbit_\numTup) = 2^\numTup \cdot \rpoly(\frac{1}{2},\ldots, \frac{1}{2}).\]

Let $\poly$ be the generalized polynomial, i.e., the polynomial of $\numTup$ variables with highest degree $= D$: %, in which every possible monomial permutation appears,
\[\poly(X_1,\ldots, X_\numTup) = \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup X_i^{d_i}.\]


Then for expectation we have
\begin{align}
\expct_{\wVec}\pbox{\poly(\wVec)} &= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \expct_{\wVec}\pbox{\prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup w_i^{d_i}}\label{p1-s1}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i^{d_i}}\label{p1-s2}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \expct_{\wVec}\pbox{w_i}\label{p1-s3}\\
&= \sum_{\vct{d} \in \{0,\ldots, D\}^\numTup}q_{\vct{d}}\cdot \prod_{\substack{i = 1\\s.t. d_i \geq 1}}^\numTup \prob_i\label{p1-s4}\\
&= \rpoly(\prob_1,\ldots, \prob_\numTup)\label{p1-s5}
\end{align}

In step \cref{p1-s1}, linearity of expectation lets us push the expectation inside the sum; in \cref{p1-s2}, since the variables are independent, the expectation of a product is the product of the expectations.  In \cref{p1-s3}, note that $w_i \in \{0, 1\}$, which further implies that for any exponent $e \geq 1$, $w_i^e = w_i$.  Next, in \cref{p1-s4} the expectation of a tuple is indeed its probability.  

%\OK{
%	You don't need to tie this to TI-DBs if you define the variables ($X_i$) to be independent.  
%	Annotations 
%	Boolean expressions over uncorrelated boolean variables are sufficient to model TI-, BI-, and
%	PC-Tables.  This should still hold for arithmetic over the naturals.
%}


Finally, \cref{p1-s5} follows from \cref{lem:pre-poly-rpoly}: $\rpoly(\prob_1,\ldots, \prob_\numTup)$ is exactly the sum, over all monomials, of the coefficient $q_{\vct{d}}$ times the product of the probabilities of the variables appearing in that monomial.

\qed
\end{proof}

\begin{Corollary}
If $\poly$ is given to us in a sum of monomials form, the expectation of $\poly$ ($\ex{\poly}$) can be computed in $O(|\poly|)$ time, where $|\poly|$ denotes the total number of multiplication/addition operators.
\end{Corollary}

\begin{proof}
Note that \cref{lem:exp-poly-rpoly} shows that $\ex{\poly} = \rpoly(\prob_1,\ldots, \prob_\numTup)$.  Therefore, if $\poly$ is already in sum of products form, one only needs to compute $\poly(\prob_1,\ldots, \prob_\numTup)$ ignoring exponent terms (note that such a polynomial is $\rpoly(\prob_1,\ldots, \prob_\numTup)$), which indeed requires $O(|\poly|)$ computations.\qed
\end{proof}

\subsection{When $\poly$ is not in sum of monomials form}


We would like to argue that in the general case there is no computation of expectation in linear time.

To this end, consider the following graph $G(V, E)$, where $|E| = m$, $|V| = \numTup$, and $i, j \in [\numTup]$.  Consider the query $q_E(X_1,\ldots, X_\numTup) = \sum\limits_{(i, j) \in E} X_i \cdot X_j$.
\AR{The two lemmas need to be re-written once notation for representing a query is finalized in Section 1.}
\AH{I think that we are okay with this now.  We can use both polynomial and query notation interchangably.  Does it matter which we use in the lemmas, i.e. $\poly(\vct{w})$ vs. $\poly(\wElem_1,\ldots, \wElem_N)$.  Please let me know.}
\begin{Lemma}\label{lem:const-p}
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\dots, \wElem_\numTup)^3$ in T(m) time for $\wElem_1 =\cdots= \wElem_\numTup = \prob$, then we can count the number of 3-matchings in $G$ in $T(m) + O(m)$ time.
\end{Lemma}

\begin{Lemma}\label{lem:gen-p}
If we can compute $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ in T(m) time for O(1) distinct values of $\prob$ then we can count the number of triangles (and the number of 3-paths, the number of 3-matchings) in $G$ in O(T(m) + m) time.
\end{Lemma}

\begin{Lemma}\label{lem:qE3-exp}
When we expand $\poly(\wElem_1,\ldots, \wElem_\numTup) = q_E(\wElem_1,\ldots, \wElem_\numTup)^3$ out and assign all exponents $e \geq 1$ a value of $1$, we have the following,
	\begin{align}
		&\rpoly(\prob,\ldots, \prob) = \numocc{G}{\ed}\prob^2 + 6\numocc{G}{\twopath}\prob^3 + 6\numocc{G}{\twodis}\prob^4 + 6\numocc{G}{\tri}\prob^3 +\nonumber\\ 	
		&\qquad\qquad6\numocc{G}{\oneint}\prob^4 + 6\numocc{G}{\threepath}\prob^4 + 6\numocc{G}{\twopathdis}\prob^5 + 6\numocc{G}{\threedis}\prob^6.\label{claim:four-one}
	\end{align}
\end{Lemma}
\AH{\cref{lem:qE3-exp} needs to be proven.  I think I might need a gentle nudge on this, I can understand intuitively, but I think there is a combinatorics argument to prove this formally, I'm just a bit unsure.}

\AH{The warm-up below is fine for now, but will need to be removed for the final draft}
First, let us do a warm-up by computing $\rpoly(\wElem_1,\dots, \wElem_\numTup)$ when $\poly = q_E(\wElem_1,\ldots, \wElem_\numTup)$.  Before doing so, we introduce some notation.  Let $\numocc{G}{H}$ denote the number of occurrences of $H$ in $G$.  So, e.g., $\numocc{G}{\ed}$ is the number of edges ($m$) in $G$.

\AH{We need to make a decision on subgraph notation, and number of occurrences notation.  Waiting to hear back from Oliver before making a decision.}

\OK{
	I'm not sure what I can add.  The existing notation is fine (for now).  I would suggest adding
	a definition table.
}
\AH{UPDATE: I did a quick google, and it \textit{appears} that there is a bit of a learning curve to implement node/edge symbols in LaTeX.  So, maybe, if time is of the essence, we go with another notation.}

\begin{Claim}
We can compute $\rpoly(\prob,\ldots, \prob)^2$ in O(m) time.
\end{Claim}
	\begin{proof}
		The proof basically follows by definition.  When we expand $\poly^2$, and make all exponents $e = 1$, substituting $\prob$ for all $\wElem_i$ we get $\rpoly_2(\prob,\ldots, \prob) = \numocc{G}{\ed} \cdot \prob^2 + 2\cdot \numocc{G}{\twopath}\cdot \prob^3 + 2\cdot \numocc{G}{\twodis}\cdot \prob^4$.
		\begin{enumerate}
			\item First note that 
				\begin{align*}
					\poly^2(\wVec) &= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{(i, j), (k, \ell) \in E s.t. (i, j) \neq (k, \ell)} \wElem_i\wElem_j\wElem_k\wElem_\ell\\
					&= \sum_{(i, j) \in E} (\wElem_i\wElem_j)^2 + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i
					\wElem_j^2\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell\\
				\end{align*}
				By definition of $\rpoly$, 
				\begin{equation}
					\rpoly^2(\wVec) = \sum_{(i, j) \in E} \wElem_i\wElem_j + \sum_{\substack{(i, j), (j, \ell) \in E\\s.t. i \neq \ell}}\wElem_i\wElem_j\wElem_\ell + \sum_{\substack{(i, j), (k, \ell) \in E\\s.t. i \neq j \neq k \neq \ell}} \wElem_i\wElem_j\wElem_k\wElem_\ell\label{eq:part-1}
				\end{equation}
				Notice that the first term is $\numocc{G}{\ed}\cdot \prob^2$, the second $\numocc{G}{\twopath}\cdot \prob^3$, and the third $\numocc{G}{\twodis}\cdot \prob^4.$
			\item Note that 
\AH{We need the correct formula for two-matchings below.}
				\begin{align*}
					&\numocc{G}{\ed} = m,\\
					&\numocc{G}{\twopath} = \sum_{u \in V} \binom{d_u}{2} \text{ where $d_u$ is the degree of vertex $u$},\\
					&\numocc{G}{\twodis} = \binom{m}{2} - \sum_{u \in V} \binom{d_u}{2} \text{ (all pairs of distinct edges minus the pairs sharing an endpoint).}
				\end{align*}
		\end{enumerate}
		Thus, since each of the summations can be computed in O(m) time, this implies that by \cref{eq:part-1} $\rpoly(\prob,\ldots, \prob)$ can be computed in O(m) time.\qed
	\end{proof}
\AH{END of the 'warm-up'}
We are now ready to state the claim we need to prove \cref{lem:const-p} and \cref{lem:gen-p}.

Let $\poly(\wVec) = q_E(\wVec)^3$.
\begin{Claim}\label{claim:four-two}
 If one can compute $\rpoly(\prob,\ldots, \prob)$ in time T(m), then we can compute the following in O(T(m) + m):
\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(\prob^2 - \prob^3).\]
\end{Claim}

\begin{proof}

We have either shown or will show that the following subgraph cardinalities can be computed in $O(m)$ time:
\[\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis}, \numocc{G}{\oneint}, \numocc{G}{\twopathdis} + \numocc{G}{\threedis}.\]

		By definition we have that
		\[\poly(\wElem_1,\ldots, \wElem_\numTup) = \sum_{\substack{(i_1, j_1),\\ (i_2, j_2),\\ (i_3, j_3) \in E}} \prod_{\ell = 1}^{3}\wElem_{i_\ell}\wElem_{j_\ell}.\]
		Rather than list all the expressions in full detail, let us make some observations regarding the sum.  Let $e_1 = (i_1, j_1), e_2 = (i_2, j_2), e_3 = (i_3, j_3)$.  Notice that each expression in the sum consists of a triple $(e_1, e_2, e_3)$.  There are three forms the triple $(e_1, e_2, e_3)$ can take.

\underline{case 1:} $e_1 = e_2 = e_3$, where all edges are the same.  There are exactly $m$ such triples, each with a $\prob^2$ factor.

\underline{case 2:}  This case occurs when there are two distinct edges of the three.  All 6 combinations of two distinct values consist of the same monomial in $\rpoly$, i.e. $(e_1, e_1, e_2)$ is the same as $(e_2, e_1, e_2)$.  This case produces the following edge patterns: $\twodis, \twopath$.

\underline{case 3:} $e_1 \neq e_2 \neq e_3$, i.e., when all edges are distinct.  This case consists of the following edge patterns: $\threedis, \twopathdis, \threepath, \oneint, \tri$.

\AH{This proof I think could some reorganization.  We really don't need the warm-up anymore, but we can use the formulas for case 1 and case 2.}

It has already been shown previously that $\numocc{G}{\ed}, \numocc{G}{\twopath}, \numocc{G}{\twodis}$ can be computed in O(m) time.  Here are the arguments for the rest.
\[\numocc{G}{\oneint} = \sum_{u \in V} \binom{d_u}{3}\]
$\numocc{G}{\twopathdis} + \numocc{G}{\threedis} = $ the number of occurrences of three distinct edges with five or six vertices.  This can be counted in the following manner.  For every edge $(u, v) \in E$, throw away all neighbors of $u$ and $v$ and pick two more distinct edges.
\[\numocc{G}{\twopathdis} + \numocc{G}{\threedis} = \sum_{(u, v) \in E} \binom{m - d_u - d_v + 1}{2}\]  The implication in \cref{claim:four-two} follows by the above and \cref{lem:qE3-exp}.

\AH{Justify the last sentence.}
	\end{proof}

\qed

\begin{proof}[Proof of \cref{lem:gen-p}]

%\AR{Also you can modify the text of \textsc{Proof} by using the following latex command \texttt{\\begin\{proof\}[Proof of Lemma 2]} and Latex will typeset this as \textsc{Proof of Lemma 2}, which is what you really want.}

\cref{claim:four-two} says that if we know $\rpoly_3(\prob,\ldots, \prob)$, then we can know in O(m) additional time
\[\numocc{G}{\tri} + \numocc{G}{\threepath} \cdot \prob - \numocc{G}{\threedis}\cdot(\prob^2 - \prob^3).\]  We can think of each term in the above equation as a variable, where one can solve a linear system given 3 distinct $\prob$ values, assuming independence of the three linear equations.  In the worst case, without independence, 4 distinct values of $\prob$ suffice: viewing the expression as a polynomial in $\prob$ of degree at most $3$, the corresponding coefficient matrix is a Vandermonde matrix, which has full rank.\AR{Follows from the fact that the corresponding coefficient matrix is the so called Vandermonde matrix, which has full rank.}
\AH{This Vandermonde matrix I need to research.}
\end{proof}

\qed

\AH{Below is only a transcription of the notes.  The claims need to be verified and further worked out.}

\begin{proof}[Proof of \cref{lem:const-p}]

The argument for \cref{lem:gen-p} cannot be applied to \cref{lem:const-p} since we have that $\prob$ is fixed.  We have hope in the following:  we assume that we can solve this problem for all graphs, and the hope would be to solve the problem for, say, $G_1, G_2, G_3$, where $G_1$ is arbitrary, and relate the values of $\numocc{G}{H}$, where $H$ is a placeholder for the relevant edge combination.  The hope is that these relations would result in three independent linear equations, and then we would be done.

The following is an option.
\begin{enumerate}
	\item Let $G_1$ be an arbitrary graph
	\item Build $G_2$ from $G_1$, where each edge in $G_1$ gets replaced by a 2 path.
\end{enumerate}

Then $\numocc{G_2}{\tri} = 0$, and if we can prove that
\begin{itemize}
	\item $\numocc{G_2}{\threepath} = 2 \cdot \numocc{G_1}{\twopath}$
	\item $\numocc{G_2}{\threedis} = 8 \cdot \numocc{G_1}{\threedis} + 6 \cdot \numocc{G_1}{\twopathdis} + 4 \cdot \numocc{G_1}{\oneint} + 4 \cdot \numocc{G_1}{\threepath} + 2 \cdot \numocc{G_1}{\tri}$
\end{itemize}
we solve our problem for $q_E^3$ based on $G_2$ and we can compute $\numocc{G}{\threedis}$, a hard problem.
\end{proof}

\AH{Proving the above linear combination for 3-matchings in $G_2$ always holds for an arbitrary $G_1$.}

Consider graph $G_2$, constructed from an arbitrary graph $G_1$.  We wish to show that the number of 3-matchings in $G_2$ will always be the linear combination above, regardless of the construction of $G_1$.

\AR{I did not make a pass on the above since it looks incomplete and does not seem to have changed for a while. Also it would be good to define $G_2$ and $G_3$ outside of the proofs below.}

\AR{You should have a subsection on the mapping $f$ that talks about the map independent of matchings. See comments below on how to state such observations.}

\subsection{Three Matchings}
\subsubsection{$G_2$}

\begin{proof}
Denote $3_{match}$ as the set of 3-matchings in $G_2$. \AR{I do not like the notation $3_{match}$: perhaps $\mathcal{M}_3$ would be better?} Denote $SG$ as the set of subgraphs imposed on $G_1$. \AR{The set of all edge-subgraphs of $G_1$ already has an existing notation-- $2^{E_1}$, i.e. the power set of the set of edges of $G_1$.}  Notate each edge in $G_2$ as $(e, b)$ such that $b \in \{0, 1\}$, where $b$ identifies either the first or second edge of the two path that replaced the original $G_1$ edge.  

\AR{I would recommend that you make the following changes. First define $f$ as a function $f:\binom{E_2}{3}\to \binom{E_1}{\le 3}$. As an aside, for any set $S$, the notation $\binom{S}{t}$ and $\binom{S}{\le t}$ denote the set of all subsets of $S$ of size {\em exactly} $t$ and {\em at most} $t$ respectively. Then for any $S\in\binom{E_2}{3}$, define $f(S)$ generically as below. Then argue that $f$ is properly defined: i.e. it is a function and $|f(S)|\le 3$. Also you'll need the definition for $G_3$ as well. One thought: define $G_k$ for $k>1$ generically. And then define $f_k$ to do the mapping from $G_k$ to $G_1$. Again, as mentioned above these definitions of $f$ and its properties should be pulled out up front.}
Let $f\colon 3_{match} \to SG$ be a function that maps a distinct 3-matching in $G_2$ to its generating subgraph.  An arbitrary $M \in 3_{match}$ in $G_2$ is then denoted by $(e_1, b_1), (e_2, b_2), (e_3, b_3)$, and $f(M) = \{e_1, e_2, e_3\}$.  Note that $f(M)$ is a set, i.e., the distinct edges of the generating subgraph in $G_1$.

\underline{f is a function}:
\AR{I think I had mentioned this before. In either case let me state it in a way so that you do not forget :-) {\LARGE \bf DO NOT USE UNDERLINING FOR EMPHASIS IN LATEX.} {\Huge \bf NEVER.} Underlining is a vestige of times when papers would be typeset using a typewriter. There are {\bf much} better ways to do it in Latex. In the current case since you'll be bringing this part out of the proof you can make the claim below a lemma.}

First, let us argue that $f$ is indeed a function.  To do this, first note that for $G_2$ to contain a 3-matching, it must be that the generating subgraph in $G_1$ has at least 3 distinct edges.  This is because, with only 2 distinct $G_1$ edges, there are only two subgraph patterns, i.e., 2 disjoint edges and two-path, both of which when transformed into $G_2$ will not have enough disjoint edges to create a 3-matching.

Consider then a minimal $G_1$, i.e. a graph with at most 3 edges.  Choose an arbitrary 3-matching $(e_1, b_1), (e_2, b_2), (e_3, b_3)$ in the generated $G_2$ graph.  Choose an arbitrary edge $(e_i, b_i)$ and remove its corresponding generating edge in $G_1$.  Notice that this 3-matching now disappears from $G_2$.  Now replace the removed edge in $G_1$ with a new edge, placing it anywhere other than its original position.  Note that, no matter where the replacement edge lies, it cannot be that it generates the same 3-matching that the original edge helped uniquely generate, since a one-to-one correspondence exists between $(e_i, b_i)$ and $e_i$.\AR{this argument is overtly complicated. The main part of the argument is basically the last line. The argument should go like the following. For any choice of $b\in\{0,1\}$, the map $(e,b)\mapsto e$ is a one-to-one map and this implies that $f$ is indeed a function. Of course you should do this argument for the general $f_k$.}  Thus, since any $M \in 3_{match}$ cannot be generated from more than one subgraph in $G_1$, $f$ must be a function.

\underline{For any $M \in 3_{match}, | f(M) |\leq 3$}

Since any arbitrary 3-matching $(e_1, b_1), (e_2, b_2), (e_3, b_3)$ has an image of $\{e_1, e_2, e_3\}$, which is a set, it has to be the case that $|f(M)|$ can be no more than $3$.

\underline{For any $G' \in SG: |G'| \leq 3$, there are a fixed number of 3-matchings}
\underline{ $M'$ s.t. $f(M') = G'$}

First, note that there are a fixed number of \textit{distinct} 3-edge subgraphs that can appear on a given $G_1$.  Second, these subgraphs themselves are fixed, with the number of edges, vertices, and intersections always being the same.  Third, since each possible subgraph from a finite set of subgraphs is fixed, it must then be the case that each possible subgraph will always generate the same number of 3-matchings in $G_2$.
\AR{I do not see the point of stating the claim above as well as the para above. The claim follows as a consequence of the argument below so it does not need to ``re-stated" above.}

Specifically, we have the following possible subgraphs that can be composed of three edges.
\begin{itemize}
	\item Triangle (Tri)
	\item 3-path (3p)
	\item 3-star--this is the graph that results when all three edges share exactly one common endpoint.  The remaining endpoint for each edge is disconnected from any endpoint of the three edges.
	\item Disjoint Two-Path (DT)--this subgraph consists of a two path and a remaining disjoint edge.
	\item 3-matching (3m)
\end{itemize}
The fact that there is a \textit{fixed} number of possible subgraphs that can be composed over three edges follows from a simple iteration over the 3 edges, where for each iteration, the edge in question can be connected or disconnected to its neighboring edges' endpoints, with the constraint that all edges remain unique, i.e., doubling of edges is disallowed.  When all possibilities are exhausted, the above set of patterns remains.  One can view this iteration similar to the iteration of a summation or set of nested for loops.

\AH{I think that all of the above can be generalized to 3-paths as well.}
Earlier, we claimed the following.

\[\numocc{G_2}{\threedis} = 8 \cdot \numocc{G_1}{\threedis} + 6 \cdot \numocc{G_1}{\twopathdis} + 4 \cdot \numocc{G_1}{\oneint} + 4 \cdot \numocc{G_1}{\threepath} + 2 \cdot \numocc{G_1}{\tri}\]

Beginning with the leftmost of RHS terms and proceeding to the consecutive rightmost terms, let us show this to be the case.

Consider the $\threedis$ pattern.  For $G_2$, this gives us three disjoint two paths.  Note, that for each edge in a given two path, there are four possible edge combinations that will yield a 3-matching in $G_2$.  This gives us a total of $4 * 2$ possible 3-matchings for the $\threedis$ subgraph in $G_1$.  Alternatively, one can see that we have a total of two possible choices for each disjoint two path, which yields $2^3$ possible 3-matchings in $G_2$.

For the $\twopathdis$, i.e. DT pattern, when we convert to $G_2$, we have a four path and disjoint two path.  Note that a four path does not have enough edges to produce a 3-matching, as there must be at least 5 edges to produce a 3-matching.  However, a four path allows there to be 3 possible 2 matchings, specifically, $\pbrace{(e_1, 0), (e_2, 0)}, \pbrace{(e_1, 0), (e_2, 1)}, \pbrace{(e_1, 1), (e_2, 1)}$.  These three possibilities can combine with either of the two edges in the disjoint two path to produce a 3 matching in $G_2$, yielding $3 * 2 = 6$ total 3-matchings.  Since the disjoint two path cannot produce a 2 matching, we are done with $\twopathdis$. 

For the $\oneint$, i.e. 3-star pattern, the resulting $G_2$ graph produces 3 two paths, each of which has one and only one endpoint that intersects with one and only one endpoint of the other two paths.  Call the intersecting edges of the two paths inner edges.  Note that for a valid 3 matching it must be the case that at most one inner edge can be part of the set of disjoint edges.  There are 3 possibilities with exactly one inner edge.  The remaining possible 3-matching occurs when all 3 outer edges are chosen.  Thus, there are $3 + 1 = 4$ 3-matchings for a 3-star subgraph.

When $G_1$ is $\threepath$, i.e. 3p, it is the case that $G_2$ becomes a six path.  As alluded to earlier, there must be at least 5 edges in a path to produce a 3 matching, since all three edges in a 3 matching must be disjoint.  With this condition over six connected edges, we end up with the following edge combinations, $\pbrace{(e_1, 0), (e_2, 0), (e_3, 0)}, \pbrace{(e_1, 0), (e_2, 0), (e_3, 1)},  \pbrace{(e_1, 0), (e_2, 1), (e_3, 1)}, \pbrace{(e_1, 1), (e_2, 1),  (e_3, 1)}$ to produce four 3-matchings.

For Tri, note that it is the case that the graph $G_2$ is a ``triangle of two paths'', where each triangular edge is a two path.  While this is similar to the discussion of the six path above, one must use caution not to consider the first and last edges as disjoint, since they are connected.  This leaves us with two remaining edge combinations that produce a 3 matching.
\end{proof}

\qed

\subsubsection{$G_3$}
\AH{Linear Equation computing 3-matchings in $G_3$ using all 3-edge subgraphs in $G_1$.}
In a similar way we can count the number of 3-matchings in graph $G_3$, where each edge in a given $G_1$ gets replaced with a disjoint 3-path, disjoint meaning that no 3-path intersects another 3-path, except at its endpoints as in the original graph.  Because of the construction of $G_3$, we now also need to account for two-edge subgraphs of $G_1$ (two-paths and pairs of disjoint edges).  

The linear combination of counts of $G_1$ subgraphs with at most 3 edges that gives the number of 3-matchings in $G_3$ is as follows:


\begin{align*}
\numocc{G_3}{\threedis} = &4\pbrace{\numocc{G_1}{\twopath}} + 6\pbrace{\numocc{G_1}{\twodis}} + 18\pbrace{\numocc{G_1}{\tri}} + 21\pbrace{\numocc{G_1}{\threepath}}\\
&+ 24\pbrace{\numocc{G_1}{\twopathdis}} + 20\pbrace{\numocc{G_1}{\oneint}} + 27\pbrace{\numocc{G_1}{\threedis}}.
\end{align*}

\AH{Justification.}
Enumerate through the RHS in a similar fashion.  Beginning with a two path $\twopath$ in $G_1$, it is the case that in $G_3$ this becomes a six-path.  As discussed previously, this yields four three matching subgraphs.  

For the subgraph of two disjoint edges, $\twodis$, this becomes two disjoint 3-paths.  It is the case in one 3-path, that we have one subgraph of two disjoint edges, where a third disjoint edge can be picked from any of the three edges in the remaining disjoint 3-path.  The process can be repeated starting with the alternative 3-path, giving $2 * 3 = 6$ unique 3-matchings.

Now for the 3-edge subgraphs, starting with a triangle.  Note that we are considering 3-matchings $M$ $s.t.$ $f(M) = G'$, where $G'$ is a 3-edge subgraph.  In other words, the function $f$ will produce 3 \textit{distinct} edge outputs.  This then disallows double counting of 3-matchings from a 3-edge subgraph using only two of the edges.  

Note that for the case of $G_3$, edges are denoted as $(e_i, b_i)$ where $b_i \in \{0, 1, 2\}$.  When a triangle in $G_1$ is transformed into $G_3$, it becomes a ``triangle'' where each leg is a three-path.  This is very similar to a 9-path, with the caveat that the first and last edge $(e_1, 0)$ and $(e_3, 2)$ cannot be in the same 3-matching set together.  For this $G_3$ it is also the case that for all $i \in \{1, 2\}$, $(e_i, 2)$ and $(e_{i + 1}, 0)$ are neighbors and cannot share a 3-matching.  Iterating through all possible combinations producing 3-matchings, i.e. $\pbox{(e_1, 0), (e_2, 0), (e_3, 0)},  \pbox{(e_1, 0), (e_2, 0), (e_3, 1)}, \pbox{(e_1, 0), (e_2, 1), (e_3, 0)},$\newline$\ldots, \pbox{(e_1, 0), (e_2, 2), (e_3, 1)},\ldots, \pbox{(e_1, 2), (e_2, 2), (e_3, 2)}$ gives a total of 18 3-matchings.

Consider next a 3-path in $G_1$, where the resulting subgraph in $G_3$ is a 9-path.  In this case, because the endpoints are disconnected, we have  3 other 3-matchings that couldn't be counted in the case of the Tri subgraph, namely $\pbox{(e_1, 0), (e_2, 0), (e_3, 2)},\pbox{(e_1, 0), (e_2, 1), (e_3, 2)}, \pbox{(e_1, 0), (e_2, 2), (e_3, 2)}$, thus $18$ (from the Tri analysis)$ + 3 = 21$ three-matchings.

 For the $\twopathdis$ subgraph, it is the case that this graph becomes a 6-path with a disjoint 3-path in $G_3$.  Starting with the 6-path, there are 8 distinct two matchings in the form of $\pbox{(e_1, 0), (e_2, 0), (e_3, 0)}, \pbox{(e_1, 0), (e_2, 0), (e_3, 1)}, \pbox{(e_1, 0), (e_2, 0), (e_3, 2)}, \ldots \pbox{(e_1, 2), (e_2, 1), (e_3, 0)},\ldots, \pbox{(e_1, 2), (e_2, 2), (e_3, 2)}$.  Notice again that the edge pattern $\pbox{(e_1, 2), (e_2, 0)\ldots}$ is forbidden.  Each of these 8 2-matchings can be paired with one of the 3 edges in the disjoint 3-path, yielding $8 * 3 = 24$ 3-matchings.%We have to consider 3 possibilities of 3-matchings.  First, the 6-path produces 4.  Second, it is the case that the 6-path produces 10 two-matchings, which can be paired with any one of the three edges in the disjoint 3-path, producing $10 * 3$ 3-matchings.  Third, the disjoint 3-path can produce one 3-matching which can be paired with a third edge from any one of the edges in the 6-path, giving $1 * 6 = 6$ three-matchings, for a total of $4 + 30 + 6 = 40$ three-matchings.

Given the 3-star subgraph, where 3 distinct edges are connected at one common endpoint, occurring in $G_1$.  In $G_3$, this becomes 3 3-paths joined at one common endpoint.  Note the non-intersecting outer and middle edges.  For each 3-path, there are 2 possible choices to create a 3-matching, yielding $2^3 = 8$ 3-matchings.  Finally, consider the joined inner edges, recalling that at most one of them can be in a 3-matching.  Pick an arbitrary edge, and there are four possible combinations that result from the middle and outer edges of the other 2 3-paths.  This gives $3 * 4 = 12$ more 3-matchings, a total of $8 + 12 = 20$.% 3 distinct 9-paths, with each 9-path intersecting the others at one common and shared endpoint.  If we consider the outermost non-intersecting edges along with the middle non-intersecting edges, we have $2 * 2 * 2 = 8$ possible 3-matchings.  Considering the inner, intersecting edges, we have the condition that only one can appear at a time in a 3-matching set.  When we pick an arbitrary inner edge, we have one of two possibilities, we can pick the outer edge of the same 3-path the inner edge is located on, while picking any of the other 4 remaining edges in the middle and outer edges of the other two 3-paths.  This gives $4 * 3$ more unique 3-matchings.  The remaining possibility exists in combining the arbitrary inner edge with any of the 4 combinations of the middle and outer edges of the other 3-paths.  This yields again $3 * 4 = 12$ unique three-matchings, together which make $8 + 12 + 12 = 32$ three-matchings.

Given the $\threedis$ subgraph occurring in $G_1$, the resulting graph consists of three disjoint 3-paths in $G_3$.  Since only one edge can be used at a time from each 3-path, it results that there are $3^3 = 27$ 3-matchings.%There are two considerations.  First, if we pull one edge from each disjoint 3-path, we have three choices from each path, which is $3^3 = 27$ three-matchings.  The second consideration is that we can pull a two matching from any of the given disjoint 3-paths, matching it with a third disjoint edge from any of the other edges in the other 2 three-paths, giving $3 * 6 = 18$ more unique 3-matchings for a total of $27 + 18 = 45$ three-matchings.

\subsection{Three Paths}
Computing the number of 3-paths in $G_2$ and $G_3$ consists of much simpler linear combinations.
\subsubsection{$G_2$}
It can be easily verified that a single edge in $G_1$ becomes a 2-path in $G_2$, and generates $0$ 3-paths.  Similarly, it is the case for 2 disjoint edges.  When we look at output of $f$ that has 3 edges, we see the same result for the case of 3 disjoint edges in $G_1$.  When we have a 2-path and 1 disjoint edge, there is no way to create a 3-path out of all three edges.  The same holds for the remaining subgraphs consisting of 3 edges, the 3-star, triangle, and 3-path.

All the above to say that there exists only one subgraph in $G_1$ that will produce 3-paths, namely the 2-path.  This subgraph becomes a 4-path in $G_2$, and using both edges (as required by definition of $f$), we have two 3-paths generated: $\pbox{(e_1, 0), (e_1, 1), (e_2, 0)}$ and $\pbox{(e_1, 1), (e_2, 0), (e_2, 1)}$.  Thus,

\[\numocc{G_2}{\threepath} = 2 * \numocc{G_1}{\twopath}.\]

\subsubsection{$G_3$}
In a similar fashion, enumerate through the various subgraphs in $G_1$ with $\leq 3$ edges, starting with the smallest.  Note, that one edge in $G_1$ generates one 3-path in $G_3$.  Moving on to a 2-path, again we see that we have 2 3-paths that consist of both $G_1$ generating edges.  For the subgraph of 2 disjoint edges, as in the case of $G_2$, there is no way to make a 3-path out of disjoint edges, and this rolls over into the subgraph consisting of 3 disjoint edges, the subgraph made of a 2 path and disjoint edge, 3-star, triangle, and 3-path.  All of these subgraphs provide no way to create a 3-path from all edges $e_1, e_2, e_3$ in $G_1$.  The combination is then

\[\numocc{G_3}{\threepath} = \numocc{G_1}{\ed} + 2 \times \numocc{G_1}{\twopath}.\]

\subsection{Triangle}
The number of triangles in both $G_2$ and $G_3$ will always be $0$ for the simple fact that when we replace a single edge with $\geq 2$-path, the possibility of a triangle of single edge sides disappears, since the only way a single edged triangle could exist is if it existed in $G_1$ and then was passed to $G_2$ or $G_3$ without replacing each single edge with $\geq 2$-paths.