Merging local changes to @rxin graph branch.

This commit is contained in:
Joseph E. Gonzalez 2013-08-06 12:29:21 -07:00
commit 0704d85823
2 changed files with 398 additions and 400 deletions

View file

@ -1,15 +1,14 @@
package spark.graph package spark.graph
import spark._ import spark._
import spark.SparkContext._
// import breeze.linalg._
object Analytics extends Logging { object Analytics extends Logging {
def main(args: Array[String]) { // def main(args: Array[String]) {
//pregelPagerank() // //pregelPagerank()
} // }
// /** // /**
// * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD // * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD
@ -41,54 +40,61 @@ object Analytics extends Logging {
/**
 * Run a fixed number of PageRank iterations over `graph` using the Pregel
 * abstraction.
 *
 * Each vertex carries the pair (out-degree, rank) while the computation
 * runs; ranks start at 1.0 and the initial message is 1.0.
 *
 * @param graph     the input graph; its vertex and edge data are not read
 * @param numIter   the number of iterations handed to `Pregel.iterate`
 * @param resetProb the constant term of the rank update; the summed
 *                  incoming contribution is weighted by `1.0 - resetProb`
 * @return the graph produced by `Pregel.iterate` with each vertex's data
 *         replaced by its rank
 */
def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED],
                                         numIter: Int,
                                         resetProb: Double = 0.15) = {
  // Weight applied to the summed neighbour contributions.
  val damping = 1.0 - resetProb

  // Attach the out degree to every vertex (0 when the vertex has no entry
  // in `outDegrees`) together with a starting rank of 1.0.
  val rankedGraph = graph.leftJoinVertices[Int, (Int, Double)](graph.outDegrees,
    (v, outDegree) => (outDegree.getOrElse(0), 1.0)
  )

  val converged = Pregel.iterate[(Int, Double), ED, Double](rankedGraph)(
    (v, msgSum: Double) => (v.data._1, (resetProb + damping * msgSum)), // apply
    (targetId, triplet) => Some(triplet.src.data._2 / triplet.src.data._1), // gather
    (left: Double, right: Double) => left + right, // merge
    1.0,
    numIter)

  // Drop the out degree and keep only the rank.
  converged.mapVertices { case Vertex(_, (_, rank)) => rank }
}
/**
 * Compute PageRank with the GraphLab gather-apply-scatter abstraction,
 * using a per-vertex tolerance instead of a fixed iteration count.
 *
 * While the computation runs each vertex carries the triple
 * (out-degree, current rank, previous rank); both ranks start at 1.0.
 *
 * @param graph     the input graph; its vertex and edge data are not read
 * @param tol       the scatter predicate is true for an edge while the
 *                  source's rank moved by more than `tol` in its last apply
 * @param maxIter   iteration cap passed to `GraphLab.iterate`
 *                  (defaults to `Integer.MAX_VALUE`)
 * @param resetProb the constant term of the rank update; the gathered sum
 *                  is weighted by `1.0 - resetProb`
 * @return the graph produced by `GraphLab.iterate` with each vertex's data
 *         replaced by its current rank
 */
def dynamicPagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED],
                                                tol: Float,
                                                maxIter: Int = Integer.MAX_VALUE,
                                                resetProb: Double = 0.15) = {
  // Compute the out degree of each vertex. The join hands us an Option
  // (absent when the vertex has no entry in `outDegrees`), so default to 0
  // exactly as `pagerank` does rather than summing the Option as if it
  // were a collection.
  val pagerankGraph = graph.leftJoinVertices[Int, (Int, Double, Double)](graph.outDegrees,
    (vertex, deg) => (deg.getOrElse(0), 1.0, 1.0)
  )

  // Run PageRank
  GraphLab.iterate(pagerankGraph)(
    (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather
    (a: Double, b: Double) => a + b, // merge
    // apply: the outgoing rank is shifted into the third slot so scatter
    // can compare the new rank against the previous one.
    (vertex, a: Option[Double]) =>
      (vertex.data._1, (resetProb + (1.0 - resetProb) * a.getOrElse(0.0)), vertex.data._2),
    // scatter: presumably decides which neighbours stay scheduled --
    // NOTE(review): confirm against GraphLab.iterate.
    (me_id, edge) => math.abs(edge.src.data._3 - edge.src.data._2) > tol,
    maxIter).mapVertices { case Vertex(vid, data) => data._2 }
}
/**
 * Label every vertex with the lowest vertex id found in its connected
 * component.
 *
 * Each vertex starts out labelled with its own id; labels are then
 * propagated along edges in both directions, keeping the minimum.
 *
 * @param graph the input graph; its vertex data is discarded
 * @return the graph produced by `GraphLab.iterate`, whose vertex data is
 *         the component label
 */
def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]) = {
  // Seed each vertex with its own id as the component label.
  val ccGraph = graph.mapVertices { case Vertex(vid, _) => vid }

  GraphLab.iterate[Int, ED, Int](ccGraph)(
    // gather: read the label on the far end of the edge
    (selfId, triplet) => triplet.otherVertex(selfId).data,
    // merge: keep the smaller label
    (left: Int, right: Int) => math.min(left, right),
    // apply: adopt the gathered label when it is smaller than our own
    (v, gathered: Option[Int]) => gathered match {
      case Some(label) => math.min(v.data, label)
      case None        => v.data
    },
    // scatter: true while this vertex holds a smaller label than its neighbour
    (selfId, triplet) => {
      val mine = triplet.vertex(selfId).data
      val theirs = triplet.otherVertex(selfId).data
      mine < theirs
    },
    gatherDirection = EdgeDirection.Both, scatterDirection = EdgeDirection.Both
  )
}
// /** // /**
// * Compute the shortest path to a set of markers // * Compute the shortest path to a set of markers
@ -134,8 +140,6 @@ object Analytics extends Logging {
// // } // // }
// // /** // // /**
// // * Compute the shortest path to a set of markers // // * Compute the shortest path to a set of markers
// // */ // // */
@ -163,8 +167,6 @@ object Analytics extends Logging {
// // } // // }
// // /** // // /**
// // * // // *
// // */ // // */
@ -503,8 +505,6 @@ object Analytics extends Logging {
// } // }
// /** // /**
// * Compute the shortest path to a set of markers // * Compute the shortest path to a set of markers
// */ // */
@ -532,8 +532,6 @@ object Analytics extends Logging {
// } // }
// /** // /**
// * // *
// */ // */

View file

@ -33,12 +33,12 @@ object GraphLab {
* @tparam A The type accumulated during the gather phase * @tparam A The type accumulated during the gather phase
* @return the resulting graph after the algorithm converges * @return the resulting graph after the algorithm converges
*/ */
def apply[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( def iterate[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])(
gatherFunc: (Vid, EdgeTriplet[VD, ED]) => A, gatherFunc: (Vid, EdgeTriplet[VD, ED]) => A,
mergeFunc: (A, A) => A, mergeFunc: (A, A) => A,
applyFunc: (Vertex[VD], Option[A]) => VD, applyFunc: (Vertex[VD], Option[A]) => VD,
scatterFunc: (Vid, EdgeTriplet[VD, ED]) => Boolean, scatterFunc: (Vid, EdgeTriplet[VD, ED]) => Boolean,
numIter: Int, numIter: Int = Integer.MAX_VALUE,
gatherDirection: EdgeDirection = EdgeDirection.In, gatherDirection: EdgeDirection = EdgeDirection.In,
scatterDirection: EdgeDirection = EdgeDirection.Out): Graph[VD, ED] = { scatterDirection: EdgeDirection = EdgeDirection.Out): Graph[VD, ED] = {