Addressed code review comments.

parent 639e27a396
commit 37a524d91c
@@ -241,7 +241,7 @@ object Analytics extends Logging {
         var outFname = ""
         var numVPart = 4
         var numEPart = 4
-        var partitionStrategy: PartitionStrategy = RandomVertexCut
+        var partitionStrategy: PartitionStrategy = RandomVertexCut()

         options.foreach{
           case ("numIter", v) => numIter = v.toInt
@@ -251,11 +251,11 @@ object Analytics extends Logging {
           case ("numVPart", v) => numVPart = v.toInt
           case ("numEPart", v) => numEPart = v.toInt
           case ("partStrategy", v) => {
-            v match {
-              case "RandomVertexCut" => partitionStrategy = RandomVertexCut
-              case "EdgePartition1D" => partitionStrategy = EdgePartition1D
-              case "EdgePartition2D" => partitionStrategy = EdgePartition2D
-              case "CanonicalRandomVertexCut" => partitionStrategy = CanonicalRandomVertexCut
+            partitionStrategy = v match {
+              case "RandomVertexCut" => RandomVertexCut()
+              case "EdgePartition1D" => EdgePartition1D()
+              case "EdgePartition2D" => EdgePartition2D()
+              case "CanonicalRandomVertexCut" => CanonicalRandomVertexCut()
               case _ => throw new IllegalArgumentException("Invalid Partition Strategy: " + v)
             }
           }
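The new option handling maps a strategy name to a strategy instance in a single expression. Read as a standalone helper, it amounts to the sketch below; the helper name is hypothetical, and it assumes the case-class constructors introduced in PartitionStrategy.scala later in this commit:

    // Hypothetical helper equivalent to the "partStrategy" case above: maps a
    // strategy name to a PartitionStrategy instance, failing fast on unknown names.
    def parsePartitionStrategy(v: String): PartitionStrategy = v match {
      case "RandomVertexCut"          => RandomVertexCut()
      case "EdgePartition1D"          => EdgePartition1D()
      case "EdgePartition2D"          => EdgePartition2D()
      case "CanonicalRandomVertexCut" => CanonicalRandomVertexCut()
      case _ => throw new IllegalArgumentException("Invalid Partition Strategy: " + v)
    }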
@@ -1,7 +1,6 @@
 package org.apache.spark.graph

 import org.apache.spark.rdd.RDD
-import org.apache.spark.Logging
 import org.apache.spark.storage.StorageLevel

 /**
@@ -22,7 +21,7 @@ import org.apache.spark.storage.StorageLevel
  * @tparam VD the vertex attribute type
  * @tparam ED the edge attribute type
  */
-abstract class Graph[VD: ClassManifest, ED: ClassManifest] extends Logging {
+abstract class Graph[VD: ClassManifest, ED: ClassManifest] {

   /**
    * Get the vertices and their data.
@@ -2,12 +2,11 @@ package org.apache.spark.graph

 import scala.collection.JavaConversions._
 import org.apache.spark.rdd.RDD
-import org.apache.spark.Logging

 /**
  * This object implements the GraphLab gather-apply-scatter api.
  */
-object GraphLab extends Logging {
+object GraphLab {

   /**
    * Execute the GraphLab Gather-Apply-Scatter API
@@ -28,7 +28,7 @@ object GraphLoader {
       edgeParser: Array[String] => ED,
       minEdgePartitions: Int = 1,
       minVertexPartitions: Int = 1,
-      partitionStrategy: PartitionStrategy = RandomVertexCut): GraphImpl[Int, ED] = {
+      partitionStrategy: PartitionStrategy = RandomVertexCut()): GraphImpl[Int, ED] = {

     // Parse the edge data table
     val edges = sc.textFile(path, minEdgePartitions).flatMap { line =>
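For context, a usage sketch of this signature. The enclosing method's name is not visible in the hunk, so GraphLoader.textFile is an assumption, as are the path and the edge parser:

    // Hypothetical call site: load an edge list whose third column is an Int
    // attribute, overriding the new RandomVertexCut() default strategy.
    val graph = GraphLoader.textFile(
      sc, "hdfs:///data/edges.tsv",
      (fields: Array[String]) => fields(2).toInt,
      partitionStrategy = EdgePartition2D())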
@@ -50,7 +50,7 @@ sealed trait PartitionStrategy extends Serializable {
  *
  *
  */
-object EdgePartition2D extends PartitionStrategy {
+case class EdgePartition2D() extends PartitionStrategy {
   override def getPartition(src: Vid, dst: Vid, numParts: Pid): Pid = {
     val ceilSqrtNumParts: Pid = math.ceil(math.sqrt(numParts)).toInt
     val mixingPrime: Vid = 1125899906842597L
@@ -61,7 +61,7 @@ object EdgePartition2D extends PartitionStrategy {
 }


-object EdgePartition1D extends PartitionStrategy {
+case class EdgePartition1D() extends PartitionStrategy {
   override def getPartition(src: Vid, dst: Vid, numParts: Pid): Pid = {
     val mixingPrime: Vid = 1125899906842597L
     (math.abs(src) * mixingPrime).toInt % numParts
@@ -73,7 +73,7 @@ object EdgePartition1D extends PartitionStrategy {
  * Assign edges to an aribtrary machine corresponding to a
  * random vertex cut.
  */
-object RandomVertexCut extends PartitionStrategy {
+case class RandomVertexCut() extends PartitionStrategy {
   override def getPartition(src: Vid, dst: Vid, numParts: Pid): Pid = {
     math.abs((src, dst).hashCode()) % numParts
   }
@@ -85,7 +85,7 @@ object RandomVertexCut extends PartitionStrategy {
  * function ensures that edges of opposite direction between the same two vertices
  * will end up on the same partition.
  */
-object CanonicalRandomVertexCut extends PartitionStrategy {
+case class CanonicalRandomVertexCut() extends PartitionStrategy {
   override def getPartition(src: Vid, dst: Vid, numParts: Pid): Pid = {
     val lower = math.min(src, dst)
     val higher = math.max(src, dst)
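Turning each strategy from a singleton object into a case class means call sites construct instances (RandomVertexCut() rather than RandomVertexCut), and code can dispatch on an instance's concrete type, which is exactly what GraphImpl.groupEdgeTriplets does later in this commit. A minimal sketch, assuming the graph package's Vid/Pid type aliases are in scope:

    // A strategy instance travels with the graph; type tests pick out the
    // strategies a given operation supports.
    val strategy: PartitionStrategy = CanonicalRandomVertexCut()
    val pid = strategy.getPartition(3L, 7L, 8)  // same partition as getPartition(7L, 3L, 8)

    val supportsGroupEdges = strategy match {
      case _: CanonicalRandomVertexCut => true
      case _ => false
    }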
@@ -1,7 +1,6 @@
 package org.apache.spark.graph

 import org.apache.spark.rdd.RDD
-import org.apache.spark.Logging


 /**
@@ -42,7 +41,7 @@ import org.apache.spark.Logging
  * }}}
  *
  */
-object Pregel extends Logging {
+object Pregel {


   /**
@@ -8,6 +8,7 @@ import scala.collection.mutable.ArrayBuffer
 import org.apache.spark.SparkContext._
 import org.apache.spark.HashPartitioner
 import org.apache.spark.util.ClosureCleaner
+import org.apache.spark.SparkException

 import org.apache.spark.Partitioner
 import org.apache.spark.graph._
@@ -97,8 +98,6 @@ class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected (
   @transient override val triplets: RDD[EdgeTriplet[VD, ED]] =
     makeTriplets(localVidMap, vTableReplicatedValues.bothAttrs, eTable)

-  //@transient private val partitioner: PartitionStrategy = partitionStrategy
-
   override def persist(newLevel: StorageLevel): Graph[VD, ED] = {
     eTable.persist(newLevel)
     vid2pid.persist(newLevel)
@@ -250,6 +249,8 @@ class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected (

   override def groupEdgeTriplets[ED2: ClassManifest](
     f: Iterator[EdgeTriplet[VD,ED]] => ED2 ): Graph[VD,ED2] = {
+      partitioner match {
+        case _: CanonicalRandomVertexCut => {
       val newEdges: RDD[Edge[ED2]] = triplets.mapPartitions { partIter =>
         partIter
         // TODO(crankshaw) toList requires that the entire edge partition
@@ -266,15 +267,20 @@ class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected (
           .map { case ((src, dst), data) => Edge(src, dst, data) }
           .toIterator
       }

       //TODO(crankshaw) eliminate the need to call createETable
       val newETable = createETable(newEdges, partitioner)
       new GraphImpl(vTable, vid2pid, localVidMap, newETable, partitioner)
-  }
+      }
+
+      case _ => throw new SparkException(partitioner.getClass.getName
+        + " is incompatible with groupEdgeTriplets")
+    }
+  }

   override def groupEdges[ED2: ClassManifest](f: Iterator[Edge[ED]] => ED2 ):
     Graph[VD,ED2] = {

+    partitioner match {
+      case _: CanonicalRandomVertexCut => {
       val newEdges: RDD[Edge[ED2]] = edges.mapPartitions { partIter =>
         partIter.toList
         .groupBy { e: Edge[ED] => (e.srcId, e.dstId) }
@@ -289,6 +295,11 @@ class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected (
       new GraphImpl(vTable, vid2pid, localVidMap, newETable, partitioner)
-  }
+      }
+
+      case _ => throw new SparkException(partitioner.getClass.getName
+        + " is incompatible with groupEdges")
+    }
+  }

   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Lower level transformation methods
   //////////////////////////////////////////////////////////////////////////////////////////////////
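With the partitioner match above, groupEdgeTriplets and groupEdges now reject any graph not built with CanonicalRandomVertexCut(), the one strategy whose documentation guarantees that both directions of an edge land in the same partition. A usage sketch, assuming Int vertex and edge attributes and pre-built vertices/edges RDDs:

    // Build the graph with the one strategy groupEdges accepts, then collapse
    // parallel edges between the same vertex pair by summing their attributes.
    val g = GraphImpl(vertices, edges, 0, (a: Int, b: Int) => a,
                      CanonicalRandomVertexCut())
    val merged = g.groupEdges(edgeIter => edgeIter.map(_.attr).sum)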
@@ -315,7 +326,7 @@ object GraphImpl {
       vertices: RDD[(Vid, VD)],
       edges: RDD[Edge[ED]],
       defaultVertexAttr: VD): GraphImpl[VD,ED] = {
-    apply(vertices, edges, defaultVertexAttr, (a:VD, b:VD) => a, RandomVertexCut)
+    apply(vertices, edges, defaultVertexAttr, (a:VD, b:VD) => a, RandomVertexCut())
   }

   def apply[VD: ClassManifest, ED: ClassManifest](
@@ -331,7 +342,7 @@ object GraphImpl {
       edges: RDD[Edge[ED]],
       defaultVertexAttr: VD,
       mergeFunc: (VD, VD) => VD): GraphImpl[VD,ED] = {
-    apply(vertices, edges, defaultVertexAttr, mergeFunc, RandomVertexCut)
+    apply(vertices, edges, defaultVertexAttr, mergeFunc, RandomVertexCut())
   }

   def apply[VD: ClassManifest, ED: ClassManifest](
@@ -362,14 +373,6 @@ object GraphImpl {
   }

-
-
-
-  // TODO(crankshaw) - can I remove this
-  //protected def createETable[ED: ClassManifest](edges: RDD[Edge[ED]])
-  //  : RDD[(Pid, EdgePartition[ED])] = {
-  //  createETable(edges, RandomVertexCut)
-  //}

   /**
    * Create the edge table RDD, which is much more efficient for Java heap storage than the
    * normal edges data structure (RDD[(Vid, Vid, ED)]).