merging changes from upstream benchmarking branch

2013-10-13 19:54:09 -07:00 · 2013-10-13 19:54:09 -07:00 · 637b67da56
parent 494472a6cc 1a961dd1f2
commit 637b67da56
11 changed files with 293 additions and 386 deletions
--- a/conf/core-site.xml
+++ b/conf/core-site.xml
@@ -0,0 +1,43 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <!-- Put site-specific property overrides in this file. -->
 <configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/mnt/ephemeral-hdfs</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://ec2-50-17-7-68.compute-1.amazonaws.com:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>65536</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.skip.checksum</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/run/hadoop-hdfs/dn._PORT</value>
  </property>
  <property>
    <name>dfs.client.file-block-storage-locations.timeout</name>
    <value>3000</value>
  </property>
 </configuration>
--- a/conf/slaves
+++ b/conf/slaves
@@ -1,2 +1,10 @@
-# A Spark Worker will be started on each of the machines listed below.
+ec2-23-20-12-62.compute-1.amazonaws.com
-localhost
+ec2-54-205-173-19.compute-1.amazonaws.com
 ec2-54-225-4-124.compute-1.amazonaws.com
 ec2-23-22-209-112.compute-1.amazonaws.com
 ec2-50-16-69-88.compute-1.amazonaws.com
 ec2-54-205-163-126.compute-1.amazonaws.com
 ec2-54-242-235-95.compute-1.amazonaws.com
 ec2-54-211-169-232.compute-1.amazonaws.com
 ec2-54-237-31-30.compute-1.amazonaws.com
 ec2-54-235-15-124.compute-1.amazonaws.com
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -1,21 +1,19 @@
 #!/usr/bin/env bash
 # This file contains environment variables required to run Spark. Copy it as
-# spark-env.sh and edit that to configure Spark for your site.
+# spark-env.sh and edit that to configure Spark for your site. At a minimum,
-#
+# the following two variables should be set:
-# The following variables can be set in this file:
+# - SCALA_HOME, to point to your Scala installation, or SCALA_LIBRARY_PATH to
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+#   point to the directory for Scala library JARs (if you install Scala as a
 #   Debian or RPM package, these are in a separate path, often /usr/share/java)
 # - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
 # - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
 #   we recommend setting app-wide options in the application's driver program.
 #     Examples of node-specific options : -Dspark.local.dir, GC options
 #     Examples of app-wide options : -Dspark.serializer
 #
-# If using the standalone deploy mode, you can also set variables for it here:
+# If using the standalone deploy mode, you can also set variables for it:
-# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_IP, to bind the master to a different IP address
 # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
 # - SPARK_WORKER_CORES, to set the number of cores to use on this machine
 # - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
 # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
-# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
+# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes
 #   to be spawned on every slave machine
--- a/core/src/main/scala/org/apache/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -156,6 +156,7 @@ object SparkEnv extends Logging {
    val serializer = serializerManager.setDefault(
      System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer"))
    logInfo("spark.serializer is " + System.getProperty("spark.serializer"))
    val closureSerializer = serializerManager.get(
      System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer"))
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -61,14 +61,10 @@ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging
    kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer())
    // Allow the user to register their own classes by setting spark.kryo.registrator
-    try {
+    Option(System.getProperty("spark.kryo.registrator")).foreach { regCls =>
-      Option(System.getProperty("spark.kryo.registrator")).foreach { regCls =>
+      logDebug("Running user registrator: " + regCls)
-        logDebug("Running user registrator: " + regCls)
+      val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator]
-        val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator]
+      reg.registerClasses(kryo)
        reg.registerClasses(kryo)
      }
    } catch {
      case _: Exception => println("Failed to register spark.kryo.registrator")
    }
    kryo.setClassLoader(classLoader)
@@ -116,7 +112,7 @@ class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends Deser
  }
 }
-private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance {
+private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance with Logging {
  val kryo = ks.newKryo()
  val output = ks.newKryoOutput()
  val input = ks.newKryoInput()
--- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
@@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap
 * instance of the serializer object has been created, the get method returns that instead of
 * creating a new one.
 */
-private[spark] class SerializerManager {
+private[spark] class SerializerManager extends org.apache.spark.Logging {
  private val serializers = new ConcurrentHashMap[String, Serializer]
  private var _default: Serializer = _
--- a/graph/src/main/scala/org/apache/spark/graph/Analytics.scala
+++ b/graph/src/main/scala/org/apache/spark/graph/Analytics.scala
@@ -6,37 +6,6 @@ import org.apache.spark._
 object Analytics extends Logging {
 //  def main(args: Array[String]) {
 //    //pregelPagerank()
 //  }
  // /**
  //  * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD
  //  */
  // // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = {
  // //   // Compute the out degree of each vertex
  // //   val pagerankGraph = graph.updateVertices[Int, (Int, Float)](graph.outDegrees,
  // //     (vertex, deg) => (deg.getOrElse(0), 1.0F)
  // //   )
  // //   GraphLab.iterateGA[(Int, Float), ED, Float](pagerankGraph)(
  // //     (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather
  // //     (a: Float, b: Float) => a + b, // merge
  // //     (vertex, a: Option[Float]) => (vertex.data._1, (0.15F + 0.85F * a.getOrElse(0F))), // apply
  // //     numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) }
  // // }
  // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = {
  //   // Compute the out degree of each vertex
  //   val pagerankGraph = graph.updateVertices[Int, (Int, Double)](graph.outDegrees,
  //     (vertex, deg) => (deg.getOrElse(0), 1.0)
  //   )
  //   GraphLab.iterateGA2[(Int, Double), ED, Double](pagerankGraph)(
  //     (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather
  //     (a: Double, b: Double) => a + b, // merge
  //     0.0, // default
  //     (vertex, a: Double) => (vertex.data._1, (0.15 + 0.85 * a)), // apply
  //     numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) }
  // }
  /**
   * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD
   */
@@ -96,346 +65,239 @@ object Analytics extends Logging {
   * lowest vertex id in the connected component containing
   * that vertex.
   */
-  def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]) = {
+  def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = {
    val ccGraph = graph.mapVertices { case (vid, _) => vid }
    GraphLab.iterate(ccGraph)(
      (me_id, edge) => edge.otherVertex(me_id).data, // gather
      (a: Vid, b: Vid) => math.min(a, b), // merge
      (id, data, a: Option[Vid]) => math.min(data, a.getOrElse(Long.MaxValue)), // apply
      (me_id, edge) => (edge.vertex(me_id).data < edge.otherVertex(me_id).data), // scatter
      numIter,
      gatherDirection = EdgeDirection.Both, scatterDirection = EdgeDirection.Both
    )
  }
-  //   /**
+  def main(args: Array[String]) = {
-  //    * Compute the shortest path to a set of markers
+    val host = args(0)
-  //    */
+    val taskType = args(1)
-  //   def shortestPath[VD: Manifest](graph: Graph[VD, Float], sources: List[Int], numIter: Int) = {
+    val fname = args(2)
-  //     val sourceSet = sources.toSet
+    val options =  args.drop(3).map { arg =>
-  //     val spGraph = graph.mapVertices {
+      arg.dropWhile(_ == '-').split('=') match {
-  //       case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue))
+        case Array(opt, v) => (opt -> v)
-  //     }
+        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
-  //     GraphLab.iterateGA[Float, Float, Float](spGraph)(
+      }
-  //       (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather
+    }
  //       (a: Float, b: Float) => math.min(a, b), // merge
  //       (v, a: Option[Float]) => math.min(v.data, a.getOrElse(Float.MaxValue)), // apply
  //       numIter,
  //       gatherDirection = EdgeDirection.In)
  //   }
-  //   // /**
+    def setLogLevels(level: org.apache.log4j.Level, loggers: TraversableOnce[String]) = {
-  //   //  * Compute the connected component membership of each vertex
+      loggers.map{
-  //   //  * and return an RDD with the vertex value containing the
+        loggerName =>
-  //   //  * lowest vertex id in the connected component containing
+          val logger = org.apache.log4j.Logger.getLogger(loggerName)
-  //   //  * that vertex.
+        val prevLevel = logger.getLevel()
-  //   //  */
+        logger.setLevel(level)
-  //   // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED],
+        loggerName -> prevLevel
-  //   //   numIter: Int = Int.MaxValue) = {
+      }.toMap
    }
 //       setLogLevels(org.apache.log4j.Level.DEBUG, Seq("org.apache.spark"))
-  //   //   val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) })
+     val serializer = "org.apache.spark.serializer.KryoSerializer"
-  //   //   val edges = graph.edges // .mapValues(v => None)
+     System.setProperty("spark.serializer", serializer)
-  //   //   val ccGraph = new Graph(vertices, edges)
+     //System.setProperty("spark.shuffle.compress", "false")
     System.setProperty("spark.kryo.registrator", "org.apache.spark.graph.GraphKryoRegistrator")
-  //   //   ccGraph.iterateDynamic(
+     taskType match {
-  //   //     (me_id, edge) => edge.otherVertex(me_id).data, // gather
+       case "pagerank" => {
-  //   //     (a: Int, b: Int) => math.min(a, b), // merge
+
-  //   //     Integer.MAX_VALUE,
+         var numIter = Int.MaxValue
-  //   //     (v, a: Int) => math.min(v.data, a), // apply
+         var isDynamic = false
-  //   //     (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter
+         var tol:Float = 0.001F
-  //   //     numIter,
+         var outFname = ""
-  //   //     gatherEdges = EdgeDirection.Both,
+         var numVPart = 4
-  //   //     scatterEdges = EdgeDirection.Both).vertices
+         var numEPart = 4
-  //   //   //
+
-  //   //   //    graph_ret.vertices.collect.foreach(println)
+         options.foreach{
-  //   //   //    graph_ret.edges.take(10).foreach(println)
+           case ("numIter", v) => numIter = v.toInt
-  //   // }
+           case ("dynamic", v) => isDynamic = v.toBoolean
           case ("tol", v) => tol = v.toFloat
           case ("output", v) => outFname = v
           case ("numVPart", v) => numVPart = v.toInt
           case ("numEPart", v) => numEPart = v.toInt
           case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
         }
         if(!isDynamic && numIter == Int.MaxValue) {
           println("Set number of iterations!")
           sys.exit(1)
         }
         println("======================================")
         println("|             PageRank               |")
         println("--------------------------------------")
         println(" Using parameters:")
         println(" \tDynamic:  " + isDynamic)
         if(isDynamic) println(" \t  |-> Tolerance: " + tol)
         println(" \tNumIter:  " + numIter)
         println("======================================")
         val sc = new SparkContext(host, "PageRank(" + fname + ")")
         val graph = GraphLoader.textFile(sc, fname, a => 1.0F, 
          minEdgePartitions = numEPart, minVertexPartitions = numVPart).cache()
         val startTime = System.currentTimeMillis
         logInfo("GRAPHX: starting tasks")
         logInfo("GRAPHX: Number of vertices " + graph.vertices.count)
         logInfo("GRAPHX: Number of edges " + graph.edges.count)
         val pr = Analytics.pagerank(graph, numIter)
         // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter)
         //   else  Analytics.pagerank(graph, numIter)
         logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case (id,r) => r }.reduce(_+_) )
         if (!outFname.isEmpty) {
           println("Saving pageranks of pages to " + outFname)
           pr.vertices.map{case (id, r) => id + "\t" + r}.saveAsTextFile(outFname)
         }
         logInfo("GRAPHX: Runtime:    " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds")
         sc.stop()
       }
        case "cc" => {
           var numIter = Int.MaxValue
           var numVPart = 4
           var numEPart = 4
           var isDynamic = false
           options.foreach{
             case ("numIter", v) => numIter = v.toInt
             case ("dynamic", v) => isDynamic = v.toBoolean
             case ("numEPart", v) => numEPart = v.toInt
             case ("numVPart", v) => numVPart = v.toInt
             case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
           }
           if(!isDynamic && numIter == Int.MaxValue) {
             println("Set number of iterations!")
             sys.exit(1)
           }
           println("======================================")
           println("|      Connected Components          |")
           println("--------------------------------------")
           println(" Using parameters:")
           println(" \tDynamic:  " + isDynamic)
           println(" \tNumIter:  " + numIter)
           println("======================================")
           val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")")
           //val graph = GraphLoader.textFile(sc, fname, a => 1.0F)
           val graph = GraphLoader.textFile(sc, fname, a => 1.0F, 
            minEdgePartitions = numEPart, minVertexPartitions = numVPart).cache()
           val cc = Analytics.connectedComponents(graph, numIter)
           //val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter)
           //         else Analytics.connectedComponents(graph, numIter)
           println("Components: " + cc.vertices.map{ case (vid,data) => data}.distinct())
           sc.stop()
         }
 //
 //        case "shortestpath" => {
 //
 //           var numIter = Int.MaxValue
 //           var isDynamic = true
 //           var sources: List[Int] = List.empty
 //
 //           options.foreach{
 //             case ("numIter", v) => numIter = v.toInt
 //             case ("dynamic", v) => isDynamic = v.toBoolean
 //             case ("source", v) => sources ++= List(v.toInt)
 //             case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
 //           }
 //
 //
 //           if(!isDynamic && numIter == Int.MaxValue) {
 //             println("Set number of iterations!")
 //             sys.exit(1)
 //           }
 //
 //           if(sources.isEmpty) {
 //             println("No sources provided!")
 //             sys.exit(1)
 //           }
 //
 //           println("======================================")
 //           println("|          Shortest Path             |")
 //           println("--------------------------------------")
 //           println(" Using parameters:")
 //           println(" \tDynamic:  " + isDynamic)
 //           println(" \tNumIter:  " + numIter)
 //           println(" \tSources:  [" + sources.mkString(", ") + "]")
 //           println("======================================")
 //
 //           val sc = new SparkContext(host, "ShortestPath(" + fname + ")")
 //           val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) )
 //           //val sp = Analytics.shortestPath(graph, sources, numIter)
 //           // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter)
 //           //   else  Analytics.shortestPath(graph, sources, numIter)
 //           println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_)))
 //
 //           sc.stop()
 //         }
-  //   // /**
+      //  case "als" => {
  //   //  * Compute the shortest path to a set of markers
  //   //  */
  //   //  def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Float],
  //   //   sources: List[Int], numIter: Int) = {
  //   //   val sourceSet = sources.toSet
  //   //   val vertices = graph.vertices.mapPartitions(
  //   //     iter => iter.map {
  //   //       case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue) )
  //   //       });
-  //   //   val edges = graph.edges // .mapValues(v => None)
+      //    var numIter = 5
-  //   //   val spGraph = new Graph(vertices, edges)
+      //    var lambda = 0.01
      //    var latentK = 10
      //    var usersFname = "usersFactors.tsv"
      //    var moviesFname = "moviesFname.tsv"
      //    var numVPart = 4
      //    var numEPart = 4
-  //   //   val niterations = Int.MaxValue
+      //    options.foreach{
-  //   //   spGraph.iterateDynamic(
+      //      case ("numIter", v) => numIter = v.toInt
-  //   //     (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather
+      //      case ("lambda", v) => lambda = v.toDouble
-  //   //     (a: Float, b: Float) => math.min(a, b), // merge
+      //      case ("latentK", v) => latentK = v.toInt
-  //   //     Float.MaxValue,
+      //      case ("usersFname", v) => usersFname = v
-  //   //     (v, a: Float) => math.min(v.data, a), // apply
+      //      case ("moviesFname", v) => moviesFname = v
-  //   //     (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter
+      //      case ("numVPart", v) => numVPart = v.toInt
-  //   //     numIter,
+      //      case ("numEPart", v) => numEPart = v.toInt
-  //   //     gatherEdges = EdgeDirection.In,
+      //      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
-  //   //     scatterEdges = EdgeDirection.Out).vertices
+      //    }
-  //   // }
+
      //    println("======================================")
      //    println("|       Alternating Least Squares    |")
      //    println("--------------------------------------")
      //    println(" Using parameters:")
      //    println(" \tNumIter:     " + numIter)
      //    println(" \tLambda:      " + lambda)
      //    println(" \tLatentK:     " + latentK)
      //    println(" \tusersFname:  " + usersFname)
      //    println(" \tmoviesFname: " + moviesFname)
      //    println("======================================")
      //    val sc = new SparkContext(host, "ALS(" + fname + ")")
      //    val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble )
      //    graph.numVPart = numVPart
      //    graph.numEPart = numEPart
      //    val maxUser = graph.edges.map(_._1).reduce(math.max(_,_))
      //    val minMovie = graph.edges.map(_._2).reduce(math.min(_,_))
      //    assert(maxUser < minMovie)
      //    val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache
      //    factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t"))
      //      .saveAsTextFile(usersFname)
      //    factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t"))
      //      .saveAsTextFile(moviesFname)
      //    sc.stop()
      //  }
-  //   // /**
+       case _ => {
-  //   //  *
+         println("Invalid task type.")
-  //   //  */
+       }
-  //   // def alternatingLeastSquares[VD: ClassManifest, ED: ClassManifest](graph: Graph[VD, Double],
+     }
-  //   //   latentK: Int, lambda: Double, numIter: Int) = {
+   }
  //   //   val vertices = graph.vertices.mapPartitions( _.map {
  //   //       case (vid, _) => (vid,  Array.fill(latentK){ scala.util.Random.nextDouble() } )
  //   //       }).cache
  //   //   val maxUser = graph.edges.map(_._1).reduce(math.max(_,_))
  //   //   val edges = graph.edges // .mapValues(v => None)
  //   //   val alsGraph = new Graph(vertices, edges)
  //   //   alsGraph.numVPart = graph.numVPart
  //   //   alsGraph.numEPart = graph.numEPart
  //   //   val niterations = Int.MaxValue
  //   //   alsGraph.iterateDynamic[(Array[Double], Array[Double])](
  //   //     (me_id, edge) => { // gather
  //   //       val X = edge.otherVertex(me_id).data
  //   //       val y = edge.data
  //   //       val Xy = X.map(_ * y)
  //   //       val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray
  //   //       (Xy, XtX)
  //   //     },
  //   //     (a, b) => {
  //   //     // The difference between the while loop and the zip is a FACTOR OF TWO in overall
  //   //     //  runtime
  //   //       var i = 0
  //   //       while(i < a._1.length) { a._1(i) += b._1(i); i += 1 }
  //   //       i = 0
  //   //       while(i < a._2.length) { a._2(i) += b._2(i); i += 1 }
  //   //       a
  //   //       // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r })
  //   //     },
  //   //     (Array.empty[Double], Array.empty[Double]), // default value is empty
  //   //     (vertex, accum) => { // apply
  //   //       val XyArray  = accum._1
  //   //       val XtXArray = accum._2
  //   //       if(XyArray.isEmpty) vertex.data // no neighbors
  //   //       else {
  //   //         val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) =>
  //   //           (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) +
  //   //           (if(i == j) lambda else 1.0F) //regularization
  //   //         }
  //   //         val Xy = DenseMatrix.create(latentK,1,XyArray)
  //   //         val w = XtX \ Xy
  //   //         w.data
  //   //       }
  //   //     },
  //   //     (me_id, edge) => true,
  //   //     numIter,
  //   //     gatherEdges = EdgeDirection.Both,
  //   //     scatterEdges = EdgeDirection.Both,
  //   //     vertex => vertex.id < maxUser).vertices
  //   // }
  //   def main(args: Array[String]) = {
  //     val host = args(0)
  //     val taskType = args(1)
  //     val fname = args(2)
  //     val options =  args.drop(3).map { arg =>
  //       arg.dropWhile(_ == '-').split('=') match {
  //         case Array(opt, v) => (opt -> v)
  //         case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
  //       }
  //     }
  //     System.setProperty("spark.serializer", "spark.KryoSerializer")
  //     //System.setProperty("spark.shuffle.compress", "false")
  //     System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator")
  //     taskType match {
  //       case "pagerank" => {
  //         var numIter = Int.MaxValue
  //         var isDynamic = false
  //         var tol:Float = 0.001F
  //         var outFname = ""
  //         var numVPart = 4
  //         var numEPart = 4
  //         options.foreach{
  //           case ("numIter", v) => numIter = v.toInt
  //           case ("dynamic", v) => isDynamic = v.toBoolean
  //           case ("tol", v) => tol = v.toFloat
  //           case ("output", v) => outFname = v
  //           case ("numVPart", v) => numVPart = v.toInt
  //           case ("numEPart", v) => numEPart = v.toInt
  //           case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
  //         }
  //         if(!isDynamic && numIter == Int.MaxValue) {
  //           println("Set number of iterations!")
  //           sys.exit(1)
  //         }
  //         println("======================================")
  //         println("|             PageRank               |")
  //         println("--------------------------------------")
  //         println(" Using parameters:")
  //         println(" \tDynamic:  " + isDynamic)
  //         if(isDynamic) println(" \t  |-> Tolerance: " + tol)
  //         println(" \tNumIter:  " + numIter)
  //         println("======================================")
  //         val sc = new SparkContext(host, "PageRank(" + fname + ")")
  //         val graph = Graph.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache()
  //         val startTime = System.currentTimeMillis
  //         logInfo("GRAPHX: starting tasks")
  //         logInfo("GRAPHX: Number of vertices " + graph.vertices.count)
  //         logInfo("GRAPHX: Number of edges " + graph.edges.count)
  //         val pr = Analytics.pagerank(graph, numIter)
  //         // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter)
  //         //   else  Analytics.pagerank(graph, numIter)
  //         logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) )
  //         if (!outFname.isEmpty) {
  //           println("Saving pageranks of pages to " + outFname)
  //           pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname)
  //         }
  //         logInfo("GRAPHX: Runtime:    " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds")
  //         sc.stop()
  //       }
  //      case "cc" => {
  //         var numIter = Int.MaxValue
  //         var isDynamic = false
  //         options.foreach{
  //           case ("numIter", v) => numIter = v.toInt
  //           case ("dynamic", v) => isDynamic = v.toBoolean
  //           case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
  //         }
  //         if(!isDynamic && numIter == Int.MaxValue) {
  //           println("Set number of iterations!")
  //           sys.exit(1)
  //         }
  //         println("======================================")
  //         println("|      Connected Components          |")
  //         println("--------------------------------------")
  //         println(" Using parameters:")
  //         println(" \tDynamic:  " + isDynamic)
  //         println(" \tNumIter:  " + numIter)
  //         println("======================================")
  //         val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")")
  //         val graph = Graph.textFile(sc, fname, a => 1.0F)
  //         val cc = Analytics.connectedComponents(graph, numIter)
  //         // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter)
  //         //   else  Analytics.connectedComponents(graph, numIter)
  //         println("Components: " + cc.vertices.map(_.data).distinct())
  //         sc.stop()
  //       }
  //      case "shortestpath" => {
  //         var numIter = Int.MaxValue
  //         var isDynamic = true
  //         var sources: List[Int] = List.empty
  //         options.foreach{
  //           case ("numIter", v) => numIter = v.toInt
  //           case ("dynamic", v) => isDynamic = v.toBoolean
  //           case ("source", v) => sources ++= List(v.toInt)
  //           case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
  //         }
  //         if(!isDynamic && numIter == Int.MaxValue) {
  //           println("Set number of iterations!")
  //           sys.exit(1)
  //         }
  //         if(sources.isEmpty) {
  //           println("No sources provided!")
  //           sys.exit(1)
  //         }
  //         println("======================================")
  //         println("|          Shortest Path             |")
  //         println("--------------------------------------")
  //         println(" Using parameters:")
  //         println(" \tDynamic:  " + isDynamic)
  //         println(" \tNumIter:  " + numIter)
  //         println(" \tSources:  [" + sources.mkString(", ") + "]")
  //         println("======================================")
  //         val sc = new SparkContext(host, "ShortestPath(" + fname + ")")
  //         val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) )
  //         val sp = Analytics.shortestPath(graph, sources, numIter)
  //         // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter)
  //         //   else  Analytics.shortestPath(graph, sources, numIter)
  //         println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_)))
  //         sc.stop()
  //       }
  //      //  case "als" => {
  //      //    var numIter = 5
  //      //    var lambda = 0.01
  //      //    var latentK = 10
  //      //    var usersFname = "usersFactors.tsv"
  //      //    var moviesFname = "moviesFname.tsv"
  //      //    var numVPart = 4
  //      //    var numEPart = 4
  //      //    options.foreach{
  //      //      case ("numIter", v) => numIter = v.toInt
  //      //      case ("lambda", v) => lambda = v.toDouble
  //      //      case ("latentK", v) => latentK = v.toInt
  //      //      case ("usersFname", v) => usersFname = v
  //      //      case ("moviesFname", v) => moviesFname = v
  //      //      case ("numVPart", v) => numVPart = v.toInt
  //      //      case ("numEPart", v) => numEPart = v.toInt
  //      //      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
  //      //    }
  //      //    println("======================================")
  //      //    println("|       Alternating Least Squares    |")
  //      //    println("--------------------------------------")
  //      //    println(" Using parameters:")
  //      //    println(" \tNumIter:     " + numIter)
  //      //    println(" \tLambda:      " + lambda)
  //      //    println(" \tLatentK:     " + latentK)
  //      //    println(" \tusersFname:  " + usersFname)
  //      //    println(" \tmoviesFname: " + moviesFname)
  //      //    println("======================================")
  //      //    val sc = new SparkContext(host, "ALS(" + fname + ")")
  //      //    val graph = Graph.textFile(sc, fname, a => a(0).toDouble )
  //      //    graph.numVPart = numVPart
  //      //    graph.numEPart = numEPart
  //      //    val maxUser = graph.edges.map(_._1).reduce(math.max(_,_))
  //      //    val minMovie = graph.edges.map(_._2).reduce(math.min(_,_))
  //      //    assert(maxUser < minMovie)
  //      //    val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache
  //      //    factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t"))
  //      //      .saveAsTextFile(usersFname)
  //      //    factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t"))
  //      //      .saveAsTextFile(moviesFname)
  //      //    sc.stop()
  //      //  }
  //       case _ => {
  //         println("Invalid task type.")
  //       }
  //     }
  //   }
  // /**
  //  * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD
@ -649,7 +511,7 @@ object Analytics extends Logging {
  //       val sc = new SparkContext(host, "PageRank(" + fname + ")")
-  //       val graph = Graph.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache()
+  //       val graph = GraphLoader.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache()
  //       val startTime = System.currentTimeMillis
  //       logInfo("GRAPHX: starting tasks")
@ -692,7 +554,7 @@ object Analytics extends Logging {
  //       println("======================================")
  //       val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")")
-  //       val graph = Graph.textFile(sc, fname, a => 1.0)
+  //       val graph = GraphLoader.textFile(sc, fname, a => 1.0)
  //       val cc = Analytics.connectedComponents(graph, numIter)
  //       // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter)
  //       //   else  Analytics.connectedComponents(graph, numIter)
@ -735,7 +597,7 @@ object Analytics extends Logging {
  //       println("======================================")
  //       val sc = new SparkContext(host, "ShortestPath(" + fname + ")")
-  //       val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) )
+  //       val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) )
  //       val sp = Analytics.shortestPath(graph, sources, numIter)
  //       // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter)
  //       //   else  Analytics.shortestPath(graph, sources, numIter)
@ -778,7 +640,7 @@ object Analytics extends Logging {
  //    println("======================================")
  //    val sc = new SparkContext(host, "ALS(" + fname + ")")
-  //    val graph = Graph.textFile(sc, fname, a => a(0).toDouble )
+  //    val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble )
  //    graph.numVPart = numVPart
  //    graph.numEPart = numEPart
--- a/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala
+++ b/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala
@ -4,7 +4,7 @@ import com.esotericsoftware.kryo.Kryo
 import org.apache.spark.graph.impl.MessageToPartition
 import org.apache.spark.serializer.KryoRegistrator
-
+import org.apache.spark.graph.impl._
 class GraphKryoRegistrator extends KryoRegistrator {
@ -13,6 +13,8 @@ class GraphKryoRegistrator extends KryoRegistrator {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[MutableTuple2[Object, Object]])
    kryo.register(classOf[MessageToPartition[Object]])
    kryo.register(classOf[(Vid, Object)])
    kryo.register(classOf[EdgePartition[Object]])
    // This avoids a large number of hash table lookups.
    kryo.setReferences(false)
--- a/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala
+++ b/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala
@ -20,7 +20,7 @@ object GraphLoader {
    : GraphImpl[Int, ED] = {
    // Parse the edge data table
-    val edges = sc.textFile(path).flatMap { line =>
+    val edges = sc.textFile(path, minEdgePartitions).flatMap { line =>
      if (!line.isEmpty && line(0) != '#') {
        val lineArray = line.split("\\s+")
        if(lineArray.length < 2) {
--- a/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala
+++ b/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala
@ -526,7 +526,8 @@ def apply[VD: ClassManifest, ED: ClassManifest](
      .map { e =>
        // Random partitioning based on the source vertex id.
        // val part: Pid = edgePartitionFunction1D(e.src, e.dst, numPartitions)
-        val part: Pid = edgePartitionFunction2D(e.src, e.dst, numPartitions, ceilSqrt)
+        // val part: Pid = edgePartitionFunction2D(e.src, e.dst, numPartitions, ceilSqrt)
        val part: Pid = randomVertexCut(e.src, e.dst, numPartitions)
        //val part: Pid = canonicalEdgePartitionFunction2D(e.src, e.dst, numPartitions, ceilSqrt)
        // Should we be using a 3-tuple or an optimized class here?
@ -642,7 +643,6 @@ def apply[VD: ClassManifest, ED: ClassManifest](
  // protected def createVTable[VD: ClassManifest, ED: ClassManifest](
  //     eTable: IndexedRDD[Pid, EdgePartition[ED]],
  //     vid2pid: Index
--- a/graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala
+++ b/graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala
@ -63,9 +63,6 @@ object GraphGenerators {
  }
  // For now this just writes the graph to a file. Eventually
  // it will return a spark.graph.Graph.
  // Right now it just generates a bunch of edges where
  // the edge data is the weight (default 1).