Remove duplicate method in GraphLoader and improve docs
This commit is contained in:
parent
37611e57f6
commit
9454fa1f6c
|
@ -8,51 +8,20 @@ import org.apache.spark.{Logging, SparkContext}
|
|||
import org.apache.spark.graphx.impl.{EdgePartition, GraphImpl}
|
||||
import org.apache.spark.util.collection.PrimitiveVector
|
||||
|
||||
|
||||
/**
|
||||
* Provides utilities for loading [[Graph]]s from files.
|
||||
*/
|
||||
object GraphLoader extends Logging {
|
||||
|
||||
/**
|
||||
* Load an edge list from file initializing the Graph
|
||||
* Loads a graph from an edge list formatted file where each line contains two integers: a source
|
||||
* id and a target id. Skips lines that begin with `#`.
|
||||
*
|
||||
* @tparam ED the type of the edge data of the resulting Graph
|
||||
* If desired the edges can be automatically oriented in the positive
|
||||
* direction (source Id < target Id) by setting `canonicalOrientation` to
|
||||
* true.
|
||||
*
|
||||
* @param sc the SparkContext used to construct RDDs
|
||||
* @param path the path to the text file containing the edge list
|
||||
* @param edgeParser a function that takes an array of strings and
|
||||
* returns an ED object
|
||||
* @param minEdgePartitions the number of partitions for the
|
||||
* the Edge RDD
|
||||
*
|
||||
*/
|
||||
def textFile[ED: ClassTag](
|
||||
sc: SparkContext,
|
||||
path: String,
|
||||
edgeParser: Array[String] => ED,
|
||||
minEdgePartitions: Int = 1):
|
||||
Graph[Int, ED] = {
|
||||
// Parse the edge data table
|
||||
val edges = sc.textFile(path, minEdgePartitions).mapPartitions( iter =>
|
||||
iter.filter(line => !line.isEmpty && line(0) != '#').map { line =>
|
||||
val lineArray = line.split("\\s+")
|
||||
if(lineArray.length < 2) {
|
||||
println("Invalid line: " + line)
|
||||
assert(false)
|
||||
}
|
||||
val source = lineArray(0).trim.toLong
|
||||
val target = lineArray(1).trim.toLong
|
||||
val tail = lineArray.drop(2)
|
||||
val edata = edgeParser(tail)
|
||||
Edge(source, target, edata)
|
||||
})
|
||||
val defaultVertexAttr = 1
|
||||
Graph.fromEdges(edges, defaultVertexAttr)
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a graph from an edge list formatted file with each line containing
|
||||
* two integers: a source Id and a target Id.
|
||||
*
|
||||
* @example A file in the following format:
|
||||
* @example Loads a file in the following format:
|
||||
* {{{
|
||||
* # Comment Line
|
||||
* # Source Id <\t> Target Id
|
||||
|
@ -62,25 +31,19 @@ object GraphLoader extends Logging {
|
|||
* 1 8
|
||||
* }}}
|
||||
*
|
||||
* If desired the edges can be automatically oriented in the positive
|
||||
* direction (source Id < target Id) by setting `canonicalOrientation` to
|
||||
* true
|
||||
*
|
||||
* @param sc
|
||||
* @param path the path to the file (e.g., /Home/data/file or hdfs://file)
|
||||
* @param path the path to the file (e.g., /home/data/file or hdfs://file)
|
||||
* @param canonicalOrientation whether to orient edges in the positive
|
||||
* direction.
|
||||
* direction
|
||||
* @param minEdgePartitions the number of partitions for the
|
||||
* the Edge RDD
|
||||
* the edge RDD
|
||||
* @tparam ED
|
||||
* @return
|
||||
*/
|
||||
def edgeListFile(
|
||||
sc: SparkContext,
|
||||
path: String,
|
||||
canonicalOrientation: Boolean = false,
|
||||
minEdgePartitions: Int = 1):
|
||||
Graph[Int, Int] = {
|
||||
minEdgePartitions: Int = 1): Graph[Int, Int] = {
|
||||
val startTime = System.currentTimeMillis
|
||||
|
||||
// Parse the edge data table directly into edge partitions
|
||||
|
|
Loading…
Reference in a new issue