Only group elements ten at a time into SequenceFile records in saveAsObjectFile
This commit is contained in:
Matei Zaharia 2012-10-04 16:49:30 -07:00
parent 716e10ca32
commit 65113b7e1b

View file

@ -415,7 +415,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
}
def saveAsObjectFile(path: String) {
this.glom
this.mapPartitions(iter => iter.grouped(10).map(_.toArray))
.map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x))))
.saveAsSequenceFile(path)
}
@ -424,4 +424,4 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
private[spark] def collectPartitions(): Array[Array[T]] = {
sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
}
}
}