From 65113b7e1b32b4bc2cd879ac8be86562a7996120 Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Thu, 4 Oct 2012 16:49:30 -0700
Subject: [PATCH] Only group elements ten at a time into SequenceFile records
 in saveAsObjectFile

---
 core/src/main/scala/spark/RDD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index f0d2b2d783..f32ff475da 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -415,7 +415,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   }
 
   def saveAsObjectFile(path: String) {
-    this.glom
+    this.mapPartitions(iter => iter.grouped(10).map(_.toArray))
       .map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x))))
       .saveAsSequenceFile(path)
   }
@@ -424,4 +424,4 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   private[spark] def collectPartitions(): Array[Array[T]] = {
     sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
   }
-}
\ No newline at end of file
+}