Only group elements ten at a time into SequenceFile records in saveAsObjectFile
This commit is contained in:
parent
716e10ca32
commit
65113b7e1b
|
@@ -415,7 +415,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
|
||||||
}
|
}
|
||||||
|
|
||||||
def saveAsObjectFile(path: String) {
|
def saveAsObjectFile(path: String) {
|
||||||
this.glom
|
this.mapPartitions(iter => iter.grouped(10).map(_.toArray))
|
||||||
.map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x))))
|
.map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x))))
|
||||||
.saveAsSequenceFile(path)
|
.saveAsSequenceFile(path)
|
||||||
}
|
}
|
||||||
|
@@ -424,4 +424,4 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
|
||||||
private[spark] def collectPartitions(): Array[Array[T]] = {
|
private[spark] def collectPartitions(): Array[Array[T]] = {
|
||||||
sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
|
sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue