Only group elements ten at a time into SequenceFile records in
saveAsObjectFile
This commit is contained in:
parent
716e10ca32
commit
65113b7e1b
|
@@ -415,7 +415,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
|
|||
}
|
||||
|
||||
/**
 * Saves the elements of this RDD to `path` as a SequenceFile of serialized objects.
 *
 * Within each partition the elements are batched into arrays of at most ten
 * (`iter.grouped(10)`); every batch is serialized with `Utils.serialize`, wrapped in a
 * `BytesWritable`, paired with a `NullWritable` key, and handed to `saveAsSequenceFile`.
 *
 * @param path destination forwarded unchanged to `saveAsSequenceFile`
 */
def saveAsObjectFile(path: String) {
|
||||
this.glom // NOTE(review): looks like the pre-change ("-") side of this diff hunk, superseded by the mapPartitions line below — confirm
|
||||
this.mapPartitions(iter => iter.grouped(10).map(_.toArray)) // post-change ("+") side per the commit message: batch ten elements per record
|
||||
.map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x)))) // key carries no data; value is the serialized batch
|
||||
.saveAsSequenceFile(path)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue