Merge pull request #470 from stephenh/morek
Make CoGroupedRDDs explicitly have the same key type.
This commit is contained in:
commit
3260b6120e
|
@@ -363,7 +363,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
|
|||
throw new SparkException("Default partitioner cannot partition array keys.")
|
||||
}
|
||||
val cg = new CoGroupedRDD[K](
|
||||
-Seq(self.asInstanceOf[RDD[(_, _)]], other.asInstanceOf[RDD[(_, _)]]),
|
||||
+Seq(self.asInstanceOf[RDD[(K, _)]], other.asInstanceOf[RDD[(K, _)]]),
|
||||
partitioner)
|
||||
val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest)
|
||||
prfs.mapValues {
|
||||
|
@@ -382,9 +382,9 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
|
|||
throw new SparkException("Default partitioner cannot partition array keys.")
|
||||
}
|
||||
val cg = new CoGroupedRDD[K](
|
||||
-Seq(self.asInstanceOf[RDD[(_, _)]],
|
||||
-other1.asInstanceOf[RDD[(_, _)]],
|
||||
-other2.asInstanceOf[RDD[(_, _)]]),
|
||||
+Seq(self.asInstanceOf[RDD[(K, _)]],
|
||||
+other1.asInstanceOf[RDD[(K, _)]],
|
||||
+other2.asInstanceOf[RDD[(K, _)]]),
|
||||
partitioner)
|
||||
val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest)
|
||||
prfs.mapValues {
|
||||
|
|
|
@@ -40,8 +40,8 @@ private[spark] class CoGroupAggregator
|
|||
{ (b1, b2) => b1 ++ b2 })
|
||||
with Serializable
|
||||
|
||||
-class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(_, _)]], part: Partitioner)
|
||||
-extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) with Logging {
|
||||
+class CoGroupedRDD[K](@transient var rdds: Seq[RDD[(K, _)]], part: Partitioner)
|
||||
+extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) {
|
||||
|
||||
private val aggr = new CoGroupAggregator
|
||||
|
||||
|
|
|
@@ -347,7 +347,7 @@ object CheckpointSuite {
|
|||
def cogroup[K, V](first: RDD[(K, V)], second: RDD[(K, V)], part: Partitioner) = {
|
||||
//println("First = " + first + ", second = " + second)
|
||||
new CoGroupedRDD[K](
|
||||
-Seq(first.asInstanceOf[RDD[(_, _)]], second.asInstanceOf[RDD[(_, _)]]),
|
||||
+Seq(first.asInstanceOf[RDD[(K, _)]], second.asInstanceOf[RDD[(K, _)]]),
|
||||
part
|
||||
).asInstanceOf[RDD[(K, Seq[Seq[V]])]]
|
||||
}
|
||||
|
|
|
@@ -457,7 +457,7 @@ extends Serializable {
|
|||
): DStream[(K, (Seq[V], Seq[W]))] = {
|
||||
|
||||
val cgd = new CoGroupedDStream[K](
|
||||
-Seq(self.asInstanceOf[DStream[(_, _)]], other.asInstanceOf[DStream[(_, _)]]),
|
||||
+Seq(self.asInstanceOf[DStream[(K, _)]], other.asInstanceOf[DStream[(K, _)]]),
|
||||
partitioner
|
||||
)
|
||||
val pdfs = new PairDStreamFunctions[K, Seq[Seq[_]]](cgd)(
|
||||
|
|
|
@@ -6,7 +6,7 @@ import spark.streaming.{Time, DStream, Duration}
|
|||
|
||||
private[streaming]
|
||||
class CoGroupedDStream[K : ClassManifest](
|
||||
-parents: Seq[DStream[(_, _)]],
|
||||
+parents: Seq[DStream[(K, _)]],
|
||||
partitioner: Partitioner
|
||||
) extends DStream[(K, Seq[Seq[_]])](parents.head.ssc) {
|
||||
|
||||
|
|
|
@@ -101,7 +101,7 @@ class ReducedWindowedDStream[K: ClassManifest, V: ClassManifest](
|
|||
val allRDDs = new ArrayBuffer[RDD[(K, V)]]() += previousWindowRDD ++= oldRDDs ++= newRDDs
|
||||
|
||||
// Cogroup the reduced RDDs and merge the reduced values
|
||||
-val cogroupedRDD = new CoGroupedRDD[K](allRDDs.toSeq.asInstanceOf[Seq[RDD[(_, _)]]], partitioner)
|
||||
+val cogroupedRDD = new CoGroupedRDD[K](allRDDs.toSeq.asInstanceOf[Seq[RDD[(K, _)]]], partitioner)
|
||||
//val mergeValuesFunc = mergeValues(oldRDDs.size, newRDDs.size) _
|
||||
|
||||
val numOldValues = oldRDDs.size
|
||||
|
|
Loading…
Reference in a new issue