Allow null keys in Spark's reduce and group by

This commit is contained in:
Matei Zaharia 2012-07-12 18:36:02 -07:00
parent 800fcbfbca
commit 452330efb4

View file

@@ -8,7 +8,10 @@ abstract class Partitioner extends Serializable {
class HashPartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
def getPartition(key: Any) = {
def getPartition(key: Any): Int = {
if (key == null) {
return 0
} else {
val mod = key.hashCode % partitions
if (mod < 0) {
mod + partitions
@@ -16,6 +19,7 @@ class HashPartitioner(partitions: Int) extends Partitioner {
mod // Guard against negative hash codes
}
}
}
override def equals(other: Any): Boolean = other match {
case h: HashPartitioner =>