Allow null keys in Spark's reduce and group by

This commit is contained in:
Matei Zaharia 2012-07-12 18:36:02 -07:00
parent 800fcbfbca
commit 452330efb4

View file

@@ -8,12 +8,16 @@ abstract class Partitioner extends Serializable {
/**
 * A [[Partitioner]] that assigns keys to partitions by Java `hashCode`.
 *
 * Null keys are always sent to partition 0 (this commit's change), which
 * allows reduce/groupBy operations to handle null keys. Negative hash
 * codes are shifted into the valid range, since `%` in Scala/JVM can
 * return a negative result for a negative left operand.
 *
 * @param partitions total number of partitions; must be positive
 */
class HashPartitioner(partitions: Int) extends Partitioner {
  def numPartitions: Int = partitions

  def getPartition(key: Any): Int = {
    if (key == null) {
      // All null keys go to partition 0 so they can be grouped/reduced together.
      0
    } else {
      val mod = key.hashCode % partitions
      // Guard against negative hash codes: JVM `%` keeps the dividend's sign.
      if (mod < 0) mod + partitions else mod
    }
  }
}