Allow null keys in Spark's reduce and group by
This commit is contained in:
parent
800fcbfbca
commit
452330efb4
|
@ -8,7 +8,10 @@ abstract class Partitioner extends Serializable {
|
||||||
class HashPartitioner(partitions: Int) extends Partitioner {
|
class HashPartitioner(partitions: Int) extends Partitioner {
|
||||||
def numPartitions = partitions
|
def numPartitions = partitions
|
||||||
|
|
||||||
def getPartition(key: Any) = {
|
def getPartition(key: Any): Int = {
|
||||||
|
if (key == null) {
|
||||||
|
return 0
|
||||||
|
} else {
|
||||||
val mod = key.hashCode % partitions
|
val mod = key.hashCode % partitions
|
||||||
if (mod < 0) {
|
if (mod < 0) {
|
||||||
mod + partitions
|
mod + partitions
|
||||||
|
@ -16,6 +19,7 @@ class HashPartitioner(partitions: Int) extends Partitioner {
|
||||||
mod // Guard against negative hash codes
|
mod // Guard against negative hash codes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
override def equals(other: Any): Boolean = other match {
|
override def equals(other: Any): Boolean = other match {
|
||||||
case h: HashPartitioner =>
|
case h: HashPartitioner =>
|
||||||
|
|
Loading…
Reference in a new issue