Allow null keys in Spark's reduce and group by

2012-07-12 18:36:02 -07:00 · 2012-07-12 18:36:02 -07:00 · 452330efb4
parent 800fcbfbca
commit 452330efb4
1 changed files with 9 additions and 5 deletions
--- a/core/src/main/scala/spark/Partitioner.scala
+++ b/core/src/main/scala/spark/Partitioner.scala
@@ -8,7 +8,10 @@ abstract class Partitioner extends Serializable {
 class HashPartitioner(partitions: Int) extends Partitioner {
  def numPartitions = partitions

-  def getPartition(key: Any) = {
+  def getPartition(key: Any): Int = {
+    if (key == null) {
+      return 0
+    } else {
      val mod = key.hashCode % partitions
      if (mod < 0) {
        mod + partitions
@@ -16,6 +19,7 @@ class HashPartitioner(partitions: Int) extends Partitioner {
        mod // Guard against negative hash codes
      }
    }
+  }
  
  override def equals(other: Any): Boolean = other match {
    case h: HashPartitioner =>