Allow null keys in Spark's reduce and group by

This commit is contained in:
Matei Zaharia 2012-07-12 18:36:02 -07:00
parent 800fcbfbca
commit 452330efb4

View file

@@ -8,12 +8,16 @@ abstract class Partitioner extends Serializable {
/**
 * A [[Partitioner]] that assigns keys to partitions by Java `hashCode`.
 *
 * Null keys are always sent to partition 0 (this commit's change), which
 * allows reduce/groupBy operations to handle null keys. Negative hash
 * codes are shifted into the valid range, since `%` in Scala/JVM can
 * return a negative result for a negative left operand.
 *
 * @param partitions total number of partitions; must be positive
 */
class HashPartitioner(partitions: Int) extends Partitioner {
  def numPartitions: Int = partitions

  def getPartition(key: Any): Int = {
    if (key == null) {
      // All null keys go to partition 0 so they can be grouped/reduced together.
      0
    } else {
      val mod = key.hashCode % partitions
      // Guard against negative hash codes: JVM `%` keeps the dividend's sign.
      if (mod < 0) mod + partitions else mod
    }
  }
}