[SPARK-11997] [SQL] NPE when save a DataFrame as parquet and partitioned by long column

Check for partition column nullability while building the partition spec.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #10001 from dilipbiswal/spark-11997.
This commit is contained in:
Dilip Biswal 2015-11-26 21:04:40 -08:00 committed by Davies Liu
parent 10e315c28c
commit a374e20b54
2 changed files with 14 additions and 1 deletions

View file

@@ -607,7 +607,7 @@ abstract class HadoopFsRelation private[sql](
// Casts each raw (string-encoded) partition value in `row` to the column type
// declared in the user-provided schema.
def castPartitionValuesToUserSchema(row: InternalRow) = {
  InternalRow((0 until row.numFields).map { i =>
    Cast(
      // Use getUTF8String, not getString: a partition column may hold null
      // (SPARK-11997), and getUTF8String returns null safely whereas
      // getString would dereference it and throw an NPE. Literal.create
      // accepts a null UTF8String, and Cast then produces a null of the
      // target type.
      Literal.create(row.getUTF8String(i), StringType),
      userProvidedSchema.fields(i).dataType).eval()
  }: _*)
}

View file

@@ -252,6 +252,19 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
}
}
test("SPARK-11997 parquet with null partition values") {
  withTempPath { tempDir =>
    val outputPath = tempDir.getCanonicalPath
    // ids 1 and 2; "n" is null for the even id, so one partition
    // directory is written with a null partition value.
    val df = sqlContext.range(1, 3)
      .selectExpr("if(id % 2 = 0, null, id) AS n", "id")
    df.write.partitionBy("n").parquet(outputPath)
    // Reading the null partition back must not NPE and must yield the
    // row whose "n" column was null.
    val nullPartition = sqlContext.read.parquet(outputPath).filter("n is null")
    checkAnswer(nullPartition, Row(2, null))
  }
}
// This test case is ignored because of parquet-mr bug PARQUET-370
ignore("SPARK-10301 requested schema clipping - schemas with disjoint sets of fields") {
withTempPath { dir =>