From 80fbc382a60973db21367a922a0fb797e5ab382d Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 13 Nov 2019 13:12:20 +0900 Subject: [PATCH] Revert "[SPARK-29462] The data type of "array()" should be array" This reverts commit 0dcd739534eb2357daeaa576f6b1223aa4ca0a6e. --- docs/sql-migration-guide.md | 2 -- .../catalyst/expressions/complexTypeCreator.scala | 2 +- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 12 ++++-------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 153e68b58e..2d5afa919e 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,8 +217,6 @@ license: | For example `SELECT timestamp 'tomorrow';`. - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - - Since Spark 3.0, when `array` function is called without parameters, it returns an empty array with `NullType` data type. In Spark version 2.4 and earlier, the data type of the result is `StringType`. - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 8e00e32a71..3f722e8537 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -47,7 +47,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def dataType: ArrayType = { ArrayType( TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(children.map(_.dataType)) - .getOrElse(NullType), + .getOrElse(StringType), containsNull = children.exists(_.nullable)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 1dc7d34b52..06484908f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3400,9 +3400,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-21281 use string types by default if map have no argument") { + test("SPARK-21281 use string types by default if array and map have no argument") { val ds = spark.range(1) var expectedSchema = new StructType() + .add("x", ArrayType(StringType, containsNull = false), nullable = false) + assert(ds.select(array().as("x")).schema == expectedSchema) + expectedSchema = new StructType() .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false) assert(ds.select(map().as("x")).schema == expectedSchema) } @@ -3460,13 +3463,6 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), Seq(Row(1))) } - - test("SPARK-29462: Use null type by default if array have no argument") { - val ds = spark.range(1) - var expectedSchema = new StructType() - .add("x", ArrayType(NullType, containsNull = false), nullable = false) - assert(ds.select(array().as("x")).schema == expectedSchema) - } } object DataFrameFunctionsSuite {