From 3dca81e4f5d51c81d6c183ddf762de011e4b9093 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 22 Aug 2020 06:23:46 +0900 Subject: [PATCH] [SPARK-32669][SQL][TEST] Expression unit tests should explore all cases that can lead to null result ### What changes were proposed in this pull request? Add document to `ExpressionEvalHelper`, and ask people to explore all the cases that can lead to null results (including null in struct fields, array elements and map values). This PR also fixes `ComplexTypeSuite.GetArrayStructFields` to explore all the null cases. ### Why are the changes needed? It happened several times that we hit correctness bugs caused by wrong expression nullability. When writing unit tests, we usually don't test the nullability flag directly, and it's too late to add such tests for all expressions. In https://github.com/apache/spark/pull/22375, we extended the expression test framework, which checks the nullability flag when the expected result/field/element is null. This requires the test cases to explore all the cases that can lead to null results ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? I reverted https://github.com/apache/spark/commit/5d296ed39e3dd79ddb10c68657e773adba40a5e0 locally, and `ComplexTypeSuite` can catch the bug. Closes #29493 from cloud-fan/small. Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../expressions/ComplexTypeSuite.scala | 29 +++++++++++-------- .../expressions/ExpressionEvalHelper.scala | 5 ++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index dbe43709d1..cdb83d3580 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -143,21 +143,26 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { } test("GetArrayStructFields") { - val typeAS = ArrayType(StructType(StructField("a", IntegerType, false) :: Nil)) - val typeNullAS = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) - val arrayStruct = Literal.create(Seq(create_row(1)), typeAS) - val nullArrayStruct = Literal.create(null, typeNullAS) + // test 4 types: struct field nullability X array element nullability + val type1 = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) + val type2 = ArrayType(StructType(StructField("a", IntegerType, nullable = false) :: Nil)) + val type3 = ArrayType(StructType(StructField("a", IntegerType) :: Nil), containsNull = false) + val type4 = ArrayType( + StructType(StructField("a", IntegerType, nullable = false) :: Nil), containsNull = false) - def getArrayStructFields(expr: Expression, fieldName: String): GetArrayStructFields = { - expr.dataType match { - case ArrayType(StructType(fields), containsNull) => - val field = fields.find(_.name == fieldName).get - GetArrayStructFields(expr, field, fields.indexOf(field), fields.length, containsNull) - } + val input1 = Literal.create(Seq(create_row(1)), type4) + val input2 = Literal.create(Seq(create_row(null)), type3) + val input3 = Literal.create(Seq(null), type2) + val input4 = Literal.create(null, type1) + + def getArrayStructFields(expr: Expression, fieldName: String): Expression = { + ExtractValue.apply(expr, Literal.create(fieldName, StringType), _ == _) } - checkEvaluation(getArrayStructFields(arrayStruct, "a"), Seq(1)) - checkEvaluation(getArrayStructFields(nullArrayStruct, "a"), null) + checkEvaluation(getArrayStructFields(input1, "a"), Seq(1)) + checkEvaluation(getArrayStructFields(input2, "a"), Seq(null)) + checkEvaluation(getArrayStructFields(input3, "a"), Seq(null)) + checkEvaluation(getArrayStructFields(input4, "a"), null) } test("SPARK-32167: nullability of GetArrayStructFields") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 6f73c1b0c0..341b26ddf6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -40,6 +40,11 @@ import org.apache.spark.util.Utils /** * A few helper functions for expression evaluation testing. Mixin this trait to use them. + * + * Note: when you write unit test for an expression and call `checkEvaluation` to check the result, + * please make sure that you explore all the cases that can lead to null result (including + * null in struct fields, array elements and map values). The framework will test the + * nullability flag of the expression automatically. */ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestBase { self: SparkFunSuite =>