From 2356cdd420f600f38d0e786dc50c15f2603b7ff2 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Mon, 29 Mar 2021 12:05:00 +0900
Subject: [PATCH] [SPARK-34814][SQL] LikeSimplification should handle NULL

### What changes were proposed in this pull request?
LikeSimplification should handle NULL. The UT below fails with a NullPointerException before this PR:
```
test("SPARK-34814: LikeSimplification should handle NULL") {
  withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
    ConstantFolding.getClass.getName.stripSuffix("$")) {
    checkEvaluation(Literal.create("foo", StringType)
      .likeAll("%foo%", Literal.create(null, StringType)), null)
  }
}

[info] - test *** FAILED *** (2 seconds, 443 milliseconds)
[info]   java.lang.NullPointerException:
[info]   at org.apache.spark.sql.catalyst.optimizer.LikeSimplification$.$anonfun$simplifyMultiLike$1(expressions.scala:697)
[info]   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
[info]   at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
[info]   at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
[info]   at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
[info]   at scala.collection.TraversableLike.map(TraversableLike.scala:238)
[info]   at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
[info]   at scala.collection.AbstractTraversable.map(Traversable.scala:108)
[info]   at org.apache.spark.sql.catalyst.optimizer.LikeSimplification$.org$apache$spark$sql$catalyst$optimizer$LikeSimplification$$simplifyMultiLike(expressions.scala:697)
[info]   at org.apache.spark.sql.catalyst.optimizer.LikeSimplification$$anonfun$apply$9.applyOrElse(expressions.scala:722)
[info]   at org.apache.spark.sql.catalyst.optimizer.LikeSimplification$$anonfun$apply$9.applyOrElse(expressions.scala:714)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:316)
[info]   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:316)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:321)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:406)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:242)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:404)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:321)
[info]   at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$transformExpressionsDown$1(QueryPlan.scala:94)
[info]   at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:116)
[info]   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
```

### Why are the changes needed?
Fix bug.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Added UT.

Closes #31976 from AngersZhuuuu/SPARK-34814.
Authored-by: Angerszhuuuu
Signed-off-by: HyukjinKwon
---
 .../spark/sql/catalyst/optimizer/expressions.scala             |  4 +++-
 .../catalyst/expressions/RegexpExpressionsSuite.scala          | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index c7c6b93503..c3d2f336f0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -692,7 +692,9 @@ object LikeSimplification extends Rule[LogicalPlan] {
   private def simplifyMultiLike(
       child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = {
     val (remainPatternMap, replacementMap) =
-      patterns.map { p => p -> simplifyLike(child, p.toString)}.partition(_._2.isEmpty)
+      patterns.map { p =>
+        p -> Option(p).flatMap(p => simplifyLike(child, p.toString))
+      }.partition(_._2.isEmpty)
     val remainPatterns = remainPatternMap.map(_._1)
     val replacements = replacementMap.map(_._2.get)
     if (replacements.isEmpty) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 8d7501d952..019857580d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
+import org.apache.spark.sql.catalyst.optimizer.ConstantFolding
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StringType
 
 /**
@@ -470,4 +472,12 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     cache.setAccessible(true)
     assert(cache.get(expr).asInstanceOf[java.util.regex.Pattern].pattern().contains("a"))
   }
+
+  test("SPARK-34814: LikeSimplification should handle NULL") {
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
+      ConstantFolding.getClass.getName.stripSuffix("$")) {
+      checkEvaluation(Literal.create("foo", StringType)
+        .likeAll("%foo%", Literal.create(null, StringType)), null)
+    }
+  }
 }
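
Note on the fix: the NPE came from calling `p.toString` on a `null` pattern inside `simplifyMultiLike`; the patch wraps each pattern in `Option(p)`, so a `null` element becomes `None` and is kept in the "remaining patterns" bucket instead of being simplified. The standalone Scala sketch below only illustrates that null-guard idea; `simplifyLike`, the pattern shapes, and the printed output here are simplified stand-ins, not Spark's actual internals.
```scala
// Standalone sketch of the null-guard idea; not the actual Spark code.
object LikeSimplificationSketch {
  // Stand-in for the real simplifyLike: only simplifies patterns of the
  // shape "%foo%" into a contains-style form, returns None otherwise.
  def simplifyLike(input: String, pattern: String): Option[String] =
    if (pattern.length > 2 && pattern.startsWith("%") && pattern.endsWith("%"))
      Some(s"Contains($input, ${pattern.stripPrefix("%").stripSuffix("%")})")
    else None

  def simplifyMultiLike(input: String, patterns: Seq[String]): Unit = {
    val (remain, replaced) = patterns.map { p =>
      // Option(null) == None, so p.toString / p-based parsing is never
      // attempted on a null pattern; it simply stays unsimplified.
      p -> Option(p).flatMap(p => simplifyLike(input, p))
    }.partition(_._2.isEmpty)
    println(s"kept as LIKE: ${remain.map(_._1)}")
    println(s"simplified:   ${replaced.map(_._2.get)}")
  }

  def main(args: Array[String]): Unit = {
    // Without the Option(p) guard, the null element would throw an NPE here.
    simplifyMultiLike("foo", Seq("%foo%", null, "f_o"))
  }
}
```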