From 3aa4e113c5162f5de12c2aa43b6af65a7f2110af Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 7 Jan 2021 14:28:30 +0900 Subject: [PATCH] [SPARK-33861][SQL][FOLLOWUP] Simplify conditional in predicate should consider deterministic ### What changes were proposed in this pull request? This PR addresses https://github.com/apache/spark/pull/30865#pullrequestreview-562344089 so that simplifying conditionals in a predicate takes into account whether the condition is deterministic. ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #31067 from wangyum/SPARK-33861-2. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../optimizer/SimplifyConditionalsInPredicate.scala | 6 ------ .../SimplifyConditionalsInPredicateSuite.scala | 11 ++++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 1ea85085bc..1225f1f318 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -39,9 +39,7 @@ import org.apache.spark.sql.types.BooleanType * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) - * - CASE WHEN cond THEN false END => false * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) - * - CASE WHEN cond THEN true END => cond */ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { @@ -64,12 +62,8 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { And(cond, trueValue) case CaseWhen(Seq((cond, trueValue)), 
Some(TrueLiteral)) => Or(Not(cond), trueValue) - case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => - FalseLiteral case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => And(Not(cond), elseValue) - case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => - cond case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => Or(cond, elseValue) case e if e.dataType == BooleanType => e diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala index 1f3c24bdbb..04ebb4e63c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or, Rand} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} @@ -158,6 +158,15 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } + test("CASE WHEN non-deterministic-cond THEN false END") { + val originalCond = + CaseWhen(Seq((UnresolvedAttribute("i") > Rand(0), FalseLiteral))) + val expectedCond = And(UnresolvedAttribute("i") > Rand(0), 
FalseLiteral) + // nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, + testFilter(originalCond, expectedCond = FalseLiteral) + testProjection(originalCond, expectedExpr = originalCond) + } + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)),