[SPARK-33861][SQL][FOLLOWUP] Simplify conditional in predicate should consider deterministic

### What changes were proposed in this pull request?

This PR addresses https://github.com/apache/spark/pull/30865#pullrequestreview-562344089: when simplifying conditionals in predicates, the rule should take into account whether the condition is deterministic.

### Why are the changes needed?

Fix bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #31067 from wangyum/SPARK-33861-2.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Yuming Wang 2021-01-07 14:28:30 +09:00 committed by HyukjinKwon
parent 26b603992c
commit 3aa4e113c5
2 changed files with 10 additions and 7 deletions

View file

@ -39,9 +39,7 @@ import org.apache.spark.sql.types.BooleanType
* - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal)
* - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal)
* - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal)
* - CASE WHEN cond THEN false END => false
* - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)
* - CASE WHEN cond THEN true END => cond
*/
object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] {
@ -64,12 +62,8 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] {
And(cond, trueValue)
case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) =>
Or(Not(cond), trueValue)
case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) =>
FalseLiteral
case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) =>
And(Not(cond), elseValue)
case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) =>
cond
case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) =>
Or(cond, elseValue)
case e if e.dataType == BooleanType => e

View file

@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or}
import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or, Rand}
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable}
@ -158,6 +158,15 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest {
testProjection(originalCond, expectedExpr = originalCond)
}
test("CASE WHEN non-deterministic-cond THEN false END") {
  // The WHEN condition compares attribute `i` with Rand(0), so it is non-deterministic.
  val originalCond =
    CaseWhen(Seq((UnresolvedAttribute("i") > Rand(0), FalseLiteral)))
  // Nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window,
  // so only the filter and projection checks are run for this condition.
  // As a filter condition, the CASE WHEN is expected to reduce to `false`.
  testFilter(originalCond, expectedCond = FalseLiteral)
  // As a projected expression, it is expected to be left unchanged.
  testProjection(originalCond, expectedExpr = originalCond)
}
test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") {
val originalCond = CaseWhen(
Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)),