From 00b986384d81ff9d5d57063eda35452101bf3794 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 27 Sep 2021 12:13:47 +0900 Subject: [PATCH] [SPARK-36838][SQL] Improve InSet generated code performance ### What changes were proposed in this pull request? A Set can't check whether a NaN value is contained in it. With codegen, we only need to check whether the value is NaN when the value set contains NaN; otherwise we just need to check whether the Set contains the value. ### Why are the changes needed? Improve generated code's performance. Only check for NaN when the Set contains NaN. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #34097 from AngersZhuuuu/SPARK-36838. Authored-by: Angerszhuuuu Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/expressions/predicates.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index fcd533dc5f..78bfdccc02 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -612,26 +612,27 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with "" } - val ret = child.dataType match { + val isNaNCode = child.dataType match { case DoubleType => Some((v: Any) => s"java.lang.Double.isNaN($v)") case FloatType => Some((v: Any) => s"java.lang.Float.isNaN($v)") case _ => None } - ret.map { isNaN => + if (hasNaN && isNaNCode.isDefined) { s""" - |if ($setTerm.contains($c)) { - | ${ev.value} = true; - |} else if (${isNaN(c)}) { - | ${ev.value} = $hasNaN; - |} - |$setIsNull - |""".stripMargin - }.getOrElse( + |if ($setTerm.contains($c)) { + | ${ev.value} = true; + |} else if (${isNaNCode.get(c)}) { + | ${ev.value} = 
true; + |} + |$setIsNull + """.stripMargin + } else { s""" |${ev.value} = $setTerm.contains($c); |$setIsNull - """.stripMargin) + """.stripMargin + } }) }