From 65eb4a212914bf1fbf9864ac0c0882643eb41a77 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Oct 2021 10:43:28 +0900 Subject: [PATCH] [SPARK-36920][SQL] Support ANSI intervals by `ABS()` ### What changes were proposed in this pull request? In the PR, I propose to handle ANSI interval types by the `Abs` expression, and the `abs()` function as a consequence of that: - for positive and zero intervals, `ABS()` returns the same input value, - for minimal supported values (`Int.MinValue` months for year-month interval and `Long.MinValue` microseconds for day-time interval), `ABS()` throws the arithmetic overflow exception. - for other supported negative intervals, `ABS()` negates its input and returns a positive interval. For example: ```sql spark-sql> SELECT ABS(INTERVAL -'10-8' YEAR TO MONTH); 10-8 spark-sql> SELECT ABS(INTERVAL '-10 01:02:03.123456' DAY TO SECOND); 10 01:02:03.123456000 ``` ### Why are the changes needed? To improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No, this PR just extends `ABS()` by supporting new types. ### How was this patch tested? By running new tests: ``` $ build/sbt "test:testOnly *ArithmeticExpressionSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z interval.sql" $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" ``` Closes #34169 from MaxGekk/abs-ansi-intervals. 
Authored-by: Max Gekk Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/expressions/arithmetic.scala | 16 +++++++--- .../ArithmeticExpressionSuite.scala | 31 +++++++++++++++++++ .../resources/sql-tests/inputs/interval.sql | 2 ++ .../sql-tests/results/ansi/interval.sql.out | 18 ++++++++++- .../sql-tests/results/interval.sql.out | 18 ++++++++++- 5 files changed, 79 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 05308a9cd7..9fd1f3536c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -144,14 +144,16 @@ case class UnaryPositive(child: Expression) } /** - * A function that get the absolute value of the numeric value. + * A function that get the absolute value of the numeric or interval value. 
*/ @ExpressionDescription( - usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.", + usage = "_FUNC_(expr) - Returns the absolute value of the numeric or interval value.", examples = """ Examples: > SELECT _FUNC_(-1); 1 + > SELECT _FUNC_(INTERVAL -'1-1' YEAR TO MONTH); + 1-1 """, since = "1.2.0", group = "math_funcs") @@ -160,11 +162,15 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled def this(child: Expression) = this(child, SQLConf.get.ansiEnabled) - override def inputTypes: Seq[AbstractDataType] = Seq(NumericType) + override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval) override def dataType: DataType = child.dataType - private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) + private lazy val numeric = (dataType match { + case _: DayTimeIntervalType => LongExactNumeric + case _: YearMonthIntervalType => IntegerExactNumeric + case _ => TypeUtils.getNumeric(dataType, failOnError) + }).asInstanceOf[Numeric[Any]] override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match { case _: DecimalType => @@ -187,6 +193,8 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled case IntegerType | LongType if failOnError => defineCodeGen(ctx, ev, c => s"$c < 0 ? java.lang.Math.negateExact($c) : $c") + case _: AnsiIntervalType => + defineCodeGen(ctx, ev, c => s"$c < 0 ? 
java.lang.Math.negateExact($c) : $c") case dt: NumericType => defineCodeGen(ctx, ev, c => s"(${CodeGenerator.javaType(dt)})(java.lang.Math.abs($c))") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 8a27fde53b..af1bc72102 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.time.{Duration, Period} +import java.time.temporal.ChronoUnit import org.apache.spark.{SparkArithmeticException, SparkFunSuite} import org.apache.spark.sql.catalyst.InternalRow @@ -668,4 +669,34 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } } } + + test("SPARK-36920: Support year-month intervals by ABS") { + checkEvaluation(Abs(Literal(Period.ZERO)), Period.ZERO) + checkEvaluation(Abs(Literal(Period.ofMonths(-1))), Period.ofMonths(1)) + checkEvaluation(Abs(Literal(Period.ofYears(-12345))), Period.ofYears(12345)) + checkEvaluation(Abs(Literal.create(null, YearMonthIntervalType())), null) + checkExceptionInExpression[ArithmeticException]( + Abs(Literal(Period.ofMonths(Int.MinValue))), + "overflow") + + DataTypeTestUtils.yearMonthIntervalTypes.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), tpe) + } + } + + test("SPARK-36920: Support day-time intervals by ABS") { + checkEvaluation(Abs(Literal(Duration.ZERO)), Duration.ZERO) + checkEvaluation( + Abs(Literal(Duration.of(-1, ChronoUnit.MICROS))), + Duration.of(1, ChronoUnit.MICROS)) + checkEvaluation(Abs(Literal(Duration.ofDays(-12345))), Duration.ofDays(12345)) + checkEvaluation(Abs(Literal.create(null, 
DayTimeIntervalType())), null) + checkExceptionInExpression[ArithmeticException]( + Abs(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS))), + "overflow") + + DataTypeTestUtils.dayTimeIntervalTypes.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), tpe) + } + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index a3daff659b..032bfca441 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -369,3 +369,5 @@ SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS); SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH); SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE); SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS); +SELECT abs(INTERVAL '-10' YEAR); +SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 9e7736a901..4ebf31313c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 269 +-- Number of queries: 271 -- !query @@ -2552,3 +2552,19 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7 + + +-- !query +SELECT abs(INTERVAL '-10' YEAR) +-- !query schema +struct +-- !query output +10-0 + + +-- !query +SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND) +-- !query schema +struct +-- !query output +1 02:03:04.123000000 diff --git 
a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index a8fa101a78..3835c5fe43 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 269 +-- Number of queries: 271 -- !query @@ -2541,3 +2541,19 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7 + + +-- !query +SELECT abs(INTERVAL '-10' YEAR) +-- !query schema +struct +-- !query output +10-0 + + +-- !query +SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND) +-- !query schema +struct +-- !query output +1 02:03:04.123000000