[SPARK-36920][SQL] Support ANSI intervals by ABS()

### What changes were proposed in this pull request?
In the PR, I propose to handle ANSI interval types by the `Abs` expression, and the `abs()` function as a consequence of that:
- for positive and zero intervals, `ABS()` returns the same input value,
- for minimal supported values (`Int.MinValue` months for year-month interval and `Long.MinValue` microseconds for day-time interval), `ABS()` throws the arithmetic overflow exception.
- for other supported negative intervals, `ABS()` negates its input and returns a positive interval.

For example:
```sql
spark-sql> SELECT ABS(INTERVAL -'10-8' YEAR TO MONTH);
10-8
spark-sql> SELECT ABS(INTERVAL '-10 01:02:03.123456' DAY TO SECOND);
10 01:02:03.123456000
```

### Why are the changes needed?
To improve user experience with Spark SQL.

### Does this PR introduce _any_ user-facing change?
No, this PR just extends `ABS()` by supporting new types.

### How was this patch tested?
By running new tests:
```
$ build/sbt "test:testOnly *ArithmeticExpressionSuite"
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z interval.sql"
$ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite"
```

Closes #34169 from MaxGekk/abs-ansi-intervals.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
Max Gekk 2021-10-05 10:43:28 +09:00 committed by Hyukjin Kwon
parent b30e214483
commit 65eb4a2129
5 changed files with 79 additions and 6 deletions

View file

@ -144,14 +144,16 @@ case class UnaryPositive(child: Expression)
}
/**
* A function that get the absolute value of the numeric value.
* A function that get the absolute value of the numeric or interval value.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.",
usage = "_FUNC_(expr) - Returns the absolute value of the numeric or interval value.",
examples = """
Examples:
> SELECT _FUNC_(-1);
1
> SELECT _FUNC_(INTERVAL -'1-1' YEAR TO MONTH);
1-1
""",
since = "1.2.0",
group = "math_funcs")
@ -160,11 +162,15 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled
def this(child: Expression) = this(child, SQLConf.get.ansiEnabled)
override def inputTypes: Seq[AbstractDataType] = Seq(NumericType)
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval)
override def dataType: DataType = child.dataType
private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError)
private lazy val numeric = (dataType match {
case _: DayTimeIntervalType => LongExactNumeric
case _: YearMonthIntervalType => IntegerExactNumeric
case _ => TypeUtils.getNumeric(dataType, failOnError)
}).asInstanceOf[Numeric[Any]]
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match {
case _: DecimalType =>
@ -187,6 +193,8 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled
case IntegerType | LongType if failOnError =>
defineCodeGen(ctx, ev, c => s"$c < 0 ? java.lang.Math.negateExact($c) : $c")
case _: AnsiIntervalType =>
defineCodeGen(ctx, ev, c => s"$c < 0 ? java.lang.Math.negateExact($c) : $c")
case dt: NumericType =>
defineCodeGen(ctx, ev, c => s"(${CodeGenerator.javaType(dt)})(java.lang.Math.abs($c))")

View file

@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import java.time.{Duration, Period}
import java.time.temporal.ChronoUnit
import org.apache.spark.{SparkArithmeticException, SparkFunSuite}
import org.apache.spark.sql.catalyst.InternalRow
@ -668,4 +669,34 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
}
}
}
test("SPARK-36920: Support year-month intervals by ABS") {
  // ABS keeps non-negative intervals unchanged and flips the sign of negative ones.
  Seq(
    Period.ZERO -> Period.ZERO,
    Period.ofMonths(-1) -> Period.ofMonths(1),
    Period.ofYears(-12345) -> Period.ofYears(12345)
  ).foreach { case (input, expected) =>
    checkEvaluation(Abs(Literal(input)), expected)
  }
  // A NULL interval input propagates to a NULL output.
  checkEvaluation(Abs(Literal.create(null, YearMonthIntervalType())), null)
  // Int.MinValue months cannot be negated, so ABS must raise an arithmetic overflow.
  checkExceptionInExpression[ArithmeticException](
    Abs(Literal(Period.ofMonths(Int.MinValue))),
    "overflow")
  // Interpreted evaluation must agree with codegen for every year-month interval type.
  DataTypeTestUtils.yearMonthIntervalTypes.foreach { dt =>
    checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), dt)
  }
}
test("SPARK-36920: Support day-time intervals by ABS") {
  // ABS keeps non-negative durations unchanged and flips the sign of negative ones.
  Seq(
    Duration.ZERO -> Duration.ZERO,
    Duration.of(-1, ChronoUnit.MICROS) -> Duration.of(1, ChronoUnit.MICROS),
    Duration.ofDays(-12345) -> Duration.ofDays(12345)
  ).foreach { case (input, expected) =>
    checkEvaluation(Abs(Literal(input)), expected)
  }
  // A NULL interval input propagates to a NULL output.
  checkEvaluation(Abs(Literal.create(null, DayTimeIntervalType())), null)
  // Long.MinValue microseconds cannot be negated, so ABS must raise an arithmetic overflow.
  checkExceptionInExpression[ArithmeticException](
    Abs(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS))),
    "overflow")
  // Interpreted evaluation must agree with codegen for every day-time interval type.
  DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt =>
    checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), dt)
  }
}
}

View file

@ -369,3 +369,5 @@ SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
SELECT abs(INTERVAL '-10' YEAR);
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND);

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 269
-- Number of queries: 271
-- !query
@ -2552,3 +2552,19 @@ struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
-- !query
SELECT abs(INTERVAL '-10' YEAR)
-- !query schema
struct<abs(INTERVAL '-10' YEAR):interval year>
-- !query output
10-0
-- !query
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND)
-- !query schema
struct<abs(INTERVAL '-1 02:03:04.123' DAY TO SECOND):interval day to second>
-- !query output
1 02:03:04.123000000

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 269
-- Number of queries: 271
-- !query
@ -2541,3 +2541,19 @@ struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
-- !query
SELECT abs(INTERVAL '-10' YEAR)
-- !query schema
struct<abs(INTERVAL '-10' YEAR):interval year>
-- !query output
10-0
-- !query
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND)
-- !query schema
struct<abs(INTERVAL '-1 02:03:04.123' DAY TO SECOND):interval day to second>
-- !query output
1 02:03:04.123000000