[SPARK-36920][SQL] Support ANSI intervals by ABS()
### What changes were proposed in this pull request? In the PR, I propose to handle ANSI interval types by the `Abs` expression, and the `abs()` function as a consequence of that: - for positive and zero intervals, `ABS()` returns the same input value, - for the minimum supported values (`Int.MinValue` months for year-month interval and `Long.MinValue` microseconds for day-time interval), `ABS()` throws the arithmetic overflow exception. - for other supported negative intervals, `ABS()` negates its input and returns a positive interval. For example: ```sql spark-sql> SELECT ABS(INTERVAL -'10-8' YEAR TO MONTH); 10-8 spark-sql> SELECT ABS(INTERVAL '-10 01:02:03.123456' DAY TO SECOND); 10 01:02:03.123456000 ``` ### Why are the changes needed? To improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No, this PR just extends `ABS()` by supporting new types. ### How was this patch tested? By running new tests: ``` $ build/sbt "test:testOnly *ArithmeticExpressionSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z interval.sql" $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" ``` Closes #34169 from MaxGekk/abs-ansi-intervals. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
parent
b30e214483
commit
65eb4a2129
|
@ -144,14 +144,16 @@ case class UnaryPositive(child: Expression)
|
|||
}
|
||||
|
||||
/**
|
||||
* A function that get the absolute value of the numeric value.
|
||||
* A function that get the absolute value of the numeric or interval value.
|
||||
*/
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.",
|
||||
usage = "_FUNC_(expr) - Returns the absolute value of the numeric or interval value.",
|
||||
examples = """
|
||||
Examples:
|
||||
> SELECT _FUNC_(-1);
|
||||
1
|
||||
> SELECT _FUNC_(INTERVAL -'1-1' YEAR TO MONTH);
|
||||
1-1
|
||||
""",
|
||||
since = "1.2.0",
|
||||
group = "math_funcs")
|
||||
|
@ -160,11 +162,15 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled
|
|||
|
||||
def this(child: Expression) = this(child, SQLConf.get.ansiEnabled)
|
||||
|
||||
override def inputTypes: Seq[AbstractDataType] = Seq(NumericType)
|
||||
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval)
|
||||
|
||||
override def dataType: DataType = child.dataType
|
||||
|
||||
private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError)
|
||||
private lazy val numeric = (dataType match {
|
||||
case _: DayTimeIntervalType => LongExactNumeric
|
||||
case _: YearMonthIntervalType => IntegerExactNumeric
|
||||
case _ => TypeUtils.getNumeric(dataType, failOnError)
|
||||
}).asInstanceOf[Numeric[Any]]
|
||||
|
||||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match {
|
||||
case _: DecimalType =>
|
||||
|
@ -187,6 +193,8 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled
|
|||
case IntegerType | LongType if failOnError =>
|
||||
defineCodeGen(ctx, ev, c => s"$c < 0 ? java.lang.Math.negateExact($c) : $c")
|
||||
|
||||
case _: AnsiIntervalType =>
|
||||
defineCodeGen(ctx, ev, c => s"$c < 0 ? java.lang.Math.negateExact($c) : $c")
|
||||
|
||||
case dt: NumericType =>
|
||||
defineCodeGen(ctx, ev, c => s"(${CodeGenerator.javaType(dt)})(java.lang.Math.abs($c))")
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
|
|||
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.time.{Duration, Period}
|
||||
import java.time.temporal.ChronoUnit
|
||||
|
||||
import org.apache.spark.{SparkArithmeticException, SparkFunSuite}
|
||||
import org.apache.spark.sql.catalyst.InternalRow
|
||||
|
@ -668,4 +669,34 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-36920: Support year-month intervals by ABS") {
|
||||
checkEvaluation(Abs(Literal(Period.ZERO)), Period.ZERO)
|
||||
checkEvaluation(Abs(Literal(Period.ofMonths(-1))), Period.ofMonths(1))
|
||||
checkEvaluation(Abs(Literal(Period.ofYears(-12345))), Period.ofYears(12345))
|
||||
checkEvaluation(Abs(Literal.create(null, YearMonthIntervalType())), null)
|
||||
checkExceptionInExpression[ArithmeticException](
|
||||
Abs(Literal(Period.ofMonths(Int.MinValue))),
|
||||
"overflow")
|
||||
|
||||
DataTypeTestUtils.yearMonthIntervalTypes.foreach { tpe =>
|
||||
checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), tpe)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-36920: Support day-time intervals by ABS") {
|
||||
checkEvaluation(Abs(Literal(Duration.ZERO)), Duration.ZERO)
|
||||
checkEvaluation(
|
||||
Abs(Literal(Duration.of(-1, ChronoUnit.MICROS))),
|
||||
Duration.of(1, ChronoUnit.MICROS))
|
||||
checkEvaluation(Abs(Literal(Duration.ofDays(-12345))), Duration.ofDays(12345))
|
||||
checkEvaluation(Abs(Literal.create(null, DayTimeIntervalType())), null)
|
||||
checkExceptionInExpression[ArithmeticException](
|
||||
Abs(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS))),
|
||||
"overflow")
|
||||
|
||||
DataTypeTestUtils.dayTimeIntervalTypes.foreach { tpe =>
|
||||
checkConsistencyBetweenInterpretedAndCodegen((e: Expression) => Abs(e, false), tpe)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -369,3 +369,5 @@ SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
|
|||
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
|
||||
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
|
||||
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
|
||||
SELECT abs(INTERVAL '-10' YEAR);
|
||||
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 269
|
||||
-- Number of queries: 271
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -2552,3 +2552,19 @@ struct<>
|
|||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT abs(INTERVAL '-10' YEAR)
|
||||
-- !query schema
|
||||
struct<abs(INTERVAL '-10' YEAR):interval year>
|
||||
-- !query output
|
||||
10-0
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND)
|
||||
-- !query schema
|
||||
struct<abs(INTERVAL '-1 02:03:04.123' DAY TO SECOND):interval day to second>
|
||||
-- !query output
|
||||
1 02:03:04.123000000
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 269
|
||||
-- Number of queries: 271
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -2541,3 +2541,19 @@ struct<>
|
|||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT abs(INTERVAL '-10' YEAR)
|
||||
-- !query schema
|
||||
struct<abs(INTERVAL '-10' YEAR):interval year>
|
||||
-- !query output
|
||||
10-0
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT abs(INTERVAL -'1 02:03:04.123' DAY TO SECOND)
|
||||
-- !query schema
|
||||
struct<abs(INTERVAL '-1 02:03:04.123' DAY TO SECOND):interval day to second>
|
||||
-- !query output
|
||||
1 02:03:04.123000000
|
||||
|
|
Loading…
Reference in a new issue