From d4466d55cadbcea9233cb8fbb90a62a7e56a2da8 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Fri, 13 Aug 2021 11:10:32 +0800
Subject: [PATCH] [SPARK-36497][SQL] Support Interval add/subtract NULL

### What changes were proposed in this pull request?

Currently, `null + interval` is resolved to `cast(cast(null as timestamp) + interval as null)`. This is unexpected behavior: the result should not be of null type. The same problem applies to `null - interval`, `interval + null`, and `interval - null`.

To fix it, I propose to cast the null literal to the data type of the other operand in the add/subtract:
```
null + interval => cast(null as interval) + interval
null - interval => cast(null as interval) - interval
interval + null => interval + cast(null as interval)
interval - null => interval - cast(null as interval)
```

### Why are the changes needed?

It fixes the confusing behavior of `Interval +/- NULL` and `NULL +/- Interval`.

### Does this PR introduce _any_ user-facing change?

No, the new ANSI interval types are not released yet.

### How was this patch tested?

Existing unit tests.

Closes #33727 from gengliangwang/intervalTypeCoercion.

Authored-by: Gengliang Wang
Signed-off-by: Wenchen Fan
---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 8 ++++++++
 .../resources/sql-tests/results/ansi/interval.sql.out     | 5 ++---
 .../src/test/resources/sql-tests/results/interval.sql.out | 5 ++---
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index c1e629aca7..fde220df6b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -376,6 +376,10 @@ class Analyzer(override val catalogManager: CatalogManager)
           TimestampAddYMInterval(r, l)
         case (CalendarIntervalType, CalendarIntervalType) |
              (_: DayTimeIntervalType, _: DayTimeIntervalType) => a
+        case (_: NullType, _: DayTimeIntervalType | _: YearMonthIntervalType) =>
+          a.copy(left = Cast(a.left, a.right.dataType))
+        case (_: DayTimeIntervalType | _: YearMonthIntervalType, _: NullType) =>
+          a.copy(right = Cast(a.right, a.left.dataType))
         case (DateType, CalendarIntervalType) => DateAddInterval(l, r, ansiEnabled = f)
         case (_, CalendarIntervalType | _: DayTimeIntervalType) => Cast(TimeAdd(l, r), l.dataType)
         case (CalendarIntervalType, DateType) => DateAddInterval(r, l, ansiEnabled = f)
@@ -395,6 +399,10 @@ class Analyzer(override val catalogManager: CatalogManager)
           DatetimeSub(l, r, TimestampAddYMInterval(l, UnaryMinus(r, f)))
         case (CalendarIntervalType, CalendarIntervalType) |
              (_: DayTimeIntervalType, _: DayTimeIntervalType) => s
+        case (_: NullType, _: DayTimeIntervalType | _: YearMonthIntervalType) =>
+          s.copy(left = Cast(s.left, s.right.dataType))
+        case (_: DayTimeIntervalType | _: YearMonthIntervalType, _: NullType) =>
+          s.copy(right = Cast(s.right, s.left.dataType))
         case (DateType, CalendarIntervalType) =>
           DatetimeSub(l, r, DateAddInterval(l, UnaryMinus(r, f), ansiEnabled = f))
         case (_, CalendarIntervalType | _: DayTimeIntervalType) =>
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 2b79fef810..b048105423 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1175,10 +1175,9 @@ select
   null + interval '2' hour,
   null - interval '2' hour
 -- !query schema
-struct<>
+struct<(INTERVAL '2' YEAR + NULL):interval year,(INTERVAL '2' YEAR - NULL):interval year,(INTERVAL '02' HOUR + NULL):interval hour,(INTERVAL '02' HOUR - NULL):interval hour,(NULL + INTERVAL '2' YEAR):interval year,(NULL - INTERVAL '2' YEAR):interval year,(NULL + INTERVAL '02' HOUR):interval hour,(NULL - INTERVAL '02' HOUR):interval hour>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'CAST(CAST(NULL AS TIMESTAMP) + INTERVAL '02' HOUR AS VOID)' due to data type mismatch: cannot cast timestamp to void; line 4 pos 2
+NULL NULL NULL NULL NULL NULL NULL NULL


 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 07b24a3c06..2391b8a2bd 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1174,10 +1174,9 @@ select
   null + interval '2' hour,
   null - interval '2' hour
 -- !query schema
-struct<>
+struct<(INTERVAL '2' YEAR + NULL):interval year,(INTERVAL '2' YEAR - NULL):interval year,(INTERVAL '02' HOUR + NULL):interval hour,(INTERVAL '02' HOUR - NULL):interval hour,(NULL + INTERVAL '2' YEAR):interval year,(NULL - INTERVAL '2' YEAR):interval year,(NULL + INTERVAL '02' HOUR):interval hour,(NULL - INTERVAL '02' HOUR):interval hour>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'CAST(CAST(NULL AS TIMESTAMP) + INTERVAL '02' HOUR AS VOID)' due to data type mismatch: cannot cast timestamp to void; line 4 pos 2
+NULL NULL NULL NULL NULL NULL NULL NULL


 -- !query
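
For reviewers, a minimal spark-sql sketch of the intended end-to-end behavior. The first two queries mirror the golden files above; the `typeof` checks are extra illustration and are not part of the updated tests:
```
-- Before this patch these queries failed analysis
-- (e.g. "cannot cast timestamp to void").
-- After this patch, NULL is cast to the interval type of the other
-- operand, so the result is a typed NULL rather than an error.
SELECT null + interval '2' hour;            -- NULL
SELECT interval '2' year - null;            -- NULL
SELECT typeof(null + interval '2' hour);    -- interval hour
SELECT typeof(interval '2' year - null);    -- interval year
```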