[SPARK-36431][SQL] Support TypeCoercion of ANSI intervals with different fields
### What changes were proposed in this pull request?
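Support type coercion of ANSI intervals with different fields: in both `TypeCoercion` and `AnsiTypeCoercion`, two `DayTimeIntervalType`s (or two `YearMonthIntervalType`s) now resolve to a common interval type whose start field is the smaller of the two start fields and whose end field is the larger of the two end fields.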
### Why are the changes needed?
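To support type coercion of ANSI intervals with different fields. Previously, mixing such intervals failed analysis; for example, `map(1, interval 1 year, 2, interval 2 month)` was rejected with a data type mismatch because the values had types `interval year` and `interval month`.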
### Does this PR introduce _any_ user-facing change?
Yes. After this PR, users can:
- compare `DayTimeIntervalType`/`YearMonthIntervalType` values that have different fields, e.g. `INTERVAL '1' YEAR > INTERVAL '11' MONTH`
- mix ANSI interval types with different fields in collection functions, e.g. `array(INTERVAL '1' YEAR, INTERVAL '11' MONTH)`
- mix ANSI interval types with different fields in `coalesce` and similar functions.
### How was this patch tested?
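Added a unit test to `TypeCoercionSuite` and new interval queries to the SQL query tests (golden files regenerated by `SQLQueryTestSuite`).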
Closes #33661 from AngersZhuuuu/SPARK-SPARK-36431.
Authored-by: Angerszhuuuu <angers.zhu@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
(cherry picked from commit 89d8a4eacf)
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
parent
45acd00dd6
commit
fb6f3792af
|
@ -120,6 +120,11 @@ object AnsiTypeCoercion extends TypeCoercionBase {
|
|||
case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
|
||||
Some(TimestampType)
|
||||
|
||||
case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
|
||||
Some(DayTimeIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
|
||||
case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
|
||||
Some(YearMonthIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
|
||||
|
||||
case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
|
||||
}
|
||||
|
||||
|
|
|
@ -867,6 +867,11 @@ object TypeCoercion extends TypeCoercionBase {
|
|||
case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
|
||||
Some(TimestampType)
|
||||
|
||||
case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
|
||||
Some(DayTimeIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
|
||||
case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
|
||||
Some(YearMonthIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
|
||||
|
||||
case (_: TimestampNTZType, _: DateType) | (_: DateType, _: TimestampNTZType) =>
|
||||
Some(TimestampNTZType)
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.spark.sql.catalyst.analysis
|
||||
|
||||
import java.sql.Timestamp
|
||||
import java.time.{Duration, Period}
|
||||
|
||||
import org.apache.spark.internal.config.Tests.IS_TESTING
|
||||
import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
|
||||
|
@ -1604,6 +1605,52 @@ class TypeCoercionSuite extends AnalysisTest {
|
|||
ruleTest(TypeCoercion.IntegralDivision, IntegralDivide(2, 1L),
|
||||
IntegralDivide(Cast(2, LongType), 1L))
|
||||
}
|
||||
|
||||
// Verifies that TypeCoercion.ImplicitTypeCasts rewrites an EqualTo between two ANSI interval
// literals of the same family (year-month or day-time) so that both operands end up with the
// same interval type, inserting a Cast only on the operand(s) whose type differs.
test("SPARK-36431: Support TypeCoercion of ANSI intervals with different fields") {
|
||||
// Year-month intervals: every ordered pair drawn from DataTypeTestUtils.yearMonthIntervalTypes.
DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym1 =>
|
||||
DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym2 =>
|
||||
// Both literals hold the same value (12 months); only their declared types differ.
val literal1 = Literal.create(Period.ofMonths(12), ym1)
|
||||
val literal2 = Literal.create(Period.ofMonths(12), ym2)
|
||||
// Expected common type: the smaller start field and the larger end field of the two inputs.
val commonType = YearMonthIntervalType(
|
||||
ym1.startField.min(ym2.startField), ym1.endField.max(ym2.endField))
|
||||
// An operand that already has the common type must be left uncast.
if (commonType == ym1 && commonType == ym2) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(literal1, literal2))
|
||||
} else if (commonType == ym1) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(literal1, Cast(literal2, commonType)))
|
||||
} else if (commonType == ym2) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(Cast(literal1, commonType), literal2))
|
||||
} else {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Day-time intervals: the same checks for every ordered pair drawn from
// DataTypeTestUtils.dayTimeIntervalTypes.
DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt1 =>
|
||||
DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt2 =>
|
||||
// Both literals hold the same value (1111 seconds); only their declared types differ.
val literal1 = Literal.create(Duration.ofSeconds(1111), dt1)
|
||||
val literal2 = Literal.create(Duration.ofSeconds(1111), dt2)
|
||||
// Expected common type: the smaller start field and the larger end field of the two inputs.
val commonType = DayTimeIntervalType(
|
||||
dt1.startField.min(dt2.startField), dt1.endField.max(dt2.endField))
|
||||
// An operand that already has the common type must be left uncast.
if (commonType == dt1 && commonType == dt2) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(literal1, literal2))
|
||||
} else if (commonType == dt1) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(literal1, Cast(literal2, commonType)))
|
||||
} else if (commonType == dt2) {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(Cast(literal1, commonType), literal2))
|
||||
} else {
|
||||
ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
|
||||
EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -322,3 +322,16 @@ SELECT INTERVAL '153722867280' MINUTE;
|
|||
SELECT INTERVAL '-153722867280' MINUTE;
|
||||
SELECT INTERVAL '54.775807' SECOND;
|
||||
SELECT INTERVAL '-54.775807' SECOND;
|
||||
|
||||
SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR;
|
||||
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND;
|
||||
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH;
|
||||
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH;
|
||||
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
|
||||
|
||||
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
|
||||
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
|
||||
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
|
||||
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
|
||||
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
|
||||
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 200
|
||||
-- Number of queries: 211
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -818,10 +818,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' DAY):map<int,interval day>>
|
|||
-- !query
|
||||
select map(1, interval 1 year, 2, interval 2 month)
|
||||
-- !query schema
|
||||
struct<>
|
||||
struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year to month>>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data type mismatch: The given values of function map should all be the same type, but they are [interval year, interval month]; line 1 pos 7
|
||||
{1:1-0,2:0-2}
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1985,3 +1984,94 @@ SELECT INTERVAL '-54.775807' SECOND
|
|||
struct<INTERVAL '-54.775807' SECOND:interval second>
|
||||
-- !query output
|
||||
-0 00:00:54.775807000
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
|
||||
-- !query output
|
||||
true
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND):boolean>
|
||||
-- !query output
|
||||
false
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
|
||||
-- !query output
|
||||
false
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
|
||||
-- !query schema
|
||||
struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
|
||||
-- !query output
|
||||
true
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' (interval month and interval day).; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
|
||||
-- !query schema
|
||||
struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>
|
||||
-- !query output
|
||||
[1-0,0-1]
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
|
||||
-- !query schema
|
||||
struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval day to minute>>
|
||||
-- !query output
|
||||
[1 00:00:00.000000000,0 01:01:00.000000000]
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function array should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
|
||||
-- !query schema
|
||||
struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
|
||||
-- !query output
|
||||
1-0
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
|
||||
-- !query schema
|
||||
struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval day to minute>
|
||||
-- !query output
|
||||
1 00:00:00.000000000
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 200
|
||||
-- Number of queries: 211
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -817,10 +817,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' DAY):map<int,interval day>>
|
|||
-- !query
|
||||
select map(1, interval 1 year, 2, interval 2 month)
|
||||
-- !query schema
|
||||
struct<>
|
||||
struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year to month>>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data type mismatch: The given values of function map should all be the same type, but they are [interval year, interval month]; line 1 pos 7
|
||||
{1:1-0,2:0-2}
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1984,3 +1983,94 @@ SELECT INTERVAL '-54.775807' SECOND
|
|||
struct<INTERVAL '-54.775807' SECOND:interval second>
|
||||
-- !query output
|
||||
-0 00:00:54.775807000
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
|
||||
-- !query output
|
||||
true
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND):boolean>
|
||||
-- !query output
|
||||
false
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
|
||||
-- !query schema
|
||||
struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
|
||||
-- !query output
|
||||
false
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
|
||||
-- !query schema
|
||||
struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
|
||||
-- !query output
|
||||
true
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' (interval month and interval day).; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
|
||||
-- !query schema
|
||||
struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>
|
||||
-- !query output
|
||||
[1-0,0-1]
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
|
||||
-- !query schema
|
||||
struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval day to minute>>
|
||||
-- !query output
|
||||
[1 00:00:00.000000000,0 01:01:00.000000000]
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function array should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
|
||||
-- !query schema
|
||||
struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
|
||||
-- !query output
|
||||
1-0
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
|
||||
-- !query schema
|
||||
struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval day to minute>
|
||||
-- !query output
|
||||
1 00:00:00.000000000
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
|
||||
|
|
Loading…
Reference in a new issue