[SPARK-36445][SQL][FOLLOWUP] ANSI type coercion: revisit promoting string literals in datetime expressions
### What changes were proposed in this pull request?
1. Promote more string literal in subtractions. In the ANSI type coercion rule, we already promoted
```
string - timestamp => cast(string as timestamp) - timestamp
```
This PR is to promote the following string literals:
```
string - date => cast(string as date) - date
date - string => date - cast(date as string)
timestamp - string => timestamp
```
It is very straightforward to cast the string literal as the data type of the other side in the subtraction.
2. Merge the string promotion logic from the rule `StringLiteralCoercion`:
```
date_sub(date, string) => date_sub(date, cast(string as int))
date_add(date, string) => date_add(date, cast(string as int))
```
### Why are the changes needed?
1. Promote the string literal in the subtraction as the data type of the other side. This is straightforward and consistent with PostgreSQL
2. Certerize all the string literal promotion in the ANSI type coercion rule
### Does this PR introduce _any_ user-facing change?
No, the new ANSI type coercion rules are not released yet.
### How was this patch tested?
Existing UT
Closes #33724 from gengliangwang/datetimeTypeCoercion.
Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Gengliang Wang <gengliang@apache.org>
(cherry picked from commit 48e333af54
)
Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
parent
89cc547afd
commit
3785738838
|
@ -91,7 +91,6 @@ object AnsiTypeCoercion extends TypeCoercionBase {
|
|||
ImplicitTypeCasts ::
|
||||
DateTimeOperations ::
|
||||
WindowFrameCoercion ::
|
||||
StringLiteralCoercion ::
|
||||
GetDateFieldOperations:: Nil) :: Nil
|
||||
|
||||
val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
|
||||
|
@ -297,12 +296,23 @@ object AnsiTypeCoercion extends TypeCoercionBase {
|
|||
|
||||
case d @ DateAdd(left @ StringType(), _) if left.foldable =>
|
||||
d.copy(startDate = Cast(d.startDate, DateType))
|
||||
case d @ DateAdd(_, right @ StringType()) if right.foldable =>
|
||||
d.copy(days = Cast(right, IntegerType))
|
||||
case d @ DateSub(left @ StringType(), _) if left.foldable =>
|
||||
d.copy(startDate = Cast(d.startDate, DateType))
|
||||
case d @ DateSub(_, right @ StringType()) if right.foldable =>
|
||||
d.copy(days = Cast(right, IntegerType))
|
||||
|
||||
case s @ SubtractDates(left @ StringType(), _, _) if left.foldable =>
|
||||
s.copy(left = Cast(s.left, DateType))
|
||||
case s @ SubtractDates(_, right @ StringType(), _) if right.foldable =>
|
||||
s.copy(right = Cast(s.right, DateType))
|
||||
case t @ TimeAdd(left @ StringType(), _, _) if left.foldable =>
|
||||
t.copy(start = Cast(t.start, TimestampType))
|
||||
case t @ SubtractTimestamps(left @ StringType(), _, _, _) if left.foldable =>
|
||||
t.copy(left = Cast(t.left, TimestampType))
|
||||
t.copy(left = Cast(t.left, t.right.dataType))
|
||||
case t @ SubtractTimestamps(_, right @ StringType(), _, _) if right.foldable =>
|
||||
t.copy(right = Cast(right, t.left.dataType))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -120,9 +120,11 @@ select null - date '2019-10-06';
|
|||
select date_str - date '2001-09-28' from date_view;
|
||||
select date '2001-09-28' - date_str from date_view;
|
||||
|
||||
-- invalid: date + string/null literal
|
||||
-- invalid: date + string literal
|
||||
select date'2011-11-11' + '1';
|
||||
select '1' + date'2011-11-11';
|
||||
|
||||
-- null result: date + null
|
||||
select date'2011-11-11' + null;
|
||||
select null + date'2011-11-11';
|
||||
|
||||
|
|
|
@ -108,9 +108,11 @@ create temporary view ts_view as select '2011-11-11 11:11:11' str;
|
|||
select str - timestamp'2011-11-11 11:11:11' from ts_view;
|
||||
select timestamp'2011-11-11 11:11:11' - str from ts_view;
|
||||
|
||||
-- invalid: timestamp + string/null literal
|
||||
-- invalid: timestamp + string literal
|
||||
select timestamp'2011-11-11 11:11:11' + '1';
|
||||
select '1' + timestamp'2011-11-11 11:11:11';
|
||||
|
||||
-- null result: timestamp + null
|
||||
select timestamp'2011-11-11 11:11:11' + null;
|
||||
select null + timestamp'2011-11-11 11:11:11';
|
||||
|
||||
|
|
|
@ -331,8 +331,8 @@ select date_add('2011-11-11', '1.2')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
The second argument of 'date_add' function needs to be an integer.
|
||||
java.lang.NumberFormatException
|
||||
invalid input syntax for type numeric: 1.2
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -441,8 +441,8 @@ select date_sub(date'2011-11-11', '1.2')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
The second argument of 'date_sub' function needs to be an integer.
|
||||
java.lang.NumberFormatException
|
||||
invalid input syntax for type numeric: 1.2
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
|
@ -559,10 +559,9 @@ struct<(TIMESTAMP '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval day
|
|||
-- !query
|
||||
select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10'
|
||||
-- !query schema
|
||||
struct<>
|
||||
struct<(TIMESTAMP '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '(TIMESTAMP '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7
|
||||
0 00:00:01.000000000
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
|
@ -559,10 +559,9 @@ struct<(TIMESTAMP_NTZ '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval
|
|||
-- !query
|
||||
select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10'
|
||||
-- !query schema
|
||||
struct<>
|
||||
struct<(TIMESTAMP_NTZ '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second>
|
||||
-- !query output
|
||||
org.apache.spark.sql.AnalysisException
|
||||
cannot resolve '(TIMESTAMP_NTZ '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7
|
||||
0 00:00:01.000000000
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
Loading…
Reference in a new issue