From 37857388389e34ebec5215ba29b5b78dac1cfb2c Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 13 Aug 2021 01:02:34 +0800 Subject: [PATCH] [SPARK-36445][SQL][FOLLOWUP] ANSI type coercion: revisit promoting string literals in datetime expressions ### What changes were proposed in this pull request? 1. Promote more string literal in subtractions. In the ANSI type coercion rule, we already promoted ``` string - timestamp => cast(string as timestamp) - timestamp ``` This PR is to promote the following string literals: ``` string - date => cast(string as date) - date date - string => date - cast(date as string) timestamp - string => timestamp ``` It is very straightforward to cast the string literal as the data type of the other side in the subtraction. 2. Merge the string promotion logic from the rule `StringLiteralCoercion`: ``` date_sub(date, string) => date_sub(date, cast(string as int)) date_add(date, string) => date_add(date, cast(string as int)) ``` ### Why are the changes needed? 1. Promote the string literal in the subtraction as the data type of the other side. This is straightforward and consistent with PostgreSQL 2. Certerize all the string literal promotion in the ANSI type coercion rule ### Does this PR introduce _any_ user-facing change? No, the new ANSI type coercion rules are not released yet. ### How was this patch tested? Existing UT Closes #33724 from gengliangwang/datetimeTypeCoercion. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 48e333af5473f849274c4247313ea7b51de70faf) Signed-off-by: Gengliang Wang --- .../sql/catalyst/analysis/AnsiTypeCoercion.scala | 14 ++++++++++++-- .../src/test/resources/sql-tests/inputs/date.sql | 4 +++- .../test/resources/sql-tests/inputs/timestamp.sql | 4 +++- .../resources/sql-tests/results/ansi/date.sql.out | 8 ++++---- .../sql-tests/results/ansi/timestamp.sql.out | 5 ++--- .../results/timestampNTZ/timestamp-ansi.sql.out | 5 ++--- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala index c5f0845e7a..61724aed84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala @@ -91,7 +91,6 @@ object AnsiTypeCoercion extends TypeCoercionBase { ImplicitTypeCasts :: DateTimeOperations :: WindowFrameCoercion :: - StringLiteralCoercion :: GetDateFieldOperations:: Nil) :: Nil val findTightestCommonType: (DataType, DataType) => Option[DataType] = { @@ -297,12 +296,23 @@ object AnsiTypeCoercion extends TypeCoercionBase { case d @ DateAdd(left @ StringType(), _) if left.foldable => d.copy(startDate = Cast(d.startDate, DateType)) + case d @ DateAdd(_, right @ StringType()) if right.foldable => + d.copy(days = Cast(right, IntegerType)) case d @ DateSub(left @ StringType(), _) if left.foldable => d.copy(startDate = Cast(d.startDate, DateType)) + case d @ DateSub(_, right @ StringType()) if right.foldable => + d.copy(days = Cast(right, IntegerType)) + + case s @ SubtractDates(left @ StringType(), _, _) if left.foldable => + s.copy(left = Cast(s.left, DateType)) + case s @ SubtractDates(_, right @ StringType(), _) if right.foldable => + s.copy(right = Cast(s.right, DateType)) case t @ TimeAdd(left @ StringType(), _, _) if left.foldable => t.copy(start = Cast(t.start, TimestampType)) case t @ SubtractTimestamps(left @ StringType(), _, _, _) if left.foldable => - t.copy(left = Cast(t.left, TimestampType)) + t.copy(left = Cast(t.left, t.right.dataType)) + case t @ SubtractTimestamps(_, right @ StringType(), _, _) if right.foldable => + t.copy(right = Cast(right, t.left.dataType)) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index bfa96ed1ec..57049eb461 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -120,9 +120,11 @@ select null - date '2019-10-06'; select date_str - date '2001-09-28' from date_view; select date '2001-09-28' - date_str from date_view; --- invalid: date + string/null literal +-- invalid: date + string literal select date'2011-11-11' + '1'; select '1' + date'2011-11-11'; + +-- null result: date + null select date'2011-11-11' + null; select null + date'2011-11-11'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql index fc8416fb07..f8a3b3cf7c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql @@ -108,9 +108,11 @@ create temporary view ts_view as select '2011-11-11 11:11:11' str; select str - timestamp'2011-11-11 11:11:11' from ts_view; select timestamp'2011-11-11 11:11:11' - str from ts_view; --- invalid: timestamp + string/null literal +-- invalid: timestamp + string literal select timestamp'2011-11-11 11:11:11' + '1'; select '1' + timestamp'2011-11-11 11:11:11'; + +-- null result: timestamp + null select timestamp'2011-11-11 11:11:11' + null; select null + timestamp'2011-11-11 11:11:11'; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 46eaa682ed..0196b99cb0 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -331,8 +331,8 @@ select date_add('2011-11-11', '1.2') -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer. +java.lang.NumberFormatException +invalid input syntax for type numeric: 1.2 -- !query @@ -441,8 +441,8 @@ select date_sub(date'2011-11-11', '1.2') -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer. +java.lang.NumberFormatException +invalid input syntax for type numeric: 1.2 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index a136615829..948a7901f5 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -559,10 +559,9 @@ struct<(TIMESTAMP '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval day -- !query select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10' -- !query schema -struct<> +struct<(TIMESTAMP '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(TIMESTAMP '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7 +0 00:00:01.000000000 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 7c179c7bbd..a355c2d5c3 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -559,10 +559,9 @@ struct<(TIMESTAMP_NTZ '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval -- !query select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10' -- !query schema -struct<> +struct<(TIMESTAMP_NTZ '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(TIMESTAMP_NTZ '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7 +0 00:00:01.000000000 -- !query