From 26d6b952dcf7d387930701396de9cef679df7432 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 16 Aug 2021 22:41:14 +0300 Subject: [PATCH] [SPARK-36521][SQL] Disallow comparison between Interval and String ### What changes were proposed in this pull request? Disallow comparison between Interval and String in the default type coercion rules. ### Why are the changes needed? If a binary comparison contains interval type and string type, we can't decide which interval type the string should be promoted as. There are many possible interval types, such as year interval, month interval, day interval, hour interval, etc. ### Does this PR introduce _any_ user-facing change? No, the new interval type is not released yet. ### How was this patch tested? Existing UT Closes #33750 from gengliangwang/disallowCom. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../sql/catalyst/analysis/TypeCoercion.scala | 16 +++- .../resources/sql-tests/inputs/interval.sql | 6 ++ .../sql-tests/results/ansi/interval.sql.out | 56 +++++++++++- .../sql-tests/results/interval.sql.out | 86 ++++++++++++++++--- 4 files changed, 148 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 23654afac6..863bdc056b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -862,6 +862,18 @@ object TypeCoercion extends TypeCoercionBase { case _ => None } + // Return whether a string literal can be promoted as the given data type in a binary comparison. + private def canPromoteAsInBinaryComparison(dt: DataType) = dt match { + // If a binary comparison contains interval type and string type, we can't decide which + // interval type the string should be promoted as. 
There are many possible interval + // types, such as year interval, month interval, day interval, hour interval, etc. + case _: YearMonthIntervalType | _: DayTimeIntervalType => false + // There is no need to add `Cast` for comparison between strings. + case _: StringType => false + case _: AtomicType => true + case _ => false + } + /** * This function determines the target type of a comparison operator when one operand * is a String and the other is not. It also handles when one op is a Date and the @@ -891,8 +903,8 @@ object TypeCoercion extends TypeCoercionBase { case (n: DecimalType, s: StringType) => Some(DoubleType) case (s: StringType, n: DecimalType) => Some(DoubleType) - case (l: StringType, r: AtomicType) if r != StringType => Some(r) - case (l: AtomicType, r: StringType) if l != StringType => Some(l) + case (l: StringType, r: AtomicType) if canPromoteAsInBinaryComparison(r) => Some(r) + case (l: AtomicType, r: StringType) if canPromoteAsInBinaryComparison(l) => Some(l) case (l, r) => None } diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 618cf16c44..279c544113 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -341,9 +341,15 @@ SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS; SELECT INTERVAL '1' DAY < '1'; SELECT INTERVAL '1' DAY = '1'; SELECT INTERVAL '1' DAY > '1'; +SELECT '1' < INTERVAL '1' DAY; +SELECT '1' = INTERVAL '1' DAY; +SELECT '1' > INTERVAL '1' DAY; SELECT INTERVAL '1' YEAR < '1'; SELECT INTERVAL '1' YEAR = '1'; SELECT INTERVAL '1' YEAR > '1'; +SELECT '1' < INTERVAL '1' YEAR; +SELECT '1' = INTERVAL '1' YEAR; +SELECT '1' > INTERVAL '1' YEAR; SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH); SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index e0bf076452..1aa0920b55 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 251 +-- Number of queries: 257 -- !query @@ -2327,6 +2327,33 @@ org.apache.spark.sql.AnalysisException cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7 +-- !query +SELECT '1' < INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 + + +-- !query +SELECT '1' = INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 + + +-- !query +SELECT '1' > INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 + + -- !query SELECT INTERVAL '1' YEAR < '1' -- !query schema @@ -2354,6 +2381,33 @@ org.apache.spark.sql.AnalysisException cannot resolve '(INTERVAL '1' YEAR > '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; line 1 pos 7 +-- !query +SELECT '1' < INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' < INTERVAL '1' YEAR)' 
(string and interval year).; line 1 pos 7 + + +-- !query +SELECT '1' = INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7 + + +-- !query +SELECT '1' > INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7 + + -- !query SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 3e6380b55e..5f6af71a31 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 251 +-- Number of queries: 257 -- !query @@ -2292,49 +2292,109 @@ cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type misma -- !query SELECT INTERVAL '1' DAY < '1' -- !query schema -struct<(INTERVAL '1' DAY < 1):boolean> +struct<> -- !query output -false +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' DAY < '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY < '1')' (interval day and string).; line 1 pos 7 -- !query SELECT INTERVAL '1' DAY = '1' -- !query schema -struct<(INTERVAL '1' DAY = 1):boolean> +struct<> -- !query output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' DAY = '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY = '1')' (interval day and string).; line 1 pos 7 -- !query SELECT INTERVAL '1' DAY > '1' -- !query schema -struct<(INTERVAL '1' DAY > 
1):boolean> +struct<> -- !query output -false +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7 + + +-- !query +SELECT '1' < INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 + + +-- !query +SELECT '1' = INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 + + +-- !query +SELECT '1' > INTERVAL '1' DAY +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7 -- !query SELECT INTERVAL '1' YEAR < '1' -- !query schema -struct<(INTERVAL '1' YEAR < 1):boolean> +struct<> -- !query output -false +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' YEAR < '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR < '1')' (interval year and string).; line 1 pos 7 -- !query SELECT INTERVAL '1' YEAR = '1' -- !query schema -struct<(INTERVAL '1' YEAR = 1):boolean> +struct<> -- !query output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' YEAR = '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR = '1')' (interval year and string).; line 1 pos 7 -- !query SELECT INTERVAL '1' YEAR > '1' -- !query schema -struct<(INTERVAL '1' YEAR > 1):boolean> +struct<> -- !query output -false +org.apache.spark.sql.AnalysisException +cannot resolve '(INTERVAL '1' YEAR > '1')' due to 
data type mismatch: differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; line 1 pos 7 + + +-- !query +SELECT '1' < INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' < INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7 + + +-- !query +SELECT '1' = INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7 + + +-- !query +SELECT '1' > INTERVAL '1' YEAR +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7 -- !query