[SPARK-36521][SQL] Disallow comparison between Interval and String

### What changes were proposed in this pull request?

Disallow comparison between Interval and String in the default type coercion rules.

### Why are the changes needed?

If a binary comparison has one operand of an interval type and one of string type, we
can't decide which interval type the string should be promoted to. There are many possible
interval types, such as year interval, month interval, day interval, hour interval, etc.

### Does this PR introduce _any_ user-facing change?

No, the new interval types are not released yet.

### How was this patch tested?

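Existing unit tests (UTs), plus six new SQL test queries that compare a string on the left-hand side with an interval (the generated golden files grow from 251 to 257 queries).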

Closes #33750 from gengliangwang/disallowCom.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
Gengliang Wang 2021-08-16 22:41:14 +03:00 committed by Max Gekk
parent 3d57e00a7f
commit 26d6b952dc
4 changed files with 148 additions and 16 deletions

View file

@ -862,6 +862,18 @@ object TypeCoercion extends TypeCoercionBase {
case _ => None
}
// Decides whether, in a binary comparison, a string literal may be promoted to the given
// data type `dt`.
private def canPromoteAsInBinaryComparison(dt: DataType) = dt match {
  // Never promote to:
  //  - string: a comparison between two strings needs no `Cast`;
  //  - year-month / day-time intervals: there are many possible interval types (year
  //    interval, month interval, day interval, hour interval, etc.), so we can't decide
  //    which one the string should become.
  case _: StringType | _: YearMonthIntervalType | _: DayTimeIntervalType => false
  // Any other atomic type is an acceptable promotion target.
  case _: AtomicType => true
  // Everything that is not an atomic type is rejected.
  case _ => false
}
/**
* This function determines the target type of a comparison operator when one operand
* is a String and the other is not. It also handles when one op is a Date and the
@ -891,8 +903,8 @@ object TypeCoercion extends TypeCoercionBase {
case (n: DecimalType, s: StringType) => Some(DoubleType)
case (s: StringType, n: DecimalType) => Some(DoubleType)
case (l: StringType, r: AtomicType) if r != StringType => Some(r)
case (l: AtomicType, r: StringType) if l != StringType => Some(l)
case (l: StringType, r: AtomicType) if canPromoteAsInBinaryComparison(r) => Some(r)
case (l: AtomicType, r: StringType) if canPromoteAsInBinaryComparison(l) => Some(l)
case (l, r) => None
}

View file

@ -341,9 +341,15 @@ SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
SELECT INTERVAL '1' DAY < '1';
SELECT INTERVAL '1' DAY = '1';
SELECT INTERVAL '1' DAY > '1';
SELECT '1' < INTERVAL '1' DAY;
SELECT '1' = INTERVAL '1' DAY;
SELECT '1' > INTERVAL '1' DAY;
SELECT INTERVAL '1' YEAR < '1';
SELECT INTERVAL '1' YEAR = '1';
SELECT INTERVAL '1' YEAR > '1';
SELECT '1' < INTERVAL '1' YEAR;
SELECT '1' = INTERVAL '1' YEAR;
SELECT '1' > INTERVAL '1' YEAR;
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 251
-- Number of queries: 257
-- !query
@ -2327,6 +2327,33 @@ org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7
-- !query
SELECT '1' < INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT '1' = INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT '1' > INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT INTERVAL '1' YEAR < '1'
-- !query schema
@ -2354,6 +2381,33 @@ org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' YEAR > '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; line 1 pos 7
-- !query
SELECT '1' < INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' < INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query
SELECT '1' = INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query
SELECT '1' > INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
-- !query schema

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 251
-- Number of queries: 257
-- !query
@ -2292,49 +2292,109 @@ cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type misma
-- !query
SELECT INTERVAL '1' DAY < '1'
-- !query schema
struct<(INTERVAL '1' DAY < 1):boolean>
struct<>
-- !query output
false
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' DAY < '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY < '1')' (interval day and string).; line 1 pos 7
-- !query
SELECT INTERVAL '1' DAY = '1'
-- !query schema
struct<(INTERVAL '1' DAY = 1):boolean>
struct<>
-- !query output
true
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' DAY = '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY = '1')' (interval day and string).; line 1 pos 7
-- !query
SELECT INTERVAL '1' DAY > '1'
-- !query schema
struct<(INTERVAL '1' DAY > 1):boolean>
struct<>
-- !query output
false
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7
-- !query
SELECT '1' < INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT '1' = INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT '1' > INTERVAL '1' DAY
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
-- !query
SELECT INTERVAL '1' YEAR < '1'
-- !query schema
struct<(INTERVAL '1' YEAR < 1):boolean>
struct<>
-- !query output
false
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' YEAR < '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR < '1')' (interval year and string).; line 1 pos 7
-- !query
SELECT INTERVAL '1' YEAR = '1'
-- !query schema
struct<(INTERVAL '1' YEAR = 1):boolean>
struct<>
-- !query output
true
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' YEAR = '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR = '1')' (interval year and string).; line 1 pos 7
-- !query
SELECT INTERVAL '1' YEAR > '1'
-- !query schema
struct<(INTERVAL '1' YEAR > 1):boolean>
struct<>
-- !query output
false
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' YEAR > '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; line 1 pos 7
-- !query
SELECT '1' < INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' < INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query
SELECT '1' = INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query
SELECT '1' > INTERVAL '1' YEAR
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; line 1 pos 7
-- !query