[SPARK-33658][SQL] Suggest using Datetime conversion functions for invalid ANSI casting

### What changes were proposed in this pull request?

Suggest that users use datetime conversion functions in the error message shown for an invalid ANSI explicit cast.

### Why are the changes needed?

In ANSI mode, explicit casts between datetime types and numeric types are not allowed.
Now that the new functions `UNIX_SECONDS`/`UNIX_MILLIS`/`UNIX_MICROS`/`UNIX_DATE`/`DATE_FROM_UNIX_DATE` have been introduced, we can suggest them to users so that they can complete these type conversions precisely and easily in ANSI mode.

### Does this PR introduce _any_ user-facing change?

Yes, better error messages

### How was this patch tested?

Unit test

Closes #30603 from gengliangwang/improveErrorMsgOfExplicitCast.

Authored-by: Gengliang Wang <gengliang.wang@databricks.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Gengliang Wang 2020-12-04 16:24:41 +09:00 committed by HyukjinKwon
parent 15579ba1f8
commit e8380665c7
3 changed files with 46 additions and 7 deletions

View file

@ -96,6 +96,10 @@ java.lang.NumberFormatException: invalid input syntax for type numeric: a
SELECT CAST(2147483648L AS INT);
java.lang.ArithmeticException: Casting 2147483648 to int causes overflow
SELECT CAST(DATE'2020-01-01' AS INT);
org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(DATE '2020-01-01' AS INT)' due to data type mismatch: cannot cast date to int.
To convert values from date to int, you can use function UNIX_DATE instead.
-- `spark.sql.ansi.enabled=false` (This is the default behaviour)
SELECT CAST('a' AS INT);
+--------------+
@ -111,6 +115,13 @@ SELECT CAST(2147483648L AS INT);
| -2147483648|
+-----------------------+
SELECT CAST(DATE'2020-01-01' AS INT);
+------------------------------+
|CAST(DATE '2020-01-01' AS INT)|
+------------------------------+
| null|
+------------------------------+
-- Examples of store assignment rules
CREATE TABLE t (v INT);

View file

@ -1894,6 +1894,19 @@ object AnsiCast {
case _ => false
}
/**
 * Builds the failure text for a disallowed explicit cast, pointing the user at the
 * built-in type conversion function(s) that can be used instead.
 *
 * @param from source data type of the rejected cast
 * @param to target data type of the rejected cast
 * @param functionNames phrase naming the replacement function(s); it is inserted verbatim
 *                      between "you can use" and "instead"
 * @return a two-line message, each line ending with a line break
 */
private def suggestionOnConversionFunctions(
    from: DataType,
    to: DataType,
    functionNames: String): String = {
  // Resolve the display names once; both sentences of the message reuse them.
  val fromType = from.catalogString
  val toType = to.catalogString
  s"""cannot cast $fromType to $toType.
     |To convert values from $fromType to $toType, you can use $functionNames instead.
     |""".stripMargin
}
def typeCheckFailureMessage(
from: DataType,
to: DataType,
@ -1901,12 +1914,19 @@ object AnsiCast {
fallbackConfValue: String): String =
(from, to) match {
case (_: NumericType, TimestampType) =>
// scalastyle:off line.size.limit
s"""
| cannot cast ${from.catalogString} to ${to.catalogString}.
| To convert values from ${from.catalogString} to ${to.catalogString}, you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead.
|""".stripMargin
suggestionOnConversionFunctions(from, to,
"functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS")
case (TimestampType, _: NumericType) =>
suggestionOnConversionFunctions(from, to, "functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS")
case (_: NumericType, DateType) =>
suggestionOnConversionFunctions(from, to, "function DATE_FROM_UNIX_DATE")
case (DateType, _: NumericType) =>
suggestionOnConversionFunctions(from, to, "function UNIX_DATE")
// scalastyle:off line.size.limit
case (_: ArrayType, StringType) =>
s"""
| cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on.

View file

@ -850,18 +850,26 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
// Exercises casts between numeric types and TimestampType in both directions and passes the
// expected "you can use ... instead" suggestion text to verifyCastFailure.
test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") {
import DataTypeTestUtils.numericTypes
checkInvalidCastFromNumericType(TimestampType)
// Long -> timestamp: the expected text names the TIMESTAMP_* functions.
var errorMsg =
"you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead"
verifyCastFailure(cast(Literal(0L), TimestampType), Some(errorMsg))
val timestampLiteral = Literal(1L, TimestampType)
// Timestamp -> each numeric type: errorMsg is reassigned to the UNIX_* suggestion.
errorMsg = "you can use functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS instead."
numericTypes.foreach { numericType =>
// NOTE(review): the next two calls look like the before/after forms of a single changed
// line in this diff view; confirm whether the call without an expected message should stay.
verifyCastFailure(cast(timestampLiteral, numericType))
verifyCastFailure(cast(timestampLiteral, numericType), Some(errorMsg))
}
}
// Exercises casts between numeric types and DateType in both directions and passes the
// expected "you can use ... instead" suggestion text to verifyCastFailure.
test("ANSI mode: disallow type conversions between Numeric types and Date type") {
import DataTypeTestUtils.numericTypes
checkInvalidCastFromNumericType(DateType)
// Long -> date: the expected text names DATE_FROM_UNIX_DATE.
var errorMsg = "you can use function DATE_FROM_UNIX_DATE instead"
verifyCastFailure(cast(Literal(0L), DateType), Some(errorMsg))
val dateLiteral = Literal(1, DateType)
// Date -> each numeric type: errorMsg is reassigned to the UNIX_DATE suggestion.
errorMsg = "you can use function UNIX_DATE instead"
numericTypes.foreach { numericType =>
// NOTE(review): the next two calls look like the before/after forms of a single changed
// line in this diff view; confirm whether the call without an expected message should stay.
verifyCastFailure(cast(dateLiteral, numericType))
verifyCastFailure(cast(dateLiteral, numericType), Some(errorMsg))
}
}