[SPARK-35854][SQL] Improve the error message of to_timestamp_ntz with invalid format pattern
### What changes were proposed in this pull request? When SQL function `to_timestamp_ntz` has invalid format pattern input, throw a runtime exception with hints for the valid patterns, instead of throwing an upgrade exception with suggestions to use legacy formatters. ### Why are the changes needed? As discussed in https://github.com/apache/spark/pull/32995/files#r655148980, there is an error message saying "You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0" This is not true for function to_timestamp_ntz, which only uses the Iso8601TimestampFormatter and was added in Spark 3.2. We should improve it. ### Does this PR introduce _any_ user-facing change? No, the new SQL function is not released yet. ### How was this patch tested? Unit test Closes #33019 from gengliangwang/improveError. Authored-by: Gengliang Wang <gengliang@apache.org> Signed-off-by: Gengliang Wang <gengliang@apache.org>
This commit is contained in:
parent
bc61b62a55
commit
ce53b7199d
|
@ -184,7 +184,12 @@ trait DateTimeFormatterHelper {
|
|||
} catch {
|
||||
case _: Throwable => throw e
|
||||
}
|
||||
throw QueryExecutionErrors.failToRecognizePatternInDateTimeFormatterError(pattern, e)
|
||||
throw QueryExecutionErrors.failToRecognizePatternAfterUpgradeError(pattern, e)
|
||||
}
|
||||
|
||||
protected def checkInvalidPattern(pattern: String): PartialFunction[Throwable, Nothing] = {
|
||||
case e: IllegalArgumentException =>
|
||||
throw QueryExecutionErrors.failToRecognizePatternError(pattern, e)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -80,7 +80,13 @@ sealed trait TimestampFormatter extends Serializable {
|
|||
s"The method `format(localDateTime: LocalDateTime)` should be implemented in the formatter " +
|
||||
"of timestamp without time zone")
|
||||
|
||||
def validatePatternString(): Unit
|
||||
/**
|
||||
* Validates the pattern string.
|
||||
* @param checkLegacy if true and the pattern is invalid, check whether the pattern is valid for
|
||||
* legacy formatters and show hints for using legacy formatter.
|
||||
* Otherwise, simply check the pattern string.
|
||||
*/
|
||||
def validatePatternString(checkLegacy: Boolean): Unit
|
||||
}
|
||||
|
||||
class Iso8601TimestampFormatter(
|
||||
|
@ -140,10 +146,17 @@ class Iso8601TimestampFormatter(
|
|||
localDateTime.format(formatter)
|
||||
}
|
||||
|
||||
override def validatePatternString(): Unit = {
|
||||
try {
|
||||
formatter
|
||||
} catch checkLegacyFormatter(pattern, legacyFormatter.validatePatternString)
|
||||
override def validatePatternString(checkLegacy: Boolean): Unit = {
|
||||
if (checkLegacy) {
|
||||
try {
|
||||
formatter
|
||||
} catch checkLegacyFormatter(pattern,
|
||||
legacyFormatter.validatePatternString(checkLegacy = true))
|
||||
} else {
|
||||
try {
|
||||
formatter
|
||||
} catch checkInvalidPattern(pattern)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -275,7 +288,7 @@ class LegacyFastTimestampFormatter(
|
|||
format(instantToMicros(instant))
|
||||
}
|
||||
|
||||
override def validatePatternString(): Unit = fastDateFormat
|
||||
override def validatePatternString(checkLegacy: Boolean): Unit = fastDateFormat
|
||||
}
|
||||
|
||||
class LegacySimpleTimestampFormatter(
|
||||
|
@ -306,7 +319,7 @@ class LegacySimpleTimestampFormatter(
|
|||
format(instantToMicros(instant))
|
||||
}
|
||||
|
||||
override def validatePatternString(): Unit = sdf
|
||||
override def validatePatternString(checkLegacy: Boolean): Unit = sdf
|
||||
}
|
||||
|
||||
object LegacyDateFormats extends Enumeration {
|
||||
|
@ -335,7 +348,7 @@ object TimestampFormatter {
|
|||
new Iso8601TimestampFormatter(
|
||||
pattern, zoneId, locale, legacyFormat, isParsing)
|
||||
}
|
||||
formatter.validatePatternString()
|
||||
formatter.validatePatternString(checkLegacy = !forTimestampWithoutTZ)
|
||||
formatter
|
||||
}
|
||||
|
||||
|
|
|
@ -919,8 +919,7 @@ object QueryExecutionErrors {
|
|||
""".stripMargin.replaceAll("\n", " "), e)
|
||||
}
|
||||
|
||||
def failToRecognizePatternInDateTimeFormatterError(
|
||||
pattern: String, e: Throwable): Throwable = {
|
||||
def failToRecognizePatternAfterUpgradeError(pattern: String, e: Throwable): Throwable = {
|
||||
new SparkUpgradeException("3.0", s"Fail to recognize '$pattern' pattern in the" +
|
||||
s" DateTimeFormatter. 1) You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY" +
|
||||
s" to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern" +
|
||||
|
@ -928,6 +927,13 @@ object QueryExecutionErrors {
|
|||
e)
|
||||
}
|
||||
|
||||
def failToRecognizePatternError(pattern: String, e: Throwable): Throwable = {
|
||||
new RuntimeException(s"Fail to recognize '$pattern' pattern in the" +
|
||||
" DateTimeFormatter. You can form a valid datetime pattern" +
|
||||
" with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html",
|
||||
e)
|
||||
}
|
||||
|
||||
def cannotCastUTF8StringToDataTypeError(s: UTF8String, to: DataType): Throwable = {
|
||||
new DateTimeException(s"Cannot cast $s to $to.")
|
||||
}
|
||||
|
|
|
@ -1219,8 +1219,8 @@ select to_timestamp_ntz('2019-10-06 A', 'yyyy-MM-dd GGGGG')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1228,8 +1228,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1237,8 +1237,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
|
@ -1180,8 +1180,8 @@ select to_timestamp_ntz('2019-10-06 A', 'yyyy-MM-dd GGGGG')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1189,8 +1189,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1198,8 +1198,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
|
@ -1183,8 +1183,8 @@ select to_timestamp_ntz('2019-10-06 A', 'yyyy-MM-dd GGGGG')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1192,8 +1192,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -1201,8 +1201,8 @@ select to_timestamp_ntz('22 05 2020 Friday', 'dd MM yyyy EEEEE')
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
java.lang.RuntimeException
|
||||
Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
Loading…
Reference in a new issue