[SPARK-36286][SQL] Block some invalid datetime string
### What changes were proposed in this pull request?

In PR #32959, we found some malformed datetime strings that were nevertheless parsed successfully. ([details](https://github.com/apache/spark/pull/32959#discussion_r665015489)) This PR blocks these invalid datetime strings.

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

Yes, the strings below will produce different results when cast to a datetime type.

```sql
select cast('12::' as timestamp); -- Before: 2021-07-07 12:00:00, After: NULL
select cast('T' as timestamp); -- Before: 2021-07-07 00:00:00, After: NULL
```

### How was this patch tested?

Some new test cases.

Closes #33490 from linhongliu-db/SPARK-35780-block-invalid-format.

Authored-by: Linhong Liu <linhong.liu@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
9c5cb99d6e
commit
ed0e351f05
|
@ -254,7 +254,9 @@ object DateTimeUtils {
|
|||
val maxDigitsYear = 6
|
||||
// For the nanosecond part, more than 6 digits is allowed, but will be truncated.
|
||||
segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
|
||||
(segment != 0 && segment != 6 && digits <= 2)
|
||||
// For the zoneId segment(7), it could be zero digits when it's a region-based zone ID
|
||||
(segment == 7 && digits <= 2) ||
|
||||
(segment != 0 && segment != 6 && segment != 7 && digits > 0 && digits <= 2)
|
||||
}
|
||||
if (s == null || s.trimAll().numBytes() == 0) {
|
||||
return (Array.empty, None, false)
|
||||
|
@ -527,7 +529,8 @@ object DateTimeUtils {
|
|||
def isValidDigits(segment: Int, digits: Int): Boolean = {
|
||||
// An integer is able to represent a date within [+-]5 million years.
|
||||
var maxDigitsYear = 7
|
||||
(segment == 0 && digits >= 4 && digits <= maxDigitsYear) || (segment != 0 && digits <= 2)
|
||||
(segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
|
||||
(segment != 0 && digits > 0 && digits <= 2)
|
||||
}
|
||||
if (s == null || s.trimAll().numBytes() == 0) {
|
||||
return None
|
||||
|
|
|
@ -576,4 +576,8 @@ class CastSuite extends CastSuiteBase {
|
|||
checkEvaluation(cast(invalidInput, TimestampNTZType), null)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-36286: invalid string cast to timestamp") {
|
||||
checkEvaluation(cast(Literal("2015-03-18T"), TimestampType), null)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -150,7 +150,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
c.set(Calendar.MILLISECOND, 0)
|
||||
checkCastStringToTimestamp("2015-03-18", new Timestamp(c.getTimeInMillis))
|
||||
checkCastStringToTimestamp("2015-03-18 ", new Timestamp(c.getTimeInMillis))
|
||||
checkCastStringToTimestamp("2015-03-18T", new Timestamp(c.getTimeInMillis))
|
||||
|
||||
c = Calendar.getInstance(tz)
|
||||
c.set(2015, 2, 18, 12, 3, 17)
|
||||
|
|
|
@ -147,6 +147,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
|
|||
assert(toDate("1999 08 01").isEmpty)
|
||||
assert(toDate("1999-08 01").isEmpty)
|
||||
assert(toDate("1999 08").isEmpty)
|
||||
assert(toDate("1999-08-").isEmpty)
|
||||
assert(toDate("").isEmpty)
|
||||
assert(toDate(" ").isEmpty)
|
||||
}
|
||||
|
@ -182,7 +183,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
|
|||
checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, zid = zid)))
|
||||
checkStringToTimestamp("0001", Option(date(1, 1, 1, 0, zid = zid)))
|
||||
checkStringToTimestamp("2015-03", Option(date(2015, 3, 1, zid = zid)))
|
||||
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18T").foreach { s =>
|
||||
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ").foreach { s =>
|
||||
checkStringToTimestamp(s, Option(date(2015, 3, 18, zid = zid)))
|
||||
}
|
||||
|
||||
|
@ -289,6 +290,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
|
|||
checkStringToTimestamp("", None)
|
||||
checkStringToTimestamp(" ", None)
|
||||
checkStringToTimestamp("+", None)
|
||||
checkStringToTimestamp("T", None)
|
||||
checkStringToTimestamp("2015-03-18T", None)
|
||||
checkStringToTimestamp("12::", None)
|
||||
checkStringToTimestamp("2015-03-18T12:03:17-8:", None)
|
||||
checkStringToTimestamp("2015-03-18T12:03:17-8:30:", None)
|
||||
|
||||
// Truncating the fractional seconds
|
||||
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC))
|
||||
|
|
Loading…
Reference in a new issue