[SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing
### What changes were proposed in this pull request? This PR sets the hour to 12 (for PM) or 0 (for AM) when the AMPM_OF_DAY field exists but the hour field is missing. ### Why are the changes needed? When the hour is absent but am/pm is present, the parsed time is incorrect for PM. ### Does this PR introduce _any_ user-facing change? Yes, the change is user-facing, but it restores the 2.4 behavior to keep backward compatibility, e.g. ```sql spark-sql> select to_timestamp('33:33 PM', 'mm:ss a'); 1970-01-01 12:33:33 spark-sql> select to_timestamp('33:33 AM', 'mm:ss a'); 1970-01-01 00:33:33 ``` Otherwise, the results are all `1970-01-01 00:33:33`. ### How was this patch tested? Added unit tests. Closes #28713 from yaooqinn/SPARK-31896. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
349015dce0
commit
afcc14c6d2
|
@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
|
|||
accessor.get(ChronoField.HOUR_OF_DAY)
|
||||
} else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
|
||||
// When we reach here, it means am/pm is not specified. Here we assume it's am.
|
||||
// All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
|
||||
// be resolved to HOUR_OF_AMPM here, we do not need to handle them separately
|
||||
accessor.get(ChronoField.HOUR_OF_AMPM)
|
||||
} else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
|
||||
accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
|
||||
// When we reach here, the `hour` part is missing, and PM is specified.
|
||||
// None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
|
||||
// specified
|
||||
12
|
||||
} else {
|
||||
0
|
||||
}
|
||||
|
|
|
@ -1197,4 +1197,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkNullify(l)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
  // Each row is (input string, datetime pattern, expected timestamp text).
  // Neither pattern contains an hour field; both inputs carry a PM marker,
  // and the expected hour is 12 in both cases.
  val pmWithoutHour = Seq(
    ("PM", "a", "1970-01-01 12:00:00.0"),
    ("11:11 PM", "mm:ss a", "1970-01-01 12:11:11.0"))

  pmWithoutHour.foreach { case (input, pattern, expected) =>
    checkEvaluation(
      new ParseToTimestamp(Literal(input), Literal(pattern)).child,
      Timestamp.valueOf(expected))
  }
}
|
||||
}
|
||||
|
|
|
@ -389,9 +389,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
|
|||
}
|
||||
|
||||
test("missing am/pm field") {
  // With no am/pm marker in the pattern, an hour text of "11" must parse to 11
  // for every hour pattern: HOUR_OF_DAY (HH), CLOCK_HOUR_OF_AMPM (hh),
  // HOUR_OF_AMPM (KK) and CLOCK_HOUR_OF_DAY (kk).
  // The "hh" case is covered by the loop, so no separate standalone check is needed.
  Seq("HH", "hh", "KK", "kk").foreach { hour =>
    val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
    val micros = formatter.parse("2009 11:30:01")
    assert(micros === date(2009, 1, 1, 11, 30, 1))
  }
}
|
||||
|
||||
test("missing time fields") {
|
||||
|
@ -400,6 +402,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
|
|||
assert(micros === date(2009, 1, 1, 11))
|
||||
}
|
||||
|
||||
test("missing hour field") {
  // Each row is (pattern, input, expected parse result). None of the patterns
  // contains an hour field: PM is expected to yield hour 12, while AM or no
  // am/pm marker at all is expected to yield hour 0.
  val expectations = Seq(
    ("mm:ss a", "30:01 PM", date(1970, 1, 1, 12, 30, 1)),
    ("mm:ss a", "30:01 AM", date(1970, 1, 1, 0, 30, 1)),
    ("mm:ss", "30:01", date(1970, 1, 1, 0, 30, 1)),
    ("a", "PM", date(1970, 1, 1, 12)),
    ("a", "AM", date(1970)))

  expectations.foreach { case (pattern, input, expected) =>
    val parsed = TimestampFormatter(pattern, UTC).parse(input)
    assert(parsed === expected)
  }
}
|
||||
|
||||
test("explicitly forbidden datetime patterns") {
|
||||
// not support by the legacy one too
|
||||
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>
|
||||
|
|
Loading…
Reference in a new issue