[SPARK-31896][SQL] Handle am-pm timestamp parsing when hour is missing

### What changes were proposed in this pull request?

This PR sets the hour to 12 (PM) or 0 (AM) when the AMPM_OF_DAY field exists but the hour field is missing.

### Why are the changes needed?

When the hour is absent but the am-pm is present, the time is incorrect for pm

### Does this PR introduce _any_ user-facing change?
Yes. The change is user-facing, but it restores the 2.4 behavior to keep backward compatibility.

e.g.
```sql
spark-sql> select to_timestamp('33:33 PM', 'mm:ss a');
1970-01-01 12:33:33
spark-sql> select to_timestamp('33:33 AM', 'mm:ss a');
1970-01-01 00:33:33

```

Without this change, both queries return `1970-01-01 00:33:33`.

### How was this patch tested?

Added unit tests.

Closes #28713 from yaooqinn/SPARK-31896.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Kent Yao 2020-06-03 13:30:22 +00:00 committed by Wenchen Fan
parent 349015dce0
commit afcc14c6d2
3 changed files with 39 additions and 3 deletions

View file

@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
accessor.get(ChronoField.HOUR_OF_DAY)
} else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
// When we reach here, it means am/pm is not specified. Here we assume it's am.
// All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
// be resolved to HOUR_OF_AMPM here, we do not need to handle them separately
accessor.get(ChronoField.HOUR_OF_AMPM)
} else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
// When we reach here, the `hour` part is missing and PM is specified.
// None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
// specified
12
} else {
0
}

View file

@ -1197,4 +1197,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkNullify(l)
}
}
test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
  // With no hour field in the pattern, the am/pm marker alone decides the hour:
  // PM resolves to 12 and AM resolves to 0. Both markers are covered, with and
  // without other time fields, so a regression in either branch is caught.
  Seq(
    ("PM", "a", "1970-01-01 12:00:00.0"),
    ("AM", "a", "1970-01-01 00:00:00.0"),
    ("11:11 PM", "mm:ss a", "1970-01-01 12:11:11.0"),
    ("11:11 AM", "mm:ss a", "1970-01-01 00:11:11.0")
  ).foreach { case (input, pattern, expected) =>
    checkEvaluation(
      new ParseToTimestamp(Literal(input), Literal(pattern)).child,
      Timestamp.valueOf(expected))
  }
}
}

View file

@ -389,9 +389,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}
test("missing am/pm field") {
  // Each hour pattern letter must parse "11" as hour 11 when the pattern has no
  // am/pm marker. The "hh" iteration covers the former stand-alone
  // "yyyy hh:mm:ss" check, so that duplicate is not repeated here.
  Seq("HH", "hh", "KK", "kk").foreach { hour =>
    val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
    val micros = formatter.parse("2009 11:30:01")
    assert(micros === date(2009, 1, 1, 11, 30, 1))
  }
}
test("missing time fields") {
@ -400,6 +402,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
assert(micros === date(2009, 1, 1, 11))
}
test("missing hour field") {
  // Minutes and seconds plus an am/pm marker: PM maps to hour 12, AM to hour 0.
  val minSecAmPm = TimestampFormatter("mm:ss a", UTC)
  assert(minSecAmPm.parse("30:01 PM") === date(1970, 1, 1, 12, 30, 1))
  assert(minSecAmPm.parse("30:01 AM") === date(1970, 1, 1, 0, 30, 1))

  // Neither an hour nor an am/pm marker: the hour falls back to 0.
  val minSecOnly = TimestampFormatter("mm:ss", UTC)
  assert(minSecOnly.parse("30:01") === date(1970, 1, 1, 0, 30, 1))

  // Only the am/pm marker is present in the pattern.
  val amPmOnly = TimestampFormatter("a", UTC)
  assert(amPmOnly.parse("PM") === date(1970, 1, 1, 12))
  assert(amPmOnly.parse("AM") === date(1970))
}
test("explicitly forbidden datetime patterns") {
// not support by the legacy one too
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>