[SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days

### What changes were proposed in this pull request?

This PR modifies LegacyDateFormatter#parse to return proleptic Gregorian days rather than hybrid Julian days.

### Why are the changes needed?

The legacy time parser currently returns epoch days in the hybrid Julian calendar. However, callers of the legacy parser (e.g., UnivocityParser, JacksonParser) expect epoch days in the proleptic Gregorian calendar. As a result, pre-Gregorian dates are shifted; for example, '1000-01-01' is interpreted as '1000-01-06'.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Manual testing and modified existing unit tests.

Closes #28345 from bersprockets/SPARK-31557.

Authored-by: Bruce Robbins <bersprockets@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Bruce Robbins 2020-04-27 05:00:36 +00:00 committed by Wenchen Fan
parent 560bd5401f
commit a911287244
2 changed files with 44 additions and 35 deletions

View file

@ -66,8 +66,7 @@ trait LegacyDateFormatter extends DateFormatter {
def formatDate(d: Date): String
/**
 * Parses the date string `s` and returns the result as an `Int` day count.
 *
 * NOTE(review): this block appears to come from a rendered diff (hunk header `-66,8 +66,7`)
 * with the +/- markers stripped. The `micros` / `microsToDays` statements are presumably the
 * removed lines and the `fromJavaDate` call the added one. Confirm against the real file.
 *
 * @param s the date string handed to `parseToDate`
 * @return the value produced by `fromJavaDate` for the parsed date
 */
override def parse(s: String): Int = {
// Presumably converts the parsed date's epoch milliseconds to microseconds. TODO confirm.
val micros = DateTimeUtils.millisToMicros(parseToDate(s).getTime)
// Not the last expression of the block, so this result is discarded as written.
DateTimeUtils.microsToDays(micros)
// Last expression of the block, so this is the value `parse` returns.
// `s` is parsed a second time here rather than reusing the result from above.
fromJavaDate(new java.sql.Date(parseToDate(s).getTime))
}
override def format(days: Int): String = {

View file

@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, localDateToDays}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
class DateFormatterSuite extends SparkFunSuite with SQLHelper {
test("parsing dates") {
@ -47,10 +48,13 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
}
test("roundtrip date -> days -> date") {
LegacyBehaviorPolicy.values.foreach { parserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
Seq(
"0050-01-01",
"0953-02-02",
"1423-03-08",
"1582-10-15",
"1969-12-31",
"1972-08-25",
"1975-09-26",
@ -67,8 +71,12 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
}
}
}
}
}
test("roundtrip days -> date -> days") {
LegacyBehaviorPolicy.values.foreach { parserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
Seq(
-701265,
-371419,
@ -90,6 +98,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
}
}
}
}
}
test("parsing date without explicit day") {
val formatter = DateFormatter("yyyy MMM", ZoneOffset.UTC)