From a911287244a98aa9e6464bcdd97c80e7ad732788 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 27 Apr 2020 05:00:36 +0000 Subject: [PATCH] [SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days ### What changes were proposed in this pull request? This PR modifies LegacyDateFormatter#parse to return proleptic Gregorian days rather than hybrid Julian days. ### Why are the changes needed? The legacy time parser currently returns epoch days in the hybrid Julian calendar. However, the callers to the legacy parser (e.g., UnivocityParser, JacksonParser) expect epoch days in the proleptic Gregorian calendar. As a result, pre-Gregorian dates like '1000-01-01' get interpreted as '1000-01-06'. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Manual testing and modified existing unit tests. Closes #28345 from bersprockets/SPARK-31557. Authored-by: Bruce Robbins Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/DateFormatter.scala | 3 +- .../spark/sql/util/DateFormatterSuite.scala | 76 +++++++++++-------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index e6bd976573..0f79c1a6a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -66,8 +66,7 @@ trait LegacyDateFormatter extends DateFormatter { def formatDate(d: Date): String override def parse(s: String): Int = { - val micros = DateTimeUtils.millisToMicros(parseToDate(s).getTime) - DateTimeUtils.microsToDays(micros) + fromJavaDate(new java.sql.Date(parseToDate(s).getTime)) } override def format(days: Int): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala index a40dbcc2ec..2df1d490b7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, localDateToDays} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy class DateFormatterSuite extends SparkFunSuite with SQLHelper { test("parsing dates") { @@ -47,45 +48,54 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } test("roundtrip date -> days -> date") { - Seq( - "0050-01-01", - "0953-02-02", - "1423-03-08", - "1969-12-31", - "1972-08-25", - "1975-09-26", - "2018-12-12", - "2038-01-01", - "5010-11-17").foreach { date => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter(getZoneId(timeZone)) - val days = formatter.parse(date) - val formatted = formatter.format(days) - assert(date === formatted) + LegacyBehaviorPolicy.values.foreach { parserPolicy => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) { + Seq( + "0050-01-01", + "0953-02-02", + "1423-03-08", + "1582-10-15", + "1969-12-31", + "1972-08-25", + "1975-09-26", + "2018-12-12", + "2038-01-01", + "5010-11-17").foreach { date => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter(getZoneId(timeZone)) + val days = formatter.parse(date) + val formatted = formatter.format(days) + assert(date === formatted) + } + } } } } } test("roundtrip days -> date -> days") { - Seq( - -701265, - -371419, - -199722, - -1, - 0, - 967, - 2094, - 17877, - 24837, - 1110657).foreach { days => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter(getZoneId(timeZone)) - val date = formatter.format(days) - val parsed = formatter.parse(date) - assert(days === parsed) + LegacyBehaviorPolicy.values.foreach { parserPolicy => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) { + Seq( + -701265, + -371419, + -199722, + -1, + 0, + 967, + 2094, + 17877, + 24837, + 1110657).foreach { days => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter(getZoneId(timeZone)) + val date = formatter.format(days) + val parsed = formatter.parse(date) + assert(days === parsed) + } + } } } }