[SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days
### What changes were proposed in this pull request?

This PR modifies `LegacyDateFormatter#parse` to return proleptic Gregorian days rather than hybrid Julian days.

### Why are the changes needed?

The legacy time parser currently returns epoch days in the hybrid Julian calendar. However, the callers of the legacy parser (e.g., `UnivocityParser`, `JacksonParser`) expect epoch days in the proleptic Gregorian calendar. As a result, pre-Gregorian dates like '1000-01-01' get interpreted as '1000-01-06'.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Manual testing and modified existing unit tests.

Closes #28345 from bersprockets/SPARK-31557.

Authored-by: Bruce Robbins <bersprockets@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
560bd5401f
commit
a911287244
|
@ -66,8 +66,7 @@ trait LegacyDateFormatter extends DateFormatter {
|
|||
def formatDate(d: Date): String
|
||||
|
||||
override def parse(s: String): Int = {
|
||||
val micros = DateTimeUtils.millisToMicros(parseToDate(s).getTime)
|
||||
DateTimeUtils.microsToDays(micros)
|
||||
fromJavaDate(new java.sql.Date(parseToDate(s).getTime))
|
||||
}
|
||||
|
||||
override def format(days: Int): String = {
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper
|
|||
import org.apache.spark.sql.catalyst.util._
|
||||
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, localDateToDays}
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
|
||||
|
||||
class DateFormatterSuite extends SparkFunSuite with SQLHelper {
|
||||
test("parsing dates") {
|
||||
|
@ -47,45 +48,54 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
|
|||
}
|
||||
|
||||
test("roundtrip date -> days -> date") {
|
||||
Seq(
|
||||
"0050-01-01",
|
||||
"0953-02-02",
|
||||
"1423-03-08",
|
||||
"1969-12-31",
|
||||
"1972-08-25",
|
||||
"1975-09-26",
|
||||
"2018-12-12",
|
||||
"2038-01-01",
|
||||
"5010-11-17").foreach { date =>
|
||||
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
|
||||
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
|
||||
val formatter = DateFormatter(getZoneId(timeZone))
|
||||
val days = formatter.parse(date)
|
||||
val formatted = formatter.format(days)
|
||||
assert(date === formatted)
|
||||
LegacyBehaviorPolicy.values.foreach { parserPolicy =>
|
||||
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
|
||||
Seq(
|
||||
"0050-01-01",
|
||||
"0953-02-02",
|
||||
"1423-03-08",
|
||||
"1582-10-15",
|
||||
"1969-12-31",
|
||||
"1972-08-25",
|
||||
"1975-09-26",
|
||||
"2018-12-12",
|
||||
"2038-01-01",
|
||||
"5010-11-17").foreach { date =>
|
||||
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
|
||||
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
|
||||
val formatter = DateFormatter(getZoneId(timeZone))
|
||||
val days = formatter.parse(date)
|
||||
val formatted = formatter.format(days)
|
||||
assert(date === formatted)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("roundtrip days -> date -> days") {
|
||||
Seq(
|
||||
-701265,
|
||||
-371419,
|
||||
-199722,
|
||||
-1,
|
||||
0,
|
||||
967,
|
||||
2094,
|
||||
17877,
|
||||
24837,
|
||||
1110657).foreach { days =>
|
||||
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
|
||||
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
|
||||
val formatter = DateFormatter(getZoneId(timeZone))
|
||||
val date = formatter.format(days)
|
||||
val parsed = formatter.parse(date)
|
||||
assert(days === parsed)
|
||||
LegacyBehaviorPolicy.values.foreach { parserPolicy =>
|
||||
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
|
||||
Seq(
|
||||
-701265,
|
||||
-371419,
|
||||
-199722,
|
||||
-1,
|
||||
0,
|
||||
967,
|
||||
2094,
|
||||
17877,
|
||||
24837,
|
||||
1110657).foreach { days =>
|
||||
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
|
||||
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
|
||||
val formatter = DateFormatter(getZoneId(timeZone))
|
||||
val date = formatter.format(days)
|
||||
val parsed = formatter.parse(date)
|
||||
assert(days === parsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue