diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala
index 8ee6c87fae..5967ca4f99 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala
@@ -131,7 +131,8 @@ object RebaseDateTime {
   // The differences in days between Proleptic Gregorian and Julian dates.
   // The diff at the index `i` is applicable for all days in the date interval:
   // [gregJulianDiffSwitchDay(i), gregJulianDiffSwitchDay(i+1))
-  private val gregJulianDiffs = Array(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0)
+  private val gregJulianDiffs = Array(
+    -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
   // The sorted days in Proleptic Gregorian calendar when difference in days between
   // Proleptic Gregorian and Julian was changed.
   // The starting point is the `0001-01-01` (-719162 days since the epoch in
@@ -139,13 +140,20 @@ object RebaseDateTime {
   // Rebasing switch days and diffs `gregJulianDiffSwitchDay` and `gregJulianDiffs`
   // was generated by the `localRebaseGregorianToJulianDays` function.
   private val gregJulianDiffSwitchDay = Array(
-    -719162, -682944, -646420, -609896, -536847, -500323, -463799,
-    -390750, -354226, -317702, -244653, -208129, -171605, -141427)
+    -719162, -682944, -646420, -609896, -536847, -500323, -463799, -390750,
+    -354226, -317702, -244653, -208129, -171605, -141436, -141435, -141434,
+    -141433, -141432, -141431, -141430, -141429, -141428, -141427)
 
   // The first days of Common Era (CE) which is mapped to the '0001-01-01' date
   // in Proleptic Gregorian calendar.
   private final val gregorianCommonEraStartDay = gregJulianDiffSwitchDay(0)
 
+  // The first day of the Gregorian calendar in the hybrid (Julian + Gregorian) calendar.
+  private final val gregorianStartDay = LocalDate.of(1582, 10, 15)
+  // The last day of the Julian calendar in the hybrid calendar. Local dates strictly between
+  // `julianEndDay` and `gregorianStartDay` do not exist in the hybrid calendar.
+  private final val julianEndDay = LocalDate.of(1582, 10, 4)
+
   /**
    * Converts the given number of days since the epoch day 1970-01-01 to a local date in Proleptic
    * Gregorian calendar, interprets the result as a local date in Julian calendar, and takes the
@@ -165,7 +173,10 @@ object RebaseDateTime {
    * @return The rebased number of days in Julian calendar.
    */
   private[sql] def localRebaseGregorianToJulianDays(days: Int): Int = {
-    val localDate = LocalDate.ofEpochDay(days)
+    val requestedDate = LocalDate.ofEpochDay(days)
+    // Dates in the cutover gap don't exist in the hybrid calendar; use the first Gregorian day.
+    val inGap = requestedDate.isAfter(julianEndDay) && requestedDate.isBefore(gregorianStartDay)
+    val localDate = if (inGap) gregorianStartDay else requestedDate
     val utcCal = new Calendar.Builder()
       // `gregory` is a hybrid calendar that supports both
       // the Julian and Gregorian calendar systems
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
index 15fd90c679..90935a42c8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
@@ -364,4 +364,28 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
       }
     }
   }
+
+  test("rebase not-existed dates in the hybrid calendar") {
+    outstandingZoneIds.foreach { zid =>
+      withDefaultTimeZone(zid) {
+        // Proleptic Gregorian local date -> expected local date in the hybrid calendar.
+        Seq(
+          "1582-10-04" -> "1582-10-04",
+          "1582-10-05" -> "1582-10-15", "1582-10-06" -> "1582-10-15", "1582-10-07" -> "1582-10-15",
+          "1582-10-08" -> "1582-10-15", "1582-10-09" -> "1582-10-15", "1582-10-10" -> "1582-10-15",
+          "1582-10-11" -> "1582-10-15", "1582-10-12" -> "1582-10-15", "1582-10-13" -> "1582-10-15",
+          "1582-10-14" -> "1582-10-15",
+          "1582-10-15" -> "1582-10-15").foreach { case (gregDate, hybridDate) =>
+          withClue(s"tz = ${zid.getId} hybrid date = $hybridDate greg date = $gregDate") {
+            val date = Date.valueOf(hybridDate)
+            val hybridDays = fromJavaDateLegacy(date)
+            val gregorianDays = localDateToDays(LocalDate.parse(gregDate))
+
+            assert(localRebaseGregorianToJulianDays(gregorianDays) === hybridDays)
+            assert(rebaseGregorianToJulianDays(gregorianDays) === hybridDays)
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
index 0b7500c012..f0ce6d503b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
@@ -493,17 +493,19 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
     }
   }
 
-  test("SPARK-31238: rebasing dates in write") {
+  test("SPARK-31238, SPARK-31423: rebasing dates in write") {
     withTempPath { dir =>
       val path = dir.getAbsolutePath
-      Seq("1001-01-01").toDF("dateS")
+      Seq("1001-01-01", "1582-10-10").toDF("dateS")
         .select($"dateS".cast("date").as("date"))
         .write
         .orc(path)
 
       Seq(false, true).foreach { vectorized =>
         withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
-          checkAnswer(spark.read.orc(path), Row(Date.valueOf("1001-01-01")))
+          checkAnswer(
+            spark.read.orc(path),
+            Seq(Row(Date.valueOf("1001-01-01")), Row(Date.valueOf("1582-10-15"))))
         }
       }
     }