diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0ec6291d46..ae444ebf3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -224,12 +224,12 @@ object DateTimeUtils { * value. The return type is [[Option]] in order to distinguish between 0L and null. The following * formats are allowed: * - * `yyyy` - * `yyyy-[m]m` - * `yyyy-[m]m-[d]d` - * `yyyy-[m]m-[d]d ` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[+-]yyyy*` + * `[+-]yyyy*-[m]m` + * `[+-]yyyy*-[m]m-[d]d` + * `[+-]yyyy*-[m]m-[d]d ` + * `[+-]yyyy*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[+-]yyyy*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` * @@ -249,17 +249,30 @@ object DateTimeUtils { * the input string can't be parsed as timestamp, the result timestamp segments are empty. */ def parseTimestampString(s: UTF8String): (Array[Int], Option[ZoneId], Boolean) = { - if (s == null) { + def isValidDigits(segment: Int, digits: Int): Boolean = { + // A Long is able to represent a timestamp within [+-]200 thousand years + val maxDigitsYear = 6 + // For the nanosecond part, more than 6 digits is allowed, but will be truncated. + segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) || + (segment != 0 && segment != 6 && digits <= 2) + } + if (s == null || s.trimAll().numBytes() == 0) { return (Array.empty, None, false) } var tz: Option[String] = None val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0) var i = 0 var currentSegmentValue = 0 + var currentSegmentDigits = 0 val bytes = s.trimAll().getBytes var j = 0 var digitsMilli = 0 var justTime = false + var yearSign: Option[Int] = None + if (bytes(j) == '-' || bytes(j) == '+') { + yearSign = if (bytes(j) == '-') Some(-1) else Some(1) + j += 1 + } while (j < bytes.length) { val b = bytes(j) val parsedValue = b - '0'.toByte @@ -269,50 +282,74 @@ object DateTimeUtils { i += 3 } else if (i < 2) { if (b == '-') { - if (i == 0 && j != 4) { - // year should have exact four digits + if (!isValidDigits(i, currentSegmentDigits)) { return (Array.empty, None, false) } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 - } else if (i == 0 && b == ':') { + } else if (i == 0 && b == ':' && yearSign.isEmpty) { justTime = true + if (!isValidDigits(3, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(3) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i = 4 } else { return (Array.empty, None, false) } } else if (i == 2) { if (b == ' ' || b == 'T') { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 } else { return (Array.empty, None, false) } } else if (i == 3 || i == 4) { if (b == ':') { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 } else { return (Array.empty, None, false) } } else if (i == 5 || i == 6) { if (b == '-' || b == '+') { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 tz = Some(new String(bytes, j, 1)) } else if (b == '.' && i == 5) { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 } else { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 tz = Some(new String(bytes, j, bytes.length - j)) j = bytes.length - 1 @@ -322,8 +359,12 @@ object DateTimeUtils { } } else { if (i < segments.length && (b == ':' || b == ' ')) { + if (!isValidDigits(i, currentSegmentDigits)) { + return (Array.empty, None, false) + } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 } else { return (Array.empty, None, false) @@ -333,61 +374,40 @@ object DateTimeUtils { if (i == 6) { digitsMilli += 1 } - currentSegmentValue = currentSegmentValue * 10 + parsedValue + // We will truncate the nanosecond part if there are more than 6 digits, which results + // in loss of precision + if (i != 6 || currentSegmentDigits < 6) { + currentSegmentValue = currentSegmentValue * 10 + parsedValue + } + currentSegmentDigits += 1 } j += 1 } - segments(i) = currentSegmentValue - if (!justTime && i == 0 && j != 4) { - // year should have exact four digits + if (!isValidDigits(i, currentSegmentDigits)) { return (Array.empty, None, false) } + segments(i) = currentSegmentValue while (digitsMilli < 6) { segments(6) *= 10 digitsMilli += 1 } - // We are truncating the nanosecond part, which results in loss of precision - while (digitsMilli > 6) { - segments(6) /= 10 - digitsMilli -= 1 - } // This step also validates time zone part val zoneId = tz.map { case "+" => ZoneOffset.ofHoursMinutes(segments(7), segments(8)) case "-" => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8)) case zoneName: String => getZoneId(zoneName.trim) } + segments(0) *= yearSign.getOrElse(1) (segments, zoneId, justTime) } /** * Trims and parses a given UTF8 timestamp string to the corresponding a corresponding [[Long]] - * value. The return type is [[Option]] in order to distinguish between 0L and null. The following - * formats are allowed: - * - * `yyyy` - * `yyyy-[m]m` - * `yyyy-[m]m-[d]d` - * `yyyy-[m]m-[d]d ` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * - * where `zone_id` should have one of the forms: - * - Z - Zulu time zone UTC+0 - * - +|-[h]h:[m]m - * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS - * - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-, - * and a suffix in the formats: - * - +|-h[h] - * - +|-hh[:]mm - * - +|-hh:mm:ss - * - +|-hhmmss - * - Region-based zone IDs in the form `area/city`, such as `Europe/Paris` + * value. The return type is [[Option]] in order to distinguish between 0L and null. Please + * refer to `parseTimestampString` for the allowed formats */ def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[Long] = { try { @@ -422,30 +442,8 @@ object DateTimeUtils { * Trims and parses a given UTF8 string to a corresponding [[Long]] value which representing the * number of microseconds since the epoch. The result is independent of time zones, * which means that zone ID in the input string will be ignored. - * The return type is [[Option]] in order to distinguish between 0L and null. The following - * formats are allowed: - * - * `yyyy` - * `yyyy-[m]m` - * `yyyy-[m]m-[d]d` - * `yyyy-[m]m-[d]d ` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` - * - * where `zone_id` should have one of the forms: - * - Z - Zulu time zone UTC+0 - * - +|-[h]h:[m]m - * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS - * - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-, - * and a suffix in the formats: - * - +|-h[h] - * - +|-hh[:]mm - * - +|-hh:mm:ss - * - +|-hhmmss - * - Region-based zone IDs in the form `area/city`, such as `Europe/Paris` - * - * Note: The input string has to contains year/month/day fields, otherwise Spark can't determine - * the value of timestamp without time zone. + * The return type is [[Option]] in order to distinguish between 0L and null. Please + * refer to `parseTimestampString` for the allowed formats. */ def stringToTimestampWithoutTimeZone(s: UTF8String): Option[Long] = { try { @@ -518,31 +516,42 @@ object DateTimeUtils { * The return type is [[Option]] in order to distinguish between 0 and null. The following * formats are allowed: * - * `yyyy` - * `yyyy-[m]m` - * `yyyy-[m]m-[d]d` - * `yyyy-[m]m-[d]d ` - * `yyyy-[m]m-[d]d *` - * `yyyy-[m]m-[d]dT*` + * `[+-]yyyy*` + * `[+-]yyyy*-[m]m` + * `[+-]yyyy*-[m]m-[d]d` + * `[+-]yyyy*-[m]m-[d]d ` + * `[+-]yyyy*-[m]m-[d]d *` + * `[+-]yyyy*-[m]m-[d]dT*` */ def stringToDate(s: UTF8String): Option[Int] = { - if (s == null) { + def isValidDigits(segment: Int, digits: Int): Boolean = { + // An integer is able to represent a date within [+-]5 million years. + var maxDigitsYear = 7 + (segment == 0 && digits >= 4 && digits <= maxDigitsYear) || (segment != 0 && digits <= 2) + } + if (s == null || s.trimAll().numBytes() == 0) { return None } val segments: Array[Int] = Array[Int](1, 1, 1) + var sign = 1 var i = 0 var currentSegmentValue = 0 + var currentSegmentDigits = 0 val bytes = s.trimAll().getBytes var j = 0 + if (bytes(j) == '-' || bytes(j) == '+') { + sign = if (bytes(j) == '-') -1 else 1 + j += 1 + } while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) { val b = bytes(j) if (i < 2 && b == '-') { - if (i == 0 && j != 4) { - // year should have exact four digits + if (!isValidDigits(i, currentSegmentDigits)) { return None } segments(i) = currentSegmentValue currentSegmentValue = 0 + currentSegmentDigits = 0 i += 1 } else { val parsedValue = b - '0'.toByte @@ -550,12 +559,12 @@ object DateTimeUtils { return None } else { currentSegmentValue = currentSegmentValue * 10 + parsedValue + currentSegmentDigits += 1 } } j += 1 } - if (i == 0 && j != 4) { - // year should have exact four digits + if (!isValidDigits(i, currentSegmentDigits)) { return None } if (i < 2 && j < bytes.length) { @@ -564,7 +573,7 @@ object DateTimeUtils { } segments(i) = currentSegmentValue try { - val localDate = LocalDate.of(segments(0), segments(1), segments(2)) + val localDate = LocalDate.of(sign * segments(0), segments(1), segments(2)) Some(localDateToDays(localDate)) } catch { case NonFatal(_) => None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index a8c9dbc0e9..ac72aa5faf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -392,8 +392,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { s"Cannot cast $str to DateType.") } - checkCastWithParseError("12345") - checkCastWithParseError("12345-12-18") checkCastWithParseError("2015-13-18") checkCastWithParseError("2015-03-128") checkCastWithParseError("2015/03/18") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 38e012caa1..dcdc6f9c4d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -107,6 +107,12 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { test("cast string to date") { var c = Calendar.getInstance() + c.set(12345, 0, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("12345"), DateType), new Date(c.getTimeInMillis)) + c.set(12345, 11, 18, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("12345-12-18"), DateType), new Date(c.getTimeInMillis)) c.set(2015, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) checkEvaluation(Cast(Literal("2015"), DateType), new Date(c.getTimeInMillis)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala index aa010ae229..216931e6ec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala @@ -194,9 +194,11 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // before epoch checkHiveHashForDateType("1800-01-01", -62091) + // negative year + checkHiveHashForDateType("-1212-01-01", -1162202) + // Invalid input: bad date string. Hive returns 0 for such cases intercept[NoSuchElementException](checkHiveHashForDateType("0-0-0", 0)) - intercept[NoSuchElementException](checkHiveHashForDateType("-1212-01-01", 0)) intercept[NoSuchElementException](checkHiveHashForDateType("2016-99-99", 0)) // Invalid input: Empty string. Hive returns 0 for this case diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index e6bf893cc3..2b7b94175b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -142,13 +142,31 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(toDate("2015.03.18").isEmpty) assert(toDate("20150318").isEmpty) assert(toDate("2015-031-8").isEmpty) - assert(toDate("02015-03-18").isEmpty) assert(toDate("015-03-18").isEmpty) assert(toDate("015").isEmpty) - assert(toDate("02015").isEmpty) assert(toDate("1999 08 01").isEmpty) assert(toDate("1999-08 01").isEmpty) assert(toDate("1999 08").isEmpty) + assert(toDate("").isEmpty) + assert(toDate(" ").isEmpty) + } + + test("SPARK-35780: support full range of date string") { + assert(toDate("02015-03-18").get === days(2015, 3, 18)) + assert(toDate("02015").get === days(2015, 1, 1)) + assert(toDate("-02015").get === days(-2015, 1, 1)) + assert(toDate("999999-1-28").get === days(999999, 1, 28)) + assert(toDate("-999999-1-28").get === days(-999999, 1, 28)) + assert(toDate("0001-1-28").get === days(1, 1, 28)) + // Int.MaxValue and Int.MaxValue + 1 day + assert(toDate("5881580-7-11").get === days(5881580, 7, 11)) + assert(toDate("5881580-7-12").isEmpty) + // Int.MinValue and Int.MinValue - 1 day + assert(toDate("-5877641-6-23").get === days(-5877641, 6, 23)) + assert(toDate("-5877641-6-22").isEmpty) + // Check overflow of single segment in date format + assert(toDate("4294967297").isEmpty) + assert(toDate("2021-4294967297-11").isEmpty) } private def toTimestamp(str: String, zoneId: ZoneId): Option[Long] = { @@ -254,7 +272,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkStringToTimestamp("2011-05-06 07:08:09.1000", expected) checkStringToTimestamp("238", None) - checkStringToTimestamp("00238", None) checkStringToTimestamp("2015-03-18 123142", None) checkStringToTimestamp("2015-03-18T123123", None) checkStringToTimestamp("2015-03-18X", None) @@ -262,7 +279,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkStringToTimestamp("2015.03.18", None) checkStringToTimestamp("20150318", None) checkStringToTimestamp("2015-031-8", None) - checkStringToTimestamp("02015-01-18", None) checkStringToTimestamp("015-01-18", None) checkStringToTimestamp("2015-03-18T12:03.17-20:0", None) checkStringToTimestamp("2015-03-18T12:03.17-0:70", None) @@ -270,6 +286,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkStringToTimestamp("1999 08 01", None) checkStringToTimestamp("1999-08 01", None) checkStringToTimestamp("1999 08", None) + checkStringToTimestamp("", None) + checkStringToTimestamp(" ", None) + checkStringToTimestamp("+", None) // Truncating the fractional seconds expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC)) @@ -283,6 +302,45 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } } + test("SPARK-35780: support full range of timestamp string") { + def checkStringToTimestamp(str: String, expected: Option[Long]): Unit = { + assert(toTimestamp(str, UTC) === expected) + } + + checkStringToTimestamp("-1969-12-31 16:00:00", Option(date(-1969, 12, 31, 16, zid = UTC))) + checkStringToTimestamp("02015-03-18 16:00:00", Option(date(2015, 3, 18, 16, zid = UTC))) + checkStringToTimestamp("000001", Option(date(1, 1, 1, 0, zid = UTC))) + checkStringToTimestamp("-000001", Option(date(-1, 1, 1, 0, zid = UTC))) + checkStringToTimestamp("00238", Option(date(238, 1, 1, 0, zid = UTC))) + checkStringToTimestamp("99999-03-01T12:03:17", Option(date(99999, 3, 1, 12, 3, 17, zid = UTC))) + checkStringToTimestamp("+12:12:12", None) + checkStringToTimestamp("-12:12:12", None) + checkStringToTimestamp("", None) + checkStringToTimestamp(" ", None) + checkStringToTimestamp("+", None) + // Long.MaxValue and Long.MaxValue + 1 micro seconds + checkStringToTimestamp( + "294247-01-10T04:00:54.775807Z", + Option(date(294247, 1, 10, 4, 0, 54, 775807, zid = UTC))) + checkStringToTimestamp("294247-01-10T04:00:54.775808Z", None) + // Long.MinValue and Long.MinValue - 1 micro seconds + checkStringToTimestamp( + "-290308-12-21T19:59:05.224192Z", + Option(date(-290308, 12, 21, 19, 59, 5, 224192, zid = UTC))) + // Check overflow of single segment in timestamp format + checkStringToTimestamp("-290308-12-21T19:59:05.224191Z", None) + checkStringToTimestamp("4294967297", None) + checkStringToTimestamp("2021-4294967297-11", None) + checkStringToTimestamp("4294967297:30:00", None) + checkStringToTimestamp("2021-11-4294967297T12:30:00", None) + checkStringToTimestamp("2021-01-01T12:4294967297:00", None) + checkStringToTimestamp("2021-01-01T12:30:4294967297", None) + checkStringToTimestamp("2021-01-01T12:30:4294967297.123456", None) + checkStringToTimestamp("2021-01-01T12:30:4294967297+07:30", None) + checkStringToTimestamp("2021-01-01T12:30:4294967297UTC", None) + checkStringToTimestamp("2021-01-01T12:30:4294967297+4294967297:30", None) + } + test("SPARK-15379: special invalid date string") { // Test stringToDate assert(toDate("2015-02-29 00:00:00").isEmpty) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 74a451ec25..2c5a1aa445 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -278,3 +278,18 @@ SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007); SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678); SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET'); SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007); + +-- datetime with year outside [0000-9999] +select date'999999-03-18'; +select date'-0001-1-28'; +select date'0015'; +select cast('015' as date); +select cast('2021-4294967297-11' as date); + +select timestamp'-1969-12-31 16:00:00'; +select timestamp'0015-03-18 16:00:00'; +select timestamp'-000001'; +select timestamp'99999-03-18T12:03:17'; +select cast('4294967297' as timestamp); +select cast('2021-01-01T12:30:4294967297.123456' as timestamp); + diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 4e999f3364..99b8d118d9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 206 +-- Number of queries: 217 -- !query @@ -1748,3 +1748,95 @@ struct<> -- !query output java.time.DateTimeException The fraction of sec must be zero. Valid range is [0, 60]. + + +-- !query +select date'999999-03-18' +-- !query schema +struct +-- !query output ++999999-03-18 + + +-- !query +select date'-0001-1-28' +-- !query schema +struct +-- !query output +-0001-01-28 + + +-- !query +select date'0015' +-- !query schema +struct +-- !query output +0015-01-01 + + +-- !query +select cast('015' as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast 015 to DateType. + + +-- !query +select cast('2021-4294967297-11' as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast 2021-4294967297-11 to DateType. + + +-- !query +select timestamp'-1969-12-31 16:00:00' +-- !query schema +struct +-- !query output +-1969-12-31 16:00:00 + + +-- !query +select timestamp'0015-03-18 16:00:00' +-- !query schema +struct +-- !query output +0015-03-18 16:00:00 + + +-- !query +select timestamp'-000001' +-- !query schema +struct +-- !query output +-0001-01-01 00:00:00 + + +-- !query +select timestamp'99999-03-18T12:03:17' +-- !query schema +struct +-- !query output ++99999-03-18 12:03:17 + + +-- !query +select cast('4294967297' as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast 4294967297 to TimestampType. + + +-- !query +select cast('2021-01-01T12:30:4294967297.123456' as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast 2021-01-01T12:30:4294967297.123456 to TimestampType. diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 21088080ec..624878185c 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 206 +-- Number of queries: 217 -- !query @@ -1689,3 +1689,91 @@ SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007) struct -- !query output NULL + + +-- !query +select date'999999-03-18' +-- !query schema +struct +-- !query output +999999-03-18 + + +-- !query +select date'-0001-1-28' +-- !query schema +struct +-- !query output +0002-01-28 + + +-- !query +select date'0015' +-- !query schema +struct +-- !query output +0015-01-01 + + +-- !query +select cast('015' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-4294967297-11' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select timestamp'-1969-12-31 16:00:00' +-- !query schema +struct +-- !query output +-1969-12-31 16:00:00 + + +-- !query +select timestamp'0015-03-18 16:00:00' +-- !query schema +struct +-- !query output +0015-03-18 16:00:00 + + +-- !query +select timestamp'-000001' +-- !query schema +struct +-- !query output +-0001-01-01 00:00:00 + + +-- !query +select timestamp'99999-03-18T12:03:17' +-- !query schema +struct +-- !query output ++99999-03-18 12:03:17 + + +-- !query +select cast('4294967297' as timestamp) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-01-01T12:30:4294967297.123456' as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index f6278f618a..5882a585af 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 206 +-- Number of queries: 217 -- !query @@ -1697,3 +1697,91 @@ SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007) struct -- !query output NULL + + +-- !query +select date'999999-03-18' +-- !query schema +struct +-- !query output ++999999-03-18 + + +-- !query +select date'-0001-1-28' +-- !query schema +struct +-- !query output +-0001-01-28 + + +-- !query +select date'0015' +-- !query schema +struct +-- !query output +0015-01-01 + + +-- !query +select cast('015' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-4294967297-11' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select timestamp'-1969-12-31 16:00:00' +-- !query schema +struct +-- !query output +-1969-12-31 16:00:00 + + +-- !query +select timestamp'0015-03-18 16:00:00' +-- !query schema +struct +-- !query output +0015-03-18 16:00:00 + + +-- !query +select timestamp'-000001' +-- !query schema +struct +-- !query output +-0001-01-01 00:00:00 + + +-- !query +select timestamp'99999-03-18T12:03:17' +-- !query schema +struct +-- !query output ++99999-03-18 12:03:17 + + +-- !query +select cast('4294967297' as timestamp) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-01-01T12:30:4294967297.123456' as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index a959284750..8eee6a7822 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -454,29 +454,17 @@ struct -- !query SELECT date '5874897-12-31' -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 5874897-12-31(line 1, pos 7) - -== SQL == -SELECT date '5874897-12-31' --------^^^ ++5874897-12-31 -- !query SELECT date '5874898-01-01' -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 5874898-01-01(line 1, pos 7) - -== SQL == -SELECT date '5874898-01-01' --------^^^ ++5874898-01-01 -- !query @@ -584,7 +572,7 @@ select make_date(-44, 3, 15) -- !query schema struct -- !query output -0045-03-15 +-0044-03-15 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out index 69c4b8f90f..afccbcc7bc 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 206 +-- Number of queries: 217 -- !query @@ -1698,3 +1698,91 @@ SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007) struct -- !query output NULL + + +-- !query +select date'999999-03-18' +-- !query schema +struct +-- !query output ++999999-03-18 + + +-- !query +select date'-0001-1-28' +-- !query schema +struct +-- !query output +-0001-01-28 + + +-- !query +select date'0015' +-- !query schema +struct +-- !query output +0015-01-01 + + +-- !query +select cast('015' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-4294967297-11' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select timestamp'-1969-12-31 16:00:00' +-- !query schema +struct +-- !query output +-1969-12-31 16:00:00 + + +-- !query +select timestamp'0015-03-18 16:00:00' +-- !query schema +struct +-- !query output +0015-03-18 16:00:00 + + +-- !query +select timestamp'-000001' +-- !query schema +struct +-- !query output +-0001-01-01 00:00:00 + + +-- !query +select timestamp'99999-03-18T12:03:17' +-- !query schema +struct +-- !query output ++99999-03-18 12:03:17 + + +-- !query +select cast('4294967297' as timestamp) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2021-01-01T12:30:4294967297.123456' as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index d1942936de..d5a34ae64a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -143,6 +143,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected override def sparkConf: SparkConf = super.sparkConf // Fewer shuffle partitions to speed up testing. .set(SQLConf.SHUFFLE_PARTITIONS, 4) + // use Java 8 time API to handle negative years properly + .set(SQLConf.DATETIME_JAVA8API_ENABLED, true) // SPARK-32106 Since we add SQL test 'transform.sql' will use `cat` command, // here we need to ignore it. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index b707a48413..dab1255eea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -21,7 +21,7 @@ import java.io._ import java.nio.charset.{Charset, StandardCharsets, UnsupportedCharsetException} import java.nio.file.Files import java.sql.{Date, Timestamp} -import java.time.{LocalDate, ZoneId} +import java.time.ZoneId import java.util.Locale import com.fasterxml.jackson.core.JsonFactory @@ -1510,8 +1510,7 @@ abstract class JsonSuite """{"col0":"Spark 1.3.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"16436","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: Nil + """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: Nil // scalastyle:on // Generate data for the current version. @@ -1538,7 +1537,6 @@ abstract class JsonSuite "Spark 1.4.1", "Spark 1.4.1", "Spark 1.5.0", - "Spark 1.5.0", "Spark " + spark.sparkContext.version, "Spark " + spark.sparkContext.version) val expectedResult = col0Values.map { v => @@ -2684,16 +2682,13 @@ abstract class JsonSuite } test("SPARK-30960, SPARK-31641: parse date/timestamp string with legacy format") { - val julianDay = -141704 // 1582-01-01 in Julian calendar val ds = Seq( - s"{'t': '2020-1-12 3:23:34.12', 'd': '2020-1-12 T', 'd2': '12345', 'd3': '$julianDay'}" + s"{'t': '2020-1-12 3:23:34.12', 'd': '2020-1-12 T'}" ).toDS() - val json = spark.read.schema("t timestamp, d date, d2 date, d3 date").json(ds) + val json = spark.read.schema("t timestamp, d date").json(ds) checkAnswer(json, Row( Timestamp.valueOf("2020-1-12 3:23:34.12"), - Date.valueOf("2020-1-12"), - Date.valueOf(LocalDate.ofEpochDay(12345)), - Date.valueOf("1582-01-01"))) + Date.valueOf("2020-1-12"))) } test("exception mode for parsing date/timestamp string") { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index f405461229..6df6abbcd1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -83,6 +83,9 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ "postgreSQL/case.sql", // SPARK-28624 "date.sql", + "datetime.sql", + "datetime-legacy.sql", + "ansi/datetime.sql", // SPARK-28620 "postgreSQL/float4.sql", // SPARK-28636