diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md
index 48e85b450e..865b9470ed 100644
--- a/docs/sql-ref-datetime-pattern.md
+++ b/docs/sql-ref-datetime-pattern.md
@@ -74,7 +74,7 @@ The count of pattern letters determines the format.
   For formatting, the fraction length would be padded to the number of contiguous 'S' with zeros.
   Spark supports datetime of micro-of-second precision, which has up to 6 significant digits, but can parse nano-of-second with exceeded part truncated.
 
-- Year: The count of letters determines the minimum field width below which padding is used. If the count of letters is two, then a reduced two digit form is used. For printing, this outputs the rightmost two digits. For parsing, this will parse using the base value of 2000, resulting in a year within the range 2000 to 2099 inclusive. If the count of letters is less than four (but not two), then the sign is only output for negative years. Otherwise, the sign is output if the pad width is exceeded when 'G' is not present.
+- Year: The count of letters determines the minimum field width below which padding is used. If the count of letters is two, then a reduced two digit form is used. For printing, this outputs the rightmost two digits. For parsing, this will parse using the base value of 2000, resulting in a year within the range 2000 to 2099 inclusive. If the count of letters is less than four (but not two), then the sign is only output for negative years. Otherwise, the sign is output if the pad width is exceeded when 'G' is not present. 11 or more letters will fail.
 
 - Month: It follows the rule of Number/Text. The text form is depend on letters - 'M' denotes the 'standard' form, and 'L' is for 'stand-alone' form. These two forms are different only in some certain languages. For example, in Russian, 'Июль' is the stand-alone form of July, and 'Июля' is the standard form. Here are examples for all supported pattern letters:
   - `'M'` or `'L'`: Month number in a year starting from 1. There is no difference between 'M' and 'L'. Month from 1 to 9 are printed without padding.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 353c074caa..5b9d839653 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -227,8 +227,16 @@ private object DateTimeFormatterHelper {
     formatter.format(LocalDate.of(2000, 1, 1)) == "1 1"
   }
   final val unsupportedLetters = Set('A', 'c', 'e', 'n', 'N', 'p')
-  final val unsupportedNarrowTextStyle =
-    Seq("G", "M", "L", "E", "u", "Q", "q").map(_ * 5).toSet
+  final val unsupportedPatternLengths = {
+    // SPARK-31771: Disable Narrow-form TextStyle to avoid silent data change, as it is Full-form
+    // in 2.4
+    Seq("G", "M", "L", "E", "u", "Q", "q").map(_ * 5) ++
+      // SPARK-31867: Disable year patterns longer than 10 letters, which cause the Java time
+      // library to throw an unchecked `ArrayIndexOutOfBoundsException` from `NumberPrinterParser`
+      // during formatting. That makes the exception hard for call sites to handle and easily
+      // leads to silent data change because the exception gets suppressed.
+ Seq("y", "Y").map(_ * 11) + }.toSet /** * In Spark 3.0, we switch to the Proleptic Gregorian calendar and use DateTimeFormatter for @@ -250,7 +258,7 @@ private object DateTimeFormatterHelper { for (c <- patternPart if unsupportedLetters.contains(c)) { throw new IllegalArgumentException(s"Illegal pattern character: $c") } - for (style <- unsupportedNarrowTextStyle if patternPart.contains(style)) { + for (style <- unsupportedPatternLengths if patternPart.contains(style)) { throw new IllegalArgumentException(s"Too many pattern letters: ${style.head}") } if (bugInStandAloneForm && (patternPart.contains("LLL") || patternPart.contains("qqq"))) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala index caf7bdde10..18acaafc6d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala @@ -40,7 +40,7 @@ class DateTimeFormatterHelperSuite extends SparkFunSuite { val e = intercept[IllegalArgumentException](convertIncompatiblePattern(s"yyyy-MM-dd $l G")) assert(e.getMessage === s"Illegal pattern character: $l") } - unsupportedNarrowTextStyle.foreach { style => + unsupportedPatternLengths.foreach { style => val e1 = intercept[IllegalArgumentException] { convertIncompatiblePattern(s"yyyy-MM-dd $style") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 7ff9b46bc6..aeb238e75f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -406,8 +406,9 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC).format(0)) } // supported by the legacy one, then we will suggest users with SparkUpgradeException - Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa").foreach { pattern => - intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0)) + Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "y" * 11) + .foreach { pattern => + intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0)) } } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 663c62f1a6..9bd936f6f4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -160,3 +160,7 @@ select from_json('{"time":"26/October/2015"}', 'time Timestamp', map('timestampF select from_json('{"date":"26/October/2015"}', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'time Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy')); + +select from_unixtime(1, 'yyyyyyyyyyy-MM-dd'); +select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss'); +select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out 
index 5857a0ac90..ca04b008d6 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 116
+-- Number of queries: 119
 
 
 -- !query
@@ -999,3 +999,29 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select from_unixtime(1, 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss')
+-- !query schema
+struct<date_format(TIMESTAMP '2018-11-17 13:33:33', yyyyyyyyyy-MM-dd HH:mm:ss):string>
+-- !query output
+0000002018-11-17 13:33:33
+
+
+-- !query
+select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index 8a726efafa..fe932d3a70 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 116
+-- Number of queries: 119
 
 
 -- !query
@@ -956,3 +956,27 @@ select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy
 struct<from_csv(26/October/2015):struct<date:date>>
 -- !query output
 {"date":2015-10-26}
+
+
+-- !query
+select from_unixtime(1, 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<from_unixtime(CAST(1 AS BIGINT), yyyyyyyyyyy-MM-dd):string>
+-- !query output
+00000001969-12-31
+
+
+-- !query
+select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss')
+-- !query schema
+struct<date_format(TIMESTAMP '2018-11-17 13:33:33', yyyyyyyyyy-MM-dd HH:mm:ss):string>
+-- !query output
+0000002018-11-17 13:33:33
+
+
+-- !query
+select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<date_format(CAST(DATE '2018-11-17' AS TIMESTAMP), yyyyyyyyyyy-MM-dd):string>
+-- !query output
+00000002018-11-17
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 7cacaec42c..06a41da267 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 116
+-- Number of queries: 119
 
 
 -- !query
@@ -971,3 +971,29 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select from_unixtime(1, 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss')
+-- !query schema
+struct<date_format(TIMESTAMP '2018-11-17 13:33:33', yyyyyyyyyy-MM-dd HH:mm:ss):string>
+-- !query output
+0000002018-11-17 13:33:33
+
+
+-- !query
+select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkUpgradeException
+You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
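
Reviewer note on why the pattern is rejected eagerly rather than letting formatting fail: in the JDK's `NumberPrinterParser` (named in the comment above), the `SignStyle.EXCEEDS_PAD` path indexes a sign-threshold table (`EXCEED_POINTS`) that only covers pad widths up to 10, so formatting a year with an 11-letter pattern dies with an unchecked `ArrayIndexOutOfBoundsException`. A minimal standalone sketch against plain `java.time`, outside Spark (the object name and dates are illustrative, not from this patch):

    import java.time.LocalDate
    import java.time.format.DateTimeFormatter

    // Hypothetical repro: only java.time is involved, no Spark code.
    object YearPatternRepro {
      def main(args: Array[String]): Unit = {
        val date = LocalDate.of(2018, 11, 17)

        // Ten 'y' letters still work: the year is zero-padded to width 10.
        println(DateTimeFormatter.ofPattern("y" * 10).format(date)) // 0000002018

        // Eleven 'y' letters: ofPattern() succeeds, but format() throws an
        // unchecked ArrayIndexOutOfBoundsException from NumberPrinterParser.
        try println(DateTimeFormatter.ofPattern("y" * 11).format(date))
        catch {
          case e: ArrayIndexOutOfBoundsException => println(s"format failed: $e")
        }
      }
    }

Because the exception surfaces only at format time and is easy to swallow, checking `unsupportedPatternLengths` in `convertIncompatiblePattern` turns that opaque runtime error into the explicit `IllegalArgumentException`/`SparkUpgradeException` exercised by the new tests and .sql.out results above.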