diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 0e03eac410..de60aed748 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -40,6 +40,8 @@ license: |
 
   - In Spark 3.1, `path` option cannot coexist when the following methods are called with path parameter(s): `DataFrameReader.load()`, `DataFrameWriter.save()`, `DataStreamReader.load()`, or `DataStreamWriter.start()`. In addition, `paths` option cannot coexist for `DataFrameReader.load()`. For example, `spark.read.format("csv").option("path", "/tmp").load("/tmp2")` or `spark.read.option("path", "/tmp").csv("/tmp2")` will throw `org.apache.spark.sql.AnalysisException`. In Spark version 3.0 and below, `path` option is overwritten if one path parameter is passed to above methods; `path` option is added to the overall paths if multiple path parameters are passed to `DataFrameReader.load()`. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.pathOptionBehavior.enabled` to `true`.
 
+  - In Spark 3.1, incomplete interval literals, e.g. `INTERVAL '1'` and `INTERVAL '1 DAY 2'`, will fail with `IllegalArgumentException`. In Spark 3.0, they resulted in `NULL`s.
+
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index b1d4c119fa..f716ca1777 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -736,7 +736,10 @@ object IntervalUtils {
     val result = state match {
       case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN =>
         new CalendarInterval(months, days, microseconds)
-      case _ => null
+      case TRIM_BEFORE_VALUE => throwIAE(s"expect a number after '$currentWord' but hit EOL")
+      case VALUE | VALUE_FRACTIONAL_PART =>
+        throwIAE(s"expect a unit name after '$currentWord' but hit EOL")
+      case _ => throwIAE(s"unknown error when parsing '$currentWord'")
     }
 
     result
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index d560d1048b..c313b54687 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -77,6 +77,19 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
     }
   }
 
+  test("string to interval: interval with dangling parts should not result in null") {
+    checkFromInvalidString("+", "expect a number after '+' but hit EOL")
+    checkFromInvalidString("-", "expect a number after '-' but hit EOL")
+    checkFromInvalidString("+ 2", "expect a unit name after '2' but hit EOL")
+    checkFromInvalidString("- 1", "expect a unit name after '1' but hit EOL")
+    checkFromInvalidString("1", "expect a unit name after '1' but hit EOL")
+    checkFromInvalidString("1.2", "expect a unit name after '1.2' but hit EOL")
+    checkFromInvalidString("1 day 2", "expect a unit name after '2' but hit EOL")
2.2", "expect a unit name after '2.2' but hit EOL") + checkFromInvalidString("1 day -", "expect a number after '-' but hit EOL") + checkFromInvalidString("-.", "expect a unit name after '-.' but hit EOL") + } + test("string to interval: multiple units") { Seq( "-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1), diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 2f6a2c9fef..e925c4508f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -198,3 +198,11 @@ SELECT to_json(from_json('{"a":"1 days"}', 'a interval')), to_json(map('a', interval 25 month 100 day 130 minute)), from_json(to_json(map('a', interval 25 month 100 day 130 minute)), 'a interval'); + +select interval '+'; +select interval '+.'; +select interval '1'; +select interval '1.2'; +select interval '- 2'; +select interval '1 day -'; +select interval '1 day 1'; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index cf8a72ed01..33d918bbeb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 100 +-- Number of queries: 107 -- !query @@ -1024,3 +1024,101 @@ SELECT struct,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct> -- !query output {"a":1 days} {"a":"1 days"} {"a":"2 years 1 months 100 days 2 hours 10 minutes"} {"a":2 years 1 months 100 days 2 hours 10 minutes} + + +-- !query +select interval '+' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: +(line 1, pos 7) + +== SQL == +select interval '+' +-------^^^ + + +-- !query +select interval '+.' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the INTERVAL value: +.(line 1, pos 7) + +== SQL == +select interval '+.' 
+-------^^^
+
+
+-- !query
+select interval '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1(line 1, pos 7)
+
+== SQL ==
+select interval '1'
+-------^^^
+
+
+-- !query
+select interval '1.2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1.2(line 1, pos 7)
+
+== SQL ==
+select interval '1.2'
+-------^^^
+
+
+-- !query
+select interval '- 2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: - 2(line 1, pos 7)
+
+== SQL ==
+select interval '- 2'
+-------^^^
+
+
+-- !query
+select interval '1 day -'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day -(line 1, pos 7)
+
+== SQL ==
+select interval '1 day -'
+-------^^^
+
+
+-- !query
+select interval '1 day 1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
+
+== SQL ==
+select interval '1 day 1'
+-------^^^
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 99b2a6ff7e..898be09a40 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 100
+-- Number of queries: 107
 
 
 -- !query
@@ -1012,3 +1012,101 @@ SELECT
 struct<from_json({"a":"1 days"}):struct<a:interval>,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct<a:interval>>
 -- !query output
 {"a":1 days}	{"a":"1 days"}	{"a":"2 years 1 months 100 days 2 hours 10 minutes"}	{"a":2 years 1 months 100 days 2 hours 10 minutes}
+
+
+-- !query
+select interval '+'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +(line 1, pos 7)
+
+== SQL ==
+select interval '+'
+-------^^^
+
+
+-- !query
+select interval '+.'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +.(line 1, pos 7)
+
+== SQL ==
+select interval '+.'
+-------^^^
+
+
+-- !query
+select interval '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1(line 1, pos 7)
+
+== SQL ==
+select interval '1'
+-------^^^
+
+
+-- !query
+select interval '1.2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1.2(line 1, pos 7)
+
+== SQL ==
+select interval '1.2'
+-------^^^
+
+
+-- !query
+select interval '- 2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: - 2(line 1, pos 7)
+
+== SQL ==
+select interval '- 2'
+-------^^^
+
+
+-- !query
+select interval '1 day -'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day -(line 1, pos 7)
+
+== SQL ==
+select interval '1 day -'
+-------^^^
+
+
+-- !query
+select interval '1 day 1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
+
+== SQL ==
+select interval '1 day 1'
+-------^^^
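
A minimal sketch of the behavior change, not part of the patch: it assumes the existing public entry points in `IntervalUtils`, where `stringToInterval` is the throwing parser around the state machine patched above and `safeStringToInterval` is the variant that swallows the exception.

    import org.apache.spark.sql.catalyst.util.IntervalUtils.{safeStringToInterval, stringToInterval}
    import org.apache.spark.unsafe.types.UTF8String

    // A value with a dangling trailing part: "2" has no unit name after it.
    val dangling = UTF8String.fromString("1 day 2")

    // Before this patch, the fallthrough `case _ => null` made this return null;
    // after it, the parse fails with an IllegalArgumentException whose message
    // ends with "expect a unit name after '2' but hit EOL".
    try stringToInterval(dangling) catch {
      case e: IllegalArgumentException => println(e.getMessage)
    }

    // The safe variant still returns null instead of propagating the error,
    // which is what SQL-level NULL-producing paths rely on.
    assert(safeStringToInterval(dangling) == null)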