From 31ae446e9c0be4dff2b75e510a2e1b65773d757e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 2 Nov 2019 21:35:56 +0800 Subject: [PATCH] [SPARK-29623][SQL] do not allow multiple unit TO unit statements in interval literal syntax ### What changes were proposed in this pull request? re-arrange the parser rules to make it clear that multiple unit TO unit statement like `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH` is not allowed. ### Why are the changes needed? This is definitely an accident that we support such a weird syntax in the past. It's not supported by any other DBs and I can't think of any use case of it. Also no test covers this syntax in the current codebase. ### Does this PR introduce any user-facing change? Yes, and a migration guide item is added. ### How was this patch tested? new tests. Closes #26285 from cloud-fan/syntax. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 6 +- .../spark/sql/catalyst/parser/SqlBase.g4 | 22 +- .../sql/catalyst/parser/AstBuilder.scala | 150 +++++----- .../parser/ExpressionParserSuite.scala | 7 +- .../resources/sql-tests/inputs/literals.sql | 23 ++ .../sql-tests/results/literals.sql.out | 278 +++++++++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 11 - 7 files changed, 406 insertions(+), 91 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index d03ca663e8..a97a4b04de 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -9,9 +9,9 @@ license: | The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -218,6 +218,8 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. + - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. + ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 96d1e42ffa..a9e3ca6549 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -80,7 +80,7 @@ singleTableSchema ; singleInterval - : INTERVAL? (intervalValue intervalUnit)+ EOF + : INTERVAL? multiUnitsInterval EOF ; statement @@ -759,12 +759,24 @@ booleanValue ; interval - : {ansi}? INTERVAL? intervalField+ - | {!ansi}? INTERVAL intervalField* + : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? + | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; -intervalField - : value=intervalValue unit=intervalUnit (TO to=intervalUnit)? +errorCapturingMultiUnitsInterval + : multiUnitsInterval unitToUnitInterval? + ; + +multiUnitsInterval + : (intervalValue intervalUnit)+ + ; + +errorCapturingUnitToUnitInterval + : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)? + ; + +unitToUnitInterval + : value=intervalValue from=intervalUnit TO to=intervalUnit ; intervalValue diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 838fc4d84a..3de13abf54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -102,20 +102,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = { - withOrigin(ctx) { - val units = ctx.intervalUnit().asScala.map { - u => normalizeInternalUnit(u.getText.toLowerCase(Locale.ROOT)) - }.toArray - val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray - try { - IntervalUtils.fromUnitStrings(units, values) - } catch { - case i: IllegalArgumentException => - val e = new ParseException(i.getMessage, ctx) - e.setStackTrace(i.getStackTrace) - throw e - } - } + withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval)) } /* ******************************************************************************************** @@ -1940,71 +1927,102 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple - * unit value pairs, for instance: interval 2 months 2 days. + * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported: + * - multiple unit value pairs, for instance: interval 2 months 2 days. + * - from-to unit, for instance: interval '1-2' year to month. */ override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { - val intervals = ctx.intervalField.asScala.map(visitIntervalField) - validate(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx) - Literal(intervals.reduce(_.add(_))) + if (ctx.errorCapturingMultiUnitsInterval != null) { + val innerCtx = ctx.errorCapturingMultiUnitsInterval + if (innerCtx.unitToUnitInterval != null) { + throw new ParseException( + "Can only have a single from-to unit in the interval literal syntax", + innerCtx.unitToUnitInterval) + } + Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType) + } else if (ctx.errorCapturingUnitToUnitInterval != null) { + val innerCtx = ctx.errorCapturingUnitToUnitInterval + if (innerCtx.error1 != null || innerCtx.error2 != null) { + val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2 + throw new ParseException( + "Can only have a single from-to unit in the interval literal syntax", + errorCtx) + } + Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType) + } else { + throw new ParseException("at least one time unit should be given for interval literal", ctx) + } } /** - * Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are - * supported: - * - Single unit. - * - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND', - * 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported). + * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. */ - override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) { - import ctx._ - val s = getIntervalValue(value) - try { - val unitText = unit.getText.toLowerCase(Locale.ROOT) - val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match { - case (u, None) => - IntervalUtils.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s)) - case ("year", Some("month")) => - IntervalUtils.fromYearMonthString(s) - case ("day", Some("hour")) => - IntervalUtils.fromDayTimeString(s, "day", "hour") - case ("day", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "day", "minute") - case ("day", Some("second")) => - IntervalUtils.fromDayTimeString(s, "day", "second") - case ("hour", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "hour", "minute") - case ("hour", Some("second")) => - IntervalUtils.fromDayTimeString(s, "hour", "second") - case ("minute", Some("second")) => - IntervalUtils.fromDayTimeString(s, "minute", "second") - case (from, Some(t)) => - throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx) + override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val units = ctx.intervalUnit().asScala.map { unit => + val u = unit.getText.toLowerCase(Locale.ROOT) + // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/... + if (u.endsWith("s")) u.substring(0, u.length - 1) else u + }.toArray + + val values = ctx.intervalValue().asScala.map { value => + if (value.STRING() != null) { + string(value.STRING()) + } else { + value.getText + } + }.toArray + + try { + IntervalUtils.fromUnitStrings(units, values) + } catch { + case i: IllegalArgumentException => + val e = new ParseException(i.getMessage, ctx) + e.setStackTrace(i.getStackTrace) + throw e } - validate(interval != null, "No interval can be constructed", ctx) - interval - } catch { - // Handle Exceptions thrown by CalendarInterval - case e: IllegalArgumentException => - val pe = new ParseException(e.getMessage, ctx) - pe.setStackTrace(e.getStackTrace) - throw pe } } - private def getIntervalValue(value: IntervalValueContext): String = { - if (value.STRING() != null) { - string(value.STRING()) - } else { - value.getText + /** + * Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH. + */ + override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val value = Option(ctx.intervalValue.STRING).map(string).getOrElse { + throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue) + } + try { + val from = ctx.from.getText.toLowerCase(Locale.ROOT) + val to = ctx.to.getText.toLowerCase(Locale.ROOT) + (from, to) match { + case ("year", "month") => + IntervalUtils.fromYearMonthString(value) + case ("day", "hour") => + IntervalUtils.fromDayTimeString(value, "day", "hour") + case ("day", "minute") => + IntervalUtils.fromDayTimeString(value, "day", "minute") + case ("day", "second") => + IntervalUtils.fromDayTimeString(value, "day", "second") + case ("hour", "minute") => + IntervalUtils.fromDayTimeString(value, "hour", "minute") + case ("hour", "second") => + IntervalUtils.fromDayTimeString(value, "hour", "second") + case ("minute", "second") => + IntervalUtils.fromDayTimeString(value, "minute", "second") + case _ => + throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx) + } + } catch { + // Handle Exceptions thrown by CalendarInterval + case e: IllegalArgumentException => + val pe = new ParseException(e.getMessage, ctx) + pe.setStackTrace(e.getStackTrace) + throw pe + } } } - // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/... - private def normalizeInternalUnit(s: String): String = { - if (s.endsWith("s")) s.substring(0, s.length - 1) else s - } - /* ******************************************************************************************** * DataType parsing * ******************************************************************************************** */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index d675c7c483..859a20e4cb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -644,7 +644,7 @@ class ExpressionParserSuite extends AnalysisTest { // Non Existing unit intercept("interval 10 nanoseconds", - "no viable alternative at input 'interval 10 nanoseconds'") + "no viable alternative at input '10 nanoseconds'") // Year-Month intervals. val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0") @@ -679,16 +679,13 @@ class ExpressionParserSuite extends AnalysisTest { } // Unknown FROM TO intervals - intercept("interval 10 month to second", + intercept("interval '10' month to second", "Intervals FROM month TO second are not supported.") // Composed intervals. checkIntervals( "3 months 4 days 22 seconds 1 millisecond", Literal(new CalendarInterval(3, 4, 22001000L))) - checkIntervals( - "3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second", - Literal(new CalendarInterval(14, 22, 2 * CalendarInterval.MICROS_PER_SECOND))) } test("SPARK-23264 Interval Compatibility tests") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index 389b9cc53e..3c3be2913a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -130,3 +130,26 @@ select interval '3 year 1 hour'; select integer '7'; select integer'7'; select integer '2147483648'; + +-- malformed interval literal +select interval; +select interval 1 fake_unit; +select interval 1 year to month; +select interval '1' year to second; +select interval '10-9' year to month '2-1' year to month; +select interval '10-9' year to month '12:11:10' hour to second; +select interval '1 15:11' day to minute '12:11:10' hour to second; +select interval 1 year '2-1' year to month; +select interval 1 year '12:11:10' hour to second; +select interval '10-9' year to month '1' year; +select interval '12:11:10' hour to second '1' year; +-- malformed interval literal with ansi mode +SET spark.sql.ansi.enabled=true; +select interval; +select interval 1 fake_unit; +select interval 1 year to month; +select 1 year to month; +select interval '1' year to second; +select '1' year to second; +select interval 1 year '2-1' year to month; +select 1 year '2-1' year to month; diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index b7a0dcaa1e..c5981414a2 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 62 +-- Number of queries: 82 -- !query 0 @@ -447,7 +447,7 @@ struct<> -- !query 49 output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'interval 10 nanoseconds'(line 1, pos 19) +no viable alternative at input '10 nanoseconds'(line 1, pos 19) == SQL == select interval 10 nanoseconds @@ -572,3 +572,277 @@ Cannot parse the Int value: 2147483648, java.lang.NumberFormatException: For inp == SQL == select integer '2147483648' -------^^^ + + +-- !query 62 +select interval +-- !query 62 schema +struct<> +-- !query 62 output +org.apache.spark.sql.catalyst.parser.ParseException + +at least one time unit should be given for interval literal(line 1, pos 7) + +== SQL == +select interval +-------^^^ + + +-- !query 63 +select interval 1 fake_unit +-- !query 63 schema +struct<> +-- !query 63 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input '1 fake_unit'(line 1, pos 18) + +== SQL == +select interval 1 fake_unit +------------------^^^ + + +-- !query 64 +select interval 1 year to month +-- !query 64 schema +struct<> +-- !query 64 output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 16) + +== SQL == +select interval 1 year to month +----------------^^^ + + +-- !query 65 +select interval '1' year to second +-- !query 65 schema +struct<> +-- !query 65 output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 16) + +== SQL == +select interval '1' year to second +----------------^^^ + + +-- !query 66 +select interval '10-9' year to month '2-1' year to month +-- !query 66 schema +struct<> +-- !query 66 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ + + +-- !query 67 +select interval '10-9' year to month '12:11:10' hour to second +-- !query 67 schema +struct<> +-- !query 67 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '12:11:10' hour to second +-------------------------------------^^^ + + +-- !query 68 +select interval '1 15:11' day to minute '12:11:10' hour to second +-- !query 68 schema +struct<> +-- !query 68 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 40) + +== SQL == +select interval '1 15:11' day to minute '12:11:10' hour to second +----------------------------------------^^^ + + +-- !query 69 +select interval 1 year '2-1' year to month +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ + + +-- !query 70 +select interval 1 year '12:11:10' hour to second +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '12:11:10' hour to second +-----------------------^^^ + + +-- !query 71 +select interval '10-9' year to month '1' year +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '1' year +-------------------------------------^^^ + + +-- !query 72 +select interval '12:11:10' hour to second '1' year +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 42) + +== SQL == +select interval '12:11:10' hour to second '1' year +------------------------------------------^^^ + + +-- !query 73 +SET spark.sql.ansi.enabled=true +-- !query 73 schema +struct +-- !query 73 output +spark.sql.ansi.enabled true + + +-- !query 74 +select interval +-- !query 74 schema +struct<> +-- !query 74 output +org.apache.spark.sql.catalyst.parser.ParseException + +at least one time unit should be given for interval literal(line 1, pos 7) + +== SQL == +select interval +-------^^^ + + +-- !query 75 +select interval 1 fake_unit +-- !query 75 schema +struct<> +-- !query 75 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input '1 fake_unit'(line 1, pos 18) + +== SQL == +select interval 1 fake_unit +------------------^^^ + + +-- !query 76 +select interval 1 year to month +-- !query 76 schema +struct<> +-- !query 76 output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 16) + +== SQL == +select interval 1 year to month +----------------^^^ + + +-- !query 77 +select 1 year to month +-- !query 77 schema +struct<> +-- !query 77 output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 7) + +== SQL == +select 1 year to month +-------^^^ + + +-- !query 78 +select interval '1' year to second +-- !query 78 schema +struct<> +-- !query 78 output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 16) + +== SQL == +select interval '1' year to second +----------------^^^ + + +-- !query 79 +select '1' year to second +-- !query 79 schema +struct<> +-- !query 79 output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 7) + +== SQL == +select '1' year to second +-------^^^ + + +-- !query 80 +select interval 1 year '2-1' year to month +-- !query 80 schema +struct<> +-- !query 80 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ + + +-- !query 81 +select 1 year '2-1' year to month +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 14) + +== SQL == +select 1 year '2-1' year to month +--------------^^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 941304cb25..cbf4d1a2ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1564,17 +1564,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } e.message.contains("Cannot save interval data type into external storage") }) - - val e1 = intercept[AnalysisException] { - sql("select interval") - } - assert(e1.message.contains("at least one time unit should be given for interval literal")) - - // Currently we don't yet support nanosecond - val e2 = intercept[AnalysisException] { - sql("select interval 23 nanosecond") - } - assert(e2.message.contains("no viable alternative at input 'interval 23 nanosecond'")) } test("SPARK-8945: add and subtract expressions for interval type") {