[SPARK-29623][SQL] do not allow multiple unit TO unit statements in interval literal syntax

### What changes were proposed in this pull request?

re-arrange the parser rules to make it clear that multiple unit TO unit statement like `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH` is not allowed.

### Why are the changes needed?

This is definitely an accident that we support such a weird syntax in the past. It's not supported by any other DBs and I can't think of any use case of it. Also no test covers this syntax in the current codebase.

### Does this PR introduce any user-facing change?

Yes, and a migration guide item is added.

### How was this patch tested?

new tests.

Closes #26285 from cloud-fan/syntax.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Wenchen Fan 2019-11-02 21:35:56 +08:00
parent 28ccd31aee
commit 31ae446e9c
7 changed files with 406 additions and 91 deletions

View file

@ -9,9 +9,9 @@ license: |
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -218,6 +218,8 @@ license: |
- Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`.
- Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception.
## Upgrading from Spark SQL 2.4 to 2.4.1
- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was

View file

@ -80,7 +80,7 @@ singleTableSchema
;
singleInterval
: INTERVAL? (intervalValue intervalUnit)+ EOF
: INTERVAL? multiUnitsInterval EOF
;
statement
@ -759,12 +759,24 @@ booleanValue
;
interval
: {ansi}? INTERVAL? intervalField+
| {!ansi}? INTERVAL intervalField*
: INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
| {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
;
intervalField
: value=intervalValue unit=intervalUnit (TO to=intervalUnit)?
errorCapturingMultiUnitsInterval
: multiUnitsInterval unitToUnitInterval?
;
multiUnitsInterval
: (intervalValue intervalUnit)+
;
errorCapturingUnitToUnitInterval
: body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
;
unitToUnitInterval
: value=intervalValue from=intervalUnit TO to=intervalUnit
;
intervalValue

View file

@ -102,20 +102,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = {
withOrigin(ctx) {
val units = ctx.intervalUnit().asScala.map {
u => normalizeInternalUnit(u.getText.toLowerCase(Locale.ROOT))
}.toArray
val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray
try {
IntervalUtils.fromUnitStrings(units, values)
} catch {
case i: IllegalArgumentException =>
val e = new ParseException(i.getMessage, ctx)
e.setStackTrace(i.getStackTrace)
throw e
}
}
withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval))
}
/* ********************************************************************************************
@ -1940,71 +1927,102 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
/**
* Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple
* unit value pairs, for instance: interval 2 months 2 days.
* Create a [[CalendarInterval]] literal expression. Two syntaxes are supported:
* - multiple unit value pairs, for instance: interval 2 months 2 days.
* - from-to unit, for instance: interval '1-2' year to month.
*/
override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) {
val intervals = ctx.intervalField.asScala.map(visitIntervalField)
validate(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx)
Literal(intervals.reduce(_.add(_)))
if (ctx.errorCapturingMultiUnitsInterval != null) {
val innerCtx = ctx.errorCapturingMultiUnitsInterval
if (innerCtx.unitToUnitInterval != null) {
throw new ParseException(
"Can only have a single from-to unit in the interval literal syntax",
innerCtx.unitToUnitInterval)
}
Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType)
} else if (ctx.errorCapturingUnitToUnitInterval != null) {
val innerCtx = ctx.errorCapturingUnitToUnitInterval
if (innerCtx.error1 != null || innerCtx.error2 != null) {
val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
throw new ParseException(
"Can only have a single from-to unit in the interval literal syntax",
errorCtx)
}
Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType)
} else {
throw new ParseException("at least one time unit should be given for interval literal", ctx)
}
}
/**
* Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are
* supported:
* - Single unit.
* - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND',
* 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported).
* Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS.
*/
override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) {
import ctx._
val s = getIntervalValue(value)
try {
val unitText = unit.getText.toLowerCase(Locale.ROOT)
val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match {
case (u, None) =>
IntervalUtils.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s))
case ("year", Some("month")) =>
IntervalUtils.fromYearMonthString(s)
case ("day", Some("hour")) =>
IntervalUtils.fromDayTimeString(s, "day", "hour")
case ("day", Some("minute")) =>
IntervalUtils.fromDayTimeString(s, "day", "minute")
case ("day", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "day", "second")
case ("hour", Some("minute")) =>
IntervalUtils.fromDayTimeString(s, "hour", "minute")
case ("hour", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "hour", "second")
case ("minute", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "minute", "second")
case (from, Some(t)) =>
throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
withOrigin(ctx) {
val units = ctx.intervalUnit().asScala.map { unit =>
val u = unit.getText.toLowerCase(Locale.ROOT)
// Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
if (u.endsWith("s")) u.substring(0, u.length - 1) else u
}.toArray
val values = ctx.intervalValue().asScala.map { value =>
if (value.STRING() != null) {
string(value.STRING())
} else {
value.getText
}
}.toArray
try {
IntervalUtils.fromUnitStrings(units, values)
} catch {
case i: IllegalArgumentException =>
val e = new ParseException(i.getMessage, ctx)
e.setStackTrace(i.getStackTrace)
throw e
}
validate(interval != null, "No interval can be constructed", ctx)
interval
} catch {
// Handle Exceptions thrown by CalendarInterval
case e: IllegalArgumentException =>
val pe = new ParseException(e.getMessage, ctx)
pe.setStackTrace(e.getStackTrace)
throw pe
}
}
private def getIntervalValue(value: IntervalValueContext): String = {
if (value.STRING() != null) {
string(value.STRING())
} else {
value.getText
/**
* Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH.
*/
override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = {
withOrigin(ctx) {
val value = Option(ctx.intervalValue.STRING).map(string).getOrElse {
throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue)
}
try {
val from = ctx.from.getText.toLowerCase(Locale.ROOT)
val to = ctx.to.getText.toLowerCase(Locale.ROOT)
(from, to) match {
case ("year", "month") =>
IntervalUtils.fromYearMonthString(value)
case ("day", "hour") =>
IntervalUtils.fromDayTimeString(value, "day", "hour")
case ("day", "minute") =>
IntervalUtils.fromDayTimeString(value, "day", "minute")
case ("day", "second") =>
IntervalUtils.fromDayTimeString(value, "day", "second")
case ("hour", "minute") =>
IntervalUtils.fromDayTimeString(value, "hour", "minute")
case ("hour", "second") =>
IntervalUtils.fromDayTimeString(value, "hour", "second")
case ("minute", "second") =>
IntervalUtils.fromDayTimeString(value, "minute", "second")
case _ =>
throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx)
}
} catch {
// Handle Exceptions thrown by CalendarInterval
case e: IllegalArgumentException =>
val pe = new ParseException(e.getMessage, ctx)
pe.setStackTrace(e.getStackTrace)
throw pe
}
}
}
// Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
private def normalizeInternalUnit(s: String): String = {
if (s.endsWith("s")) s.substring(0, s.length - 1) else s
}
/* ********************************************************************************************
* DataType parsing
* ******************************************************************************************** */

View file

@ -644,7 +644,7 @@ class ExpressionParserSuite extends AnalysisTest {
// Non Existing unit
intercept("interval 10 nanoseconds",
"no viable alternative at input 'interval 10 nanoseconds'")
"no viable alternative at input '10 nanoseconds'")
// Year-Month intervals.
val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
@ -679,16 +679,13 @@ class ExpressionParserSuite extends AnalysisTest {
}
// Unknown FROM TO intervals
intercept("interval 10 month to second",
intercept("interval '10' month to second",
"Intervals FROM month TO second are not supported.")
// Composed intervals.
checkIntervals(
"3 months 4 days 22 seconds 1 millisecond",
Literal(new CalendarInterval(3, 4, 22001000L)))
checkIntervals(
"3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second",
Literal(new CalendarInterval(14, 22, 2 * CalendarInterval.MICROS_PER_SECOND)))
}
test("SPARK-23264 Interval Compatibility tests") {

View file

@ -130,3 +130,26 @@ select interval '3 year 1 hour';
select integer '7';
select integer'7';
select integer '2147483648';
-- malformed interval literal
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select interval '1' year to second;
select interval '10-9' year to month '2-1' year to month;
select interval '10-9' year to month '12:11:10' hour to second;
select interval '1 15:11' day to minute '12:11:10' hour to second;
select interval 1 year '2-1' year to month;
select interval 1 year '12:11:10' hour to second;
select interval '10-9' year to month '1' year;
select interval '12:11:10' hour to second '1' year;
-- malformed interval literal with ansi mode
SET spark.sql.ansi.enabled=true;
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select 1 year to month;
select interval '1' year to second;
select '1' year to second;
select interval 1 year '2-1' year to month;
select 1 year '2-1' year to month;

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 62
-- Number of queries: 82
-- !query 0
@ -447,7 +447,7 @@ struct<>
-- !query 49 output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input 'interval 10 nanoseconds'(line 1, pos 19)
no viable alternative at input '10 nanoseconds'(line 1, pos 19)
== SQL ==
select interval 10 nanoseconds
@ -572,3 +572,277 @@ Cannot parse the Int value: 2147483648, java.lang.NumberFormatException: For inp
== SQL ==
select integer '2147483648'
-------^^^
-- !query 62
select interval
-- !query 62 schema
struct<>
-- !query 62 output
org.apache.spark.sql.catalyst.parser.ParseException
at least one time unit should be given for interval literal(line 1, pos 7)
== SQL ==
select interval
-------^^^
-- !query 63
select interval 1 fake_unit
-- !query 63 schema
struct<>
-- !query 63 output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input '1 fake_unit'(line 1, pos 18)
== SQL ==
select interval 1 fake_unit
------------------^^^
-- !query 64
select interval 1 year to month
-- !query 64 schema
struct<>
-- !query 64 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 16)
== SQL ==
select interval 1 year to month
----------------^^^
-- !query 65
select interval '1' year to second
-- !query 65 schema
struct<>
-- !query 65 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 16)
== SQL ==
select interval '1' year to second
----------------^^^
-- !query 66
select interval '10-9' year to month '2-1' year to month
-- !query 66 schema
struct<>
-- !query 66 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '2-1' year to month
-------------------------------------^^^
-- !query 67
select interval '10-9' year to month '12:11:10' hour to second
-- !query 67 schema
struct<>
-- !query 67 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '12:11:10' hour to second
-------------------------------------^^^
-- !query 68
select interval '1 15:11' day to minute '12:11:10' hour to second
-- !query 68 schema
struct<>
-- !query 68 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 40)
== SQL ==
select interval '1 15:11' day to minute '12:11:10' hour to second
----------------------------------------^^^
-- !query 69
select interval 1 year '2-1' year to month
-- !query 69 schema
struct<>
-- !query 69 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '2-1' year to month
-----------------------^^^
-- !query 70
select interval 1 year '12:11:10' hour to second
-- !query 70 schema
struct<>
-- !query 70 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '12:11:10' hour to second
-----------------------^^^
-- !query 71
select interval '10-9' year to month '1' year
-- !query 71 schema
struct<>
-- !query 71 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '1' year
-------------------------------------^^^
-- !query 72
select interval '12:11:10' hour to second '1' year
-- !query 72 schema
struct<>
-- !query 72 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 42)
== SQL ==
select interval '12:11:10' hour to second '1' year
------------------------------------------^^^
-- !query 73
SET spark.sql.ansi.enabled=true
-- !query 73 schema
struct<key:string,value:string>
-- !query 73 output
spark.sql.ansi.enabled true
-- !query 74
select interval
-- !query 74 schema
struct<>
-- !query 74 output
org.apache.spark.sql.catalyst.parser.ParseException
at least one time unit should be given for interval literal(line 1, pos 7)
== SQL ==
select interval
-------^^^
-- !query 75
select interval 1 fake_unit
-- !query 75 schema
struct<>
-- !query 75 output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input '1 fake_unit'(line 1, pos 18)
== SQL ==
select interval 1 fake_unit
------------------^^^
-- !query 76
select interval 1 year to month
-- !query 76 schema
struct<>
-- !query 76 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 16)
== SQL ==
select interval 1 year to month
----------------^^^
-- !query 77
select 1 year to month
-- !query 77 schema
struct<>
-- !query 77 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 7)
== SQL ==
select 1 year to month
-------^^^
-- !query 78
select interval '1' year to second
-- !query 78 schema
struct<>
-- !query 78 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 16)
== SQL ==
select interval '1' year to second
----------------^^^
-- !query 79
select '1' year to second
-- !query 79 schema
struct<>
-- !query 79 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 7)
== SQL ==
select '1' year to second
-------^^^
-- !query 80
select interval 1 year '2-1' year to month
-- !query 80 schema
struct<>
-- !query 80 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '2-1' year to month
-----------------------^^^
-- !query 81
select 1 year '2-1' year to month
-- !query 81 schema
struct<>
-- !query 81 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 14)
== SQL ==
select 1 year '2-1' year to month
--------------^^^

View file

@ -1564,17 +1564,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
}
e.message.contains("Cannot save interval data type into external storage")
})
val e1 = intercept[AnalysisException] {
sql("select interval")
}
assert(e1.message.contains("at least one time unit should be given for interval literal"))
// Currently we don't yet support nanosecond
val e2 = intercept[AnalysisException] {
sql("select interval 23 nanosecond")
}
assert(e2.message.contains("no viable alternative at input 'interval 23 nanosecond'"))
}
test("SPARK-8945: add and subtract expressions for interval type") {