[SPARK-29623][SQL] do not allow multiple unit TO unit statements in interval literal syntax

### What changes were proposed in this pull request?

re-arrange the parser rules to make it clear that multiple unit TO unit statement like `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH` is not allowed.

### Why are the changes needed?

This is definitely an accident that we support such a weird syntax in the past. It's not supported by any other DBs and I can't think of any use case of it. Also no test covers this syntax in the current codebase.

### Does this PR introduce any user-facing change?

Yes, and a migration guide item is added.

### How was this patch tested?

new tests.

Closes #26285 from cloud-fan/syntax.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Wenchen Fan 2019-11-02 21:35:56 +08:00
parent 28ccd31aee
commit 31ae446e9c
7 changed files with 406 additions and 91 deletions

View file

@ -218,6 +218,8 @@ license: |
- Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`.
- Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception.
## Upgrading from Spark SQL 2.4 to 2.4.1 ## Upgrading from Spark SQL 2.4 to 2.4.1
- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was

View file

@ -80,7 +80,7 @@ singleTableSchema
; ;
singleInterval singleInterval
: INTERVAL? (intervalValue intervalUnit)+ EOF : INTERVAL? multiUnitsInterval EOF
; ;
statement statement
@ -759,12 +759,24 @@ booleanValue
; ;
interval interval
: {ansi}? INTERVAL? intervalField+ : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
| {!ansi}? INTERVAL intervalField* | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
; ;
intervalField errorCapturingMultiUnitsInterval
: value=intervalValue unit=intervalUnit (TO to=intervalUnit)? : multiUnitsInterval unitToUnitInterval?
;
multiUnitsInterval
: (intervalValue intervalUnit)+
;
errorCapturingUnitToUnitInterval
: body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
;
unitToUnitInterval
: value=intervalValue from=intervalUnit TO to=intervalUnit
; ;
intervalValue intervalValue

View file

@ -102,20 +102,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
} }
override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = { override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = {
withOrigin(ctx) { withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval))
val units = ctx.intervalUnit().asScala.map {
u => normalizeInternalUnit(u.getText.toLowerCase(Locale.ROOT))
}.toArray
val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray
try {
IntervalUtils.fromUnitStrings(units, values)
} catch {
case i: IllegalArgumentException =>
val e = new ParseException(i.getMessage, ctx)
e.setStackTrace(i.getStackTrace)
throw e
}
}
} }
/* ******************************************************************************************** /* ********************************************************************************************
@ -1940,71 +1927,102 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
} }
/** /**
* Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported:
* unit value pairs, for instance: interval 2 months 2 days. * - multiple unit value pairs, for instance: interval 2 months 2 days.
* - from-to unit, for instance: interval '1-2' year to month.
*/ */
override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) {
val intervals = ctx.intervalField.asScala.map(visitIntervalField) if (ctx.errorCapturingMultiUnitsInterval != null) {
validate(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx) val innerCtx = ctx.errorCapturingMultiUnitsInterval
Literal(intervals.reduce(_.add(_))) if (innerCtx.unitToUnitInterval != null) {
throw new ParseException(
"Can only have a single from-to unit in the interval literal syntax",
innerCtx.unitToUnitInterval)
}
Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType)
} else if (ctx.errorCapturingUnitToUnitInterval != null) {
val innerCtx = ctx.errorCapturingUnitToUnitInterval
if (innerCtx.error1 != null || innerCtx.error2 != null) {
val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
throw new ParseException(
"Can only have a single from-to unit in the interval literal syntax",
errorCtx)
}
Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType)
} else {
throw new ParseException("at least one time unit should be given for interval literal", ctx)
}
} }
/** /**
* Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS.
* supported:
* - Single unit.
* - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND',
* 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported).
*/ */
override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) { override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
import ctx._ withOrigin(ctx) {
val s = getIntervalValue(value) val units = ctx.intervalUnit().asScala.map { unit =>
try { val u = unit.getText.toLowerCase(Locale.ROOT)
val unitText = unit.getText.toLowerCase(Locale.ROOT) // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match { if (u.endsWith("s")) u.substring(0, u.length - 1) else u
case (u, None) => }.toArray
IntervalUtils.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s))
case ("year", Some("month")) => val values = ctx.intervalValue().asScala.map { value =>
IntervalUtils.fromYearMonthString(s) if (value.STRING() != null) {
case ("day", Some("hour")) => string(value.STRING())
IntervalUtils.fromDayTimeString(s, "day", "hour") } else {
case ("day", Some("minute")) => value.getText
IntervalUtils.fromDayTimeString(s, "day", "minute") }
case ("day", Some("second")) => }.toArray
IntervalUtils.fromDayTimeString(s, "day", "second")
case ("hour", Some("minute")) => try {
IntervalUtils.fromDayTimeString(s, "hour", "minute") IntervalUtils.fromUnitStrings(units, values)
case ("hour", Some("second")) => } catch {
IntervalUtils.fromDayTimeString(s, "hour", "second") case i: IllegalArgumentException =>
case ("minute", Some("second")) => val e = new ParseException(i.getMessage, ctx)
IntervalUtils.fromDayTimeString(s, "minute", "second") e.setStackTrace(i.getStackTrace)
case (from, Some(t)) => throw e
throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
} }
validate(interval != null, "No interval can be constructed", ctx)
interval
} catch {
// Handle Exceptions thrown by CalendarInterval
case e: IllegalArgumentException =>
val pe = new ParseException(e.getMessage, ctx)
pe.setStackTrace(e.getStackTrace)
throw pe
} }
} }
private def getIntervalValue(value: IntervalValueContext): String = { /**
if (value.STRING() != null) { * Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH.
string(value.STRING()) */
} else { override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = {
value.getText withOrigin(ctx) {
val value = Option(ctx.intervalValue.STRING).map(string).getOrElse {
throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue)
}
try {
val from = ctx.from.getText.toLowerCase(Locale.ROOT)
val to = ctx.to.getText.toLowerCase(Locale.ROOT)
(from, to) match {
case ("year", "month") =>
IntervalUtils.fromYearMonthString(value)
case ("day", "hour") =>
IntervalUtils.fromDayTimeString(value, "day", "hour")
case ("day", "minute") =>
IntervalUtils.fromDayTimeString(value, "day", "minute")
case ("day", "second") =>
IntervalUtils.fromDayTimeString(value, "day", "second")
case ("hour", "minute") =>
IntervalUtils.fromDayTimeString(value, "hour", "minute")
case ("hour", "second") =>
IntervalUtils.fromDayTimeString(value, "hour", "second")
case ("minute", "second") =>
IntervalUtils.fromDayTimeString(value, "minute", "second")
case _ =>
throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx)
}
} catch {
// Handle Exceptions thrown by CalendarInterval
case e: IllegalArgumentException =>
val pe = new ParseException(e.getMessage, ctx)
pe.setStackTrace(e.getStackTrace)
throw pe
}
} }
} }
// Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
private def normalizeInternalUnit(s: String): String = {
if (s.endsWith("s")) s.substring(0, s.length - 1) else s
}
/* ******************************************************************************************** /* ********************************************************************************************
* DataType parsing * DataType parsing
* ******************************************************************************************** */ * ******************************************************************************************** */

View file

@ -644,7 +644,7 @@ class ExpressionParserSuite extends AnalysisTest {
// Non Existing unit // Non Existing unit
intercept("interval 10 nanoseconds", intercept("interval 10 nanoseconds",
"no viable alternative at input 'interval 10 nanoseconds'") "no viable alternative at input '10 nanoseconds'")
// Year-Month intervals. // Year-Month intervals.
val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0") val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
@ -679,16 +679,13 @@ class ExpressionParserSuite extends AnalysisTest {
} }
// Unknown FROM TO intervals // Unknown FROM TO intervals
intercept("interval 10 month to second", intercept("interval '10' month to second",
"Intervals FROM month TO second are not supported.") "Intervals FROM month TO second are not supported.")
// Composed intervals. // Composed intervals.
checkIntervals( checkIntervals(
"3 months 4 days 22 seconds 1 millisecond", "3 months 4 days 22 seconds 1 millisecond",
Literal(new CalendarInterval(3, 4, 22001000L))) Literal(new CalendarInterval(3, 4, 22001000L)))
checkIntervals(
"3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second",
Literal(new CalendarInterval(14, 22, 2 * CalendarInterval.MICROS_PER_SECOND)))
} }
test("SPARK-23264 Interval Compatibility tests") { test("SPARK-23264 Interval Compatibility tests") {

View file

@ -130,3 +130,26 @@ select interval '3 year 1 hour';
select integer '7'; select integer '7';
select integer'7'; select integer'7';
select integer '2147483648'; select integer '2147483648';
-- malformed interval literal
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select interval '1' year to second;
select interval '10-9' year to month '2-1' year to month;
select interval '10-9' year to month '12:11:10' hour to second;
select interval '1 15:11' day to minute '12:11:10' hour to second;
select interval 1 year '2-1' year to month;
select interval 1 year '12:11:10' hour to second;
select interval '10-9' year to month '1' year;
select interval '12:11:10' hour to second '1' year;
-- malformed interval literal with ansi mode
SET spark.sql.ansi.enabled=true;
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select 1 year to month;
select interval '1' year to second;
select '1' year to second;
select interval 1 year '2-1' year to month;
select 1 year '2-1' year to month;

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite -- Automatically generated by SQLQueryTestSuite
-- Number of queries: 62 -- Number of queries: 82
-- !query 0 -- !query 0
@ -447,7 +447,7 @@ struct<>
-- !query 49 output -- !query 49 output
org.apache.spark.sql.catalyst.parser.ParseException org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input 'interval 10 nanoseconds'(line 1, pos 19) no viable alternative at input '10 nanoseconds'(line 1, pos 19)
== SQL == == SQL ==
select interval 10 nanoseconds select interval 10 nanoseconds
@ -572,3 +572,277 @@ Cannot parse the Int value: 2147483648, java.lang.NumberFormatException: For inp
== SQL == == SQL ==
select integer '2147483648' select integer '2147483648'
-------^^^ -------^^^
-- !query 62
select interval
-- !query 62 schema
struct<>
-- !query 62 output
org.apache.spark.sql.catalyst.parser.ParseException
at least one time unit should be given for interval literal(line 1, pos 7)
== SQL ==
select interval
-------^^^
-- !query 63
select interval 1 fake_unit
-- !query 63 schema
struct<>
-- !query 63 output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input '1 fake_unit'(line 1, pos 18)
== SQL ==
select interval 1 fake_unit
------------------^^^
-- !query 64
select interval 1 year to month
-- !query 64 schema
struct<>
-- !query 64 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 16)
== SQL ==
select interval 1 year to month
----------------^^^
-- !query 65
select interval '1' year to second
-- !query 65 schema
struct<>
-- !query 65 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 16)
== SQL ==
select interval '1' year to second
----------------^^^
-- !query 66
select interval '10-9' year to month '2-1' year to month
-- !query 66 schema
struct<>
-- !query 66 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '2-1' year to month
-------------------------------------^^^
-- !query 67
select interval '10-9' year to month '12:11:10' hour to second
-- !query 67 schema
struct<>
-- !query 67 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '12:11:10' hour to second
-------------------------------------^^^
-- !query 68
select interval '1 15:11' day to minute '12:11:10' hour to second
-- !query 68 schema
struct<>
-- !query 68 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 40)
== SQL ==
select interval '1 15:11' day to minute '12:11:10' hour to second
----------------------------------------^^^
-- !query 69
select interval 1 year '2-1' year to month
-- !query 69 schema
struct<>
-- !query 69 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '2-1' year to month
-----------------------^^^
-- !query 70
select interval 1 year '12:11:10' hour to second
-- !query 70 schema
struct<>
-- !query 70 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '12:11:10' hour to second
-----------------------^^^
-- !query 71
select interval '10-9' year to month '1' year
-- !query 71 schema
struct<>
-- !query 71 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 37)
== SQL ==
select interval '10-9' year to month '1' year
-------------------------------------^^^
-- !query 72
select interval '12:11:10' hour to second '1' year
-- !query 72 schema
struct<>
-- !query 72 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 42)
== SQL ==
select interval '12:11:10' hour to second '1' year
------------------------------------------^^^
-- !query 73
SET spark.sql.ansi.enabled=true
-- !query 73 schema
struct<key:string,value:string>
-- !query 73 output
spark.sql.ansi.enabled true
-- !query 74
select interval
-- !query 74 schema
struct<>
-- !query 74 output
org.apache.spark.sql.catalyst.parser.ParseException
at least one time unit should be given for interval literal(line 1, pos 7)
== SQL ==
select interval
-------^^^
-- !query 75
select interval 1 fake_unit
-- !query 75 schema
struct<>
-- !query 75 output
org.apache.spark.sql.catalyst.parser.ParseException
no viable alternative at input '1 fake_unit'(line 1, pos 18)
== SQL ==
select interval 1 fake_unit
------------------^^^
-- !query 76
select interval 1 year to month
-- !query 76 schema
struct<>
-- !query 76 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 16)
== SQL ==
select interval 1 year to month
----------------^^^
-- !query 77
select 1 year to month
-- !query 77 schema
struct<>
-- !query 77 output
org.apache.spark.sql.catalyst.parser.ParseException
The value of from-to unit must be a string(line 1, pos 7)
== SQL ==
select 1 year to month
-------^^^
-- !query 78
select interval '1' year to second
-- !query 78 schema
struct<>
-- !query 78 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 16)
== SQL ==
select interval '1' year to second
----------------^^^
-- !query 79
select '1' year to second
-- !query 79 schema
struct<>
-- !query 79 output
org.apache.spark.sql.catalyst.parser.ParseException
Intervals FROM year TO second are not supported.(line 1, pos 7)
== SQL ==
select '1' year to second
-------^^^
-- !query 80
select interval 1 year '2-1' year to month
-- !query 80 schema
struct<>
-- !query 80 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 23)
== SQL ==
select interval 1 year '2-1' year to month
-----------------------^^^
-- !query 81
select 1 year '2-1' year to month
-- !query 81 schema
struct<>
-- !query 81 output
org.apache.spark.sql.catalyst.parser.ParseException
Can only have a single from-to unit in the interval literal syntax(line 1, pos 14)
== SQL ==
select 1 year '2-1' year to month
--------------^^^

View file

@ -1564,17 +1564,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
} }
e.message.contains("Cannot save interval data type into external storage") e.message.contains("Cannot save interval data type into external storage")
}) })
val e1 = intercept[AnalysisException] {
sql("select interval")
}
assert(e1.message.contains("at least one time unit should be given for interval literal"))
// Currently we don't yet support nanosecond
val e2 = intercept[AnalysisException] {
sql("select interval 23 nanosecond")
}
assert(e2.message.contains("no viable alternative at input 'interval 23 nanosecond'"))
} }
test("SPARK-8945: add and subtract expressions for interval type") { test("SPARK-8945: add and subtract expressions for interval type") {