From d4de01f567cf8d8a9ca880c7ed50eb572274e267 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 10 Nov 2019 10:10:04 -0800 Subject: [PATCH] [SPARK-29408][SQL] Support `-` before `interval` in interval literals ### What changes were proposed in this pull request? - `SqlBase.g4` is modified to support a negative sign `-` in the interval type constructor from a string and in interval literals - Negate the interval in `AstBuilder` if a negative sign is present. - Interval-related SQL statements are moved from `inputs/datetime.sql` to the new file `inputs/interval.sql` For example: ```sql spark-sql> select -interval '-1 month 1 day -1 second'; 1 months -1 days 1 seconds spark-sql> select -interval -1 month 1 day -1 second; 1 months -1 days 1 seconds ``` ### Why are the changes needed? For feature parity with PostgreSQL, which supports this syntax: ```sql # select -interval '-1 month 1 day -1 second'; ?column? ------------------------- 1 mon -1 days +00:00:01 (1 row) ``` ### Does this PR introduce any user-facing change? No ### How was this patch tested? - Added tests to `ExpressionParserSuite` - by `interval.sql` Closes #26438 from MaxGekk/negative-interval. 
Authored-by: Maxim Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/parser/SqlBase.g4 | 4 +- .../sql/catalyst/parser/AstBuilder.scala | 18 ++++- .../parser/ExpressionParserSuite.scala | 17 ++++- .../resources/sql-tests/inputs/datetime.sql | 11 --- .../resources/sql-tests/inputs/interval.sql | 15 ++++ .../sql-tests/results/ansi/interval.sql.out | 8 +- .../sql-tests/results/datetime.sql.out | 58 +-------------- .../sql-tests/results/interval.sql.out | 74 ++++++++++++++++++- 8 files changed, 123 insertions(+), 82 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 113f41183e..a4a23d4b33 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -767,7 +767,7 @@ primaryExpression constant : NULL #nullLiteral | interval #intervalLiteral - | identifier STRING #typeConstructor + | negativeSign=MINUS? identifier STRING #typeConstructor | number #numericLiteral | booleanValue #booleanLiteral | STRING+ #stringLiteral @@ -790,7 +790,7 @@ booleanValue ; interval - : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? + : negativeSign=MINUS? INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? | {ansi}? 
(errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d95d23b321..00a1964c95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1881,7 +1881,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging ex.setStackTrace(e.getStackTrace) throw ex } - Literal(interval, CalendarIntervalType) + Literal(applyNegativeSign(ctx.negativeSign, interval), CalendarIntervalType) case "X" => val padding = if (value.length % 2 != 0) "0" else "" Literal(DatatypeConverter.parseHexBinary(padding + value)) @@ -2025,6 +2025,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } + private def applyNegativeSign(sign: Token, interval: CalendarInterval): CalendarInterval = { + if (sign != null && sign.getText == "-") { + IntervalUtils.negate(interval) + } else { + interval + } + } + /** * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported: * - multiple unit value pairs, for instance: interval 2 months 2 days. 
@@ -2038,7 +2046,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging "Can only have a single from-to unit in the interval literal syntax", innerCtx.unitToUnitInterval) } - Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType) + val interval = applyNegativeSign( + ctx.negativeSign, + visitMultiUnitsInterval(innerCtx.multiUnitsInterval)) + Literal(interval, CalendarIntervalType) } else if (ctx.errorCapturingUnitToUnitInterval != null) { val innerCtx = ctx.errorCapturingUnitToUnitInterval if (innerCtx.error1 != null || innerCtx.error2 != null) { @@ -2047,7 +2058,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging "Can only have a single from-to unit in the interval literal syntax", errorCtx) } - Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType) + val interval = applyNegativeSign(ctx.negativeSign, visitUnitToUnitInterval(innerCtx.body)) + Literal(interval, CalendarIntervalType) } else { throw new ParseException("at least one time unit should be given for interval literal", ctx) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 6296597fe1..a707b456c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -438,6 +438,9 @@ class ExpressionParserSuite extends AnalysisTest { assertEqual("InterVal 'interval 3 month 1 hour'", intervalLiteral) assertEqual("INTERVAL '3 month 1 hour'", intervalLiteral) intercept("Interval 'interval 3 monthsss 1 hoursss'", "Cannot parse the INTERVAL value") + assertEqual( + "-interval '3 month 1 hour'", + Literal(IntervalUtils.fromString("interval -3 month -1 hour"))) // Binary. 
assertEqual("X'A'", Literal(Array(0x0a).map(_.toByte))) @@ -604,11 +607,17 @@ class ExpressionParserSuite extends AnalysisTest { test("intervals") { def checkIntervals(intervalValue: String, expected: Literal): Unit = { - assertEqual(s"interval $intervalValue", expected) + Seq( + "" -> expected, + "-" -> expected.copy( + value = IntervalUtils.negate(expected.value.asInstanceOf[CalendarInterval])) + ).foreach { case (sign, expectedLiteral) => + assertEqual(s"${sign}interval $intervalValue", expectedLiteral) - // SPARK-23264 Support interval values without INTERVAL clauses if ANSI SQL enabled - withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { - assertEqual(intervalValue, expected) + // SPARK-23264 Support interval values without INTERVAL clauses if ANSI SQL enabled + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + assertEqual(intervalValue, expected) + } } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 09eb185bd4..0e22af1fbd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -36,14 +36,3 @@ select date '2001-10-01' - 7; select date '2001-10-01' - date '2001-09-28'; select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678'; select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'; - --- interval operations -select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15'); -select interval 4 month 2 weeks 3 microseconds * 1.5; -select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5; - --- interval operation with null and zero case -select interval '2 seconds' / 0; -select interval '2 seconds' / null; -select interval '2 seconds' * null; -select null * interval '2 seconds'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 7babe05ef3..14153c6f1c 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -41,3 +41,18 @@ select max(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 se -- min select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v); + +-- multiply and divide an interval by a number +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15'); +select interval 4 month 2 weeks 3 microseconds * 1.5; +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5; + +-- interval operation with null and zero case +select interval '2 seconds' / 0; +select interval '2 seconds' / null; +select interval '2 seconds' * null; +select null * interval '2 seconds'; + +-- interval with a negative sign +select -interval '-1 month 1 day -1 second'; +select -interval -1 month 1 day -1 second; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index c6392617e2..0085cacf0d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -118,7 +118,7 @@ select interval '2-2' year to month + dateval from interval_arithmetic -- !query 9 schema -struct +struct -- !query 9 output 2012-01-01 2009-11-01 2014-03-01 2014-03-01 2009-11-01 2009-11-01 2014-03-01 @@ -150,7 +150,7 @@ select interval '2-2' year to month + tsval from interval_arithmetic -- !query 11 schema -struct +struct -- !query 11 output 2012-01-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 2014-03-01 00:00:00 2009-11-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 @@ -204,7 +204,7 @@ select interval '99 11:22:33.123456789' day to second + dateval from interval_arithmetic -- !query 15 schema -struct +struct -- !query 15 output 2012-01-01 2011-09-23 2012-04-09 2012-04-09 2011-09-23 2011-09-23 2012-04-09 @@ -236,7 +236,7 @@ select 
interval '99 11:22:33.123456789' day to second + tsval from interval_arithmetic -- !query 17 schema -struct +struct -- !query 17 output 2012-01-01 00:00:00 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 2012-04-09 11:22:33.123456 2011-09-23 12:37:26.876544 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index b5ea7d66fd..a40f5acb6d 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 17 -- !query 0 @@ -145,59 +145,3 @@ select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01' struct -- !query 16 output -2078 hours -48 minutes -47.654322 seconds - - --- !query 17 -select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15') --- !query 17 schema -struct --- !query 17 output -30 hours 33 minutes 36.003006 seconds - - --- !query 18 -select interval 4 month 2 weeks 3 microseconds * 1.5 --- !query 18 schema -struct --- !query 18 output -6 months 21 days 0.000005 seconds - - --- !query 19 -select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 --- !query 19 schema -struct --- !query 19 output -16 hours - - --- !query 20 -select interval '2 seconds' / 0 --- !query 20 schema -struct --- !query 20 output -NULL - - --- !query 21 -select interval '2 seconds' / null --- !query 21 schema -struct --- !query 21 output -NULL - - --- !query 22 -select interval '2 seconds' * null --- !query 22 schema -struct --- !query 22 output -NULL - - --- !query 23 -select null * interval '2 seconds' --- !query 23 schema -struct --- !query 23 output -NULL diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 58de1331c1..b8b6b22eb7 
100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 22 +-- Number of queries: 31 -- !query 0 @@ -178,3 +178,75 @@ select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 se struct -- !query 21 output 1 seconds + + +-- !query 22 +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15') +-- !query 22 schema +struct +-- !query 22 output +30 hours 33 minutes 36.003006 seconds + + +-- !query 23 +select interval 4 month 2 weeks 3 microseconds * 1.5 +-- !query 23 schema +struct +-- !query 23 output +6 months 21 days 0.000005 seconds + + +-- !query 24 +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 +-- !query 24 schema +struct +-- !query 24 output +16 hours + + +-- !query 25 +select interval '2 seconds' / 0 +-- !query 25 schema +struct +-- !query 25 output +NULL + + +-- !query 26 +select interval '2 seconds' / null +-- !query 26 schema +struct +-- !query 26 output +NULL + + +-- !query 27 +select interval '2 seconds' * null +-- !query 27 schema +struct +-- !query 27 output +NULL + + +-- !query 28 +select null * interval '2 seconds' +-- !query 28 schema +struct +-- !query 28 output +NULL + + +-- !query 29 +select -interval '-1 month 1 day -1 second' +-- !query 29 schema +struct<1 months -1 days 1 seconds:interval> +-- !query 29 output +1 months -1 days 1 seconds + + +-- !query 30 +select -interval -1 month 1 day -1 second +-- !query 30 schema +struct<1 months -1 days 1 seconds:interval> +-- !query 30 output +1 months -1 days 1 seconds