[SPARK-28310][SQL] Support (FIRST_VALUE|LAST_VALUE)(expr[ (IGNORE|RESPECT) NULLS]?) syntax

## What changes were proposed in this pull request?
According to ANSI SQL 2011, `FIRST_VALUE` and `LAST_VALUE` accept an optional `IGNORE NULLS` or `RESPECT NULLS` clause:
![image](https://user-images.githubusercontent.com/698621/60855327-d01c6900-a235-11e9-9a1b-d438615a4673.png)

Teradata, Oracle, and Redshift already support this grammar:

- Teradata - https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/SUwCpTupqmlBJvi2mipOaA
- Oracle - https://docs.oracle.com/en/database/oracle/oracle-database/18/sqlrf/FIRST_VALUE.html#GUID-D454EC3F-370C-4C64-9B11-33FCB10D95EC
- Redshift - https://docs.aws.amazon.com/redshift/latest/dg/r_WF_first_value.html

- PostgreSQL does not implement this grammar:
https://www.postgresql.org/docs/devel/functions-window.html

  >The SQL standard defines a RESPECT NULLS or IGNORE NULLS option for lead, lag, first_value, last_value, and nth_value. This is not implemented in PostgreSQL: the behavior is always the same as the standard's default, namely RESPECT NULLS.

## How was this patch tested?
Unit tests (added to `ExpressionParserSuite` and `TableIdentifierParserSuite`).

Closes #25082 from lipzhu/SPARK-28310.

Authored-by: Zhu, Lipeng <lipzhu@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Zhu, Lipeng 2019-07-10 07:41:05 -07:00 committed by Dongjoon Hyun
parent bbc2be4f42
commit b89c3de1a4
4 changed files with 26 additions and 2 deletions

View file

@ -117,6 +117,7 @@ Below is a list of all the keywords in Spark SQL.
<tr><td>FIELDS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>FILEFORMAT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>FIRST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>FIRST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
<tr><td>FOLLOWING</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>FOR</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
<tr><td>FOREIGN</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
@ -151,6 +152,7 @@ Below is a list of all the keywords in Spark SQL.
<tr><td>JOIN</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>
<tr><td>KEYS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>LAST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>LAST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
<tr><td>LATERAL</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
<tr><td>LAZY</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>LEADING</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
@ -219,6 +221,7 @@ Below is a list of all the keywords in Spark SQL.
<tr><td>REPAIR</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>REPLACE</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>RESET</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>RESPECT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>RESTRICT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
<tr><td>REVOKE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
<tr><td>RIGHT</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>

View file

@ -680,8 +680,8 @@ primaryExpression
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| CAST '(' expression AS dataType ')' #cast
| STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
| FIRST '(' expression (IGNORE NULLS)? ')' #first
| LAST '(' expression (IGNORE NULLS)? ')' #last
| (FIRST | FIRST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #first
| (LAST | LAST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #last
| POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
| constant #constantDefault
| ASTERISK #star
@ -1023,6 +1023,7 @@ ansiNonReserved
| REPAIR
| REPLACE
| RESET
| RESPECT
| RESTRICT
| REVOKE
| RLIKE
@ -1184,6 +1185,7 @@ nonReserved
| FIELDS
| FILEFORMAT
| FIRST
| FIRST_VALUE
| FOLLOWING
| FOR
| FOREIGN
@ -1214,6 +1216,7 @@ nonReserved
| ITEMS
| KEYS
| LAST
| LAST_VALUE
| LATERAL
| LAZY
| LEADING
@ -1278,6 +1281,7 @@ nonReserved
| REPAIR
| REPLACE
| RESET
| RESPECT
| RESTRICT
| REVOKE
| RLIKE
@ -1435,6 +1439,7 @@ FETCH: 'FETCH';
FIELDS: 'FIELDS';
FILEFORMAT: 'FILEFORMAT';
FIRST: 'FIRST';
FIRST_VALUE: 'FIRST_VALUE';
FOLLOWING: 'FOLLOWING';
FOR: 'FOR';
FOREIGN: 'FOREIGN';
@ -1469,6 +1474,7 @@ ITEMS: 'ITEMS';
JOIN: 'JOIN';
KEYS: 'KEYS';
LAST: 'LAST';
LAST_VALUE: 'LAST_VALUE';
LATERAL: 'LATERAL';
LAZY: 'LAZY';
LEADING: 'LEADING';
@ -1536,6 +1542,7 @@ RENAME: 'RENAME';
REPAIR: 'REPAIR';
REPLACE: 'REPLACE';
RESET: 'RESET';
RESPECT: 'RESPECT';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
RIGHT: 'RIGHT';

View file

@ -737,6 +737,15 @@ class ExpressionParserSuite extends AnalysisTest {
assertEqual("last(a)", Last('a, Literal(false)).toAggregateExpression())
}
test("Support respect nulls keywords for first_value and last_value") {
  // Second argument of First/Last as these cases expect it:
  // Literal(true) for IGNORE NULLS, Literal(false) for RESPECT NULLS.
  val ignoreNulls = Literal(true)
  val respectNulls = Literal(false)

  // FIRST_VALUE: explicit IGNORE NULLS, explicit RESPECT NULLS, then no clause.
  // Omitting the clause is expected to parse the same as RESPECT NULLS.
  assertEqual(
    "first_value(a ignore nulls)",
    First('a, ignoreNulls).toAggregateExpression())
  assertEqual(
    "first_value(a respect nulls)",
    First('a, respectNulls).toAggregateExpression())
  assertEqual(
    "first_value(a)",
    First('a, respectNulls).toAggregateExpression())

  // LAST_VALUE: the same three forms.
  assertEqual(
    "last_value(a ignore nulls)",
    Last('a, ignoreNulls).toAggregateExpression())
  assertEqual(
    "last_value(a respect nulls)",
    Last('a, respectNulls).toAggregateExpression())
  assertEqual(
    "last_value(a)",
    Last('a, respectNulls).toAggregateExpression())
}
test("timestamp literals") {
DateTimeTestUtils.outstandingTimezones.foreach { timeZone =>
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone.getID) {

View file

@ -381,6 +381,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"fields",
"fileformat",
"first",
"first_value",
"following",
"for",
"foreign",
@ -415,6 +416,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"join",
"keys",
"last",
"last_value",
"lateral",
"lazy",
"leading",
@ -483,6 +485,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"repair",
"replace",
"reset",
"respect",
"restrict",
"revoke",
"right",
@ -579,6 +582,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"except",
"false",
"fetch",
"first_value",
"for",
"foreign",
"from",
@ -593,6 +597,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
"into",
"join",
"is",
"last_value",
"leading",
"left",
"minute",