[SPARK-28310][SQL] Support (FIRST_VALUE|LAST_VALUE)(expr[ (IGNORE|RESPECT) NULLS]?) syntax
## What changes were proposed in this pull request? According to the ANSI SQL 2011 standard, `FIRST_VALUE` and `LAST_VALUE` accept an optional `(IGNORE | RESPECT) NULLS` clause: ![image](https://user-images.githubusercontent.com/698621/60855327-d01c6900-a235-11e9-9a1b-d438615a4673.png) Teradata, Oracle, and Redshift already support this grammar: - Teradata - https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/SUwCpTupqmlBJvi2mipOaA - Oracle - https://docs.oracle.com/en/database/oracle/oracle-database/18/sqlrf/FIRST_VALUE.html#GUID-D454EC3F-370C-4C64-9B11-33FCB10D95EC - Redshift - https://docs.aws.amazon.com/redshift/latest/dg/r_WF_first_value.html - PostgreSQL does not implement this grammar: https://www.postgresql.org/docs/devel/functions-window.html >The SQL standard defines a RESPECT NULLS or IGNORE NULLS option for lead, lag, first_value, last_value, and nth_value. This is not implemented in PostgreSQL: the behavior is always the same as the standard's default, namely RESPECT NULLS. ## How was this patch tested? Unit tests (UT). Closes #25082 from lipzhu/SPARK-28310. Authored-by: Zhu, Lipeng <lipzhu@ebay.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
bbc2be4f42
commit
b89c3de1a4
|
@ -117,6 +117,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
<tr><td>FIELDS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>FILEFORMAT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>FIRST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>FIRST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>FOLLOWING</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>FOR</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>FOREIGN</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
|
@ -151,6 +152,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
<tr><td>JOIN</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>KEYS</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>LAST</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>LAST_VALUE</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>LATERAL</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>LAZY</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>LEADING</td><td>reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
|
@ -219,6 +221,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
<tr><td>REPAIR</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>REPLACE</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>RESET</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>RESPECT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>RESTRICT</td><td>non-reserved</td><td>non-reserved</td><td>non-reserved</td></tr>
|
||||
<tr><td>REVOKE</td><td>non-reserved</td><td>non-reserved</td><td>reserved</td></tr>
|
||||
<tr><td>RIGHT</td><td>reserved</td><td>strict-non-reserved</td><td>reserved</td></tr>
|
||||
|
|
|
@ -680,8 +680,8 @@ primaryExpression
|
|||
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
|
||||
| CAST '(' expression AS dataType ')' #cast
|
||||
| STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
|
||||
| FIRST '(' expression (IGNORE NULLS)? ')' #first
|
||||
| LAST '(' expression (IGNORE NULLS)? ')' #last
|
||||
| (FIRST | FIRST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #first
|
||||
| (LAST | LAST_VALUE) '(' expression ((IGNORE | RESPECT) NULLS)? ')' #last
|
||||
| POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
|
||||
| constant #constantDefault
|
||||
| ASTERISK #star
|
||||
|
@ -1023,6 +1023,7 @@ ansiNonReserved
|
|||
| REPAIR
|
||||
| REPLACE
|
||||
| RESET
|
||||
| RESPECT
|
||||
| RESTRICT
|
||||
| REVOKE
|
||||
| RLIKE
|
||||
|
@ -1184,6 +1185,7 @@ nonReserved
|
|||
| FIELDS
|
||||
| FILEFORMAT
|
||||
| FIRST
|
||||
| FIRST_VALUE
|
||||
| FOLLOWING
|
||||
| FOR
|
||||
| FOREIGN
|
||||
|
@ -1214,6 +1216,7 @@ nonReserved
|
|||
| ITEMS
|
||||
| KEYS
|
||||
| LAST
|
||||
| LAST_VALUE
|
||||
| LATERAL
|
||||
| LAZY
|
||||
| LEADING
|
||||
|
@ -1278,6 +1281,7 @@ nonReserved
|
|||
| REPAIR
|
||||
| REPLACE
|
||||
| RESET
|
||||
| RESPECT
|
||||
| RESTRICT
|
||||
| REVOKE
|
||||
| RLIKE
|
||||
|
@ -1435,6 +1439,7 @@ FETCH: 'FETCH';
|
|||
FIELDS: 'FIELDS';
|
||||
FILEFORMAT: 'FILEFORMAT';
|
||||
FIRST: 'FIRST';
|
||||
FIRST_VALUE: 'FIRST_VALUE';
|
||||
FOLLOWING: 'FOLLOWING';
|
||||
FOR: 'FOR';
|
||||
FOREIGN: 'FOREIGN';
|
||||
|
@ -1469,6 +1474,7 @@ ITEMS: 'ITEMS';
|
|||
JOIN: 'JOIN';
|
||||
KEYS: 'KEYS';
|
||||
LAST: 'LAST';
|
||||
LAST_VALUE: 'LAST_VALUE';
|
||||
LATERAL: 'LATERAL';
|
||||
LAZY: 'LAZY';
|
||||
LEADING: 'LEADING';
|
||||
|
@ -1536,6 +1542,7 @@ RENAME: 'RENAME';
|
|||
REPAIR: 'REPAIR';
|
||||
REPLACE: 'REPLACE';
|
||||
RESET: 'RESET';
|
||||
RESPECT: 'RESPECT';
|
||||
RESTRICT: 'RESTRICT';
|
||||
REVOKE: 'REVOKE';
|
||||
RIGHT: 'RIGHT';
|
||||
|
|
|
@ -737,6 +737,15 @@ class ExpressionParserSuite extends AnalysisTest {
|
|||
assertEqual("last(a)", Last('a, Literal(false)).toAggregateExpression())
|
||||
}
|
||||
|
||||
// Parser test for the FIRST_VALUE / LAST_VALUE syntax with an optional
// IGNORE NULLS or RESPECT NULLS clause. The expected expressions below show
// that "ignore nulls" parses to First/Last with Literal(true), while both
// "respect nulls" and the bare form (no clause) parse to Literal(false).
test("Support respect nulls keywords for first_value and last_value") {
|
||||
assertEqual("first_value(a ignore nulls)", First('a, Literal(true)).toAggregateExpression())
|
||||
assertEqual("first_value(a respect nulls)", First('a, Literal(false)).toAggregateExpression())
|
||||
assertEqual("first_value(a)", First('a, Literal(false)).toAggregateExpression())
|
||||
assertEqual("last_value(a ignore nulls)", Last('a, Literal(true)).toAggregateExpression())
|
||||
assertEqual("last_value(a respect nulls)", Last('a, Literal(false)).toAggregateExpression())
|
||||
assertEqual("last_value(a)", Last('a, Literal(false)).toAggregateExpression())
|
||||
}
|
||||
|
||||
test("timestamp literals") {
|
||||
DateTimeTestUtils.outstandingTimezones.foreach { timeZone =>
|
||||
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone.getID) {
|
||||
|
|
|
@ -381,6 +381,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
"fields",
|
||||
"fileformat",
|
||||
"first",
|
||||
"first_value",
|
||||
"following",
|
||||
"for",
|
||||
"foreign",
|
||||
|
@ -415,6 +416,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
"join",
|
||||
"keys",
|
||||
"last",
|
||||
"last_value",
|
||||
"lateral",
|
||||
"lazy",
|
||||
"leading",
|
||||
|
@ -483,6 +485,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
"repair",
|
||||
"replace",
|
||||
"reset",
|
||||
"respect",
|
||||
"restrict",
|
||||
"revoke",
|
||||
"right",
|
||||
|
@ -579,6 +582,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
"except",
|
||||
"false",
|
||||
"fetch",
|
||||
"first_value",
|
||||
"for",
|
||||
"foreign",
|
||||
"from",
|
||||
|
@ -593,6 +597,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
"into",
|
||||
"join",
|
||||
"is",
|
||||
"last_value",
|
||||
"leading",
|
||||
"left",
|
||||
"minute",
|
||||
|
|
Loading…
Reference in a new issue