[SPARK-27951][SQL][FOLLOWUP] Improve the window function nth_value

### What changes were proposed in this pull request?
https://github.com/apache/spark/pull/29604 supports the ANSI SQL NTH_VALUE.
We should override the `prettyName` and `sql`.

### Why are the changes needed?
Make the name of nth_value correct.
To show the ignoreNulls parameter correctly.

### Does this PR introduce _any_ user-facing change?
'No'.

### How was this patch tested?
Jenkins test.

Closes #29886 from beliefer/improve-nth_value.

Lead-authored-by: gengjiaan <gengjiaan@360.cn>
Co-authored-by: beliefer <beliefer@163.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
gengjiaan 2020-09-29 09:54:43 +09:00 committed by HyukjinKwon
parent 173da5bf11
commit a53fc9b7ae
3 changed files with 5 additions and 3 deletions

View file

@ -669,7 +669,9 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool
override lazy val evaluateExpression: AttributeReference = result
override def toString: String = s"$prettyName($input, $offset)${if (ignoreNulls) " ignore nulls"}"
override def prettyName: String = "nth_value"
override def sql: String =
s"$prettyName(${input.sql}, ${offsetExpr.sql})${if (ignoreNulls) " ignore nulls" else ""}"
}
/**

View file

@ -191,7 +191,7 @@
| org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct<now():timestamp> |
| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,nthvalue(b, 2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,nth_value(b, 2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct<nullif(2, 2):int> |
| org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct<nvl(NULL, array(2)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct<nvl2(NULL, 2, 1):int> |

View file

@ -391,7 +391,7 @@ SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'nthvalue(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7
cannot resolve 'nth_value(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7
-- !query