[SPARK-30789][SQL] Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE
### What changes were proposed in this pull request? All of `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE` should support IGNORE NULLS | RESPECT NULLS. For example: ``` LEAD (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` LAG (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` NTH_VALUE (expr, offset) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] [ ORDER BY window_ordering frame_clause ] ) ``` The mainstream database or engine supports this syntax contains: **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/NTH_VALUE.html#GUID-F8A0E88C-67E5-4AA6-9515-95D03A7F9EA0 **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_WF_NTH.html **Presto** https://prestodb.io/docs/current/functions/window.html **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1513.htm **Teradata** https://docs.teradata.com/r/756LNiPSFdY~4JcCCcR5Cw/GjCT6l7trjkIEjt~7Dhx4w **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/lead.html https://docs.snowflake.com/en/sql-reference/functions/lag.html https://docs.snowflake.com/en/sql-reference/functions/nth_value.html https://docs.snowflake.com/en/sql-reference/functions/first_value.html https://docs.snowflake.com/en/sql-reference/functions/last_value.html **Exasol** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lead.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lag.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/nth_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/first_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/last_value.htm ### Why are the changes needed? Support `(IGNORE | RESPECT) NULLS` for `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE `is very useful. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Jenkins test Closes #30943 from beliefer/SPARK-30789. Lead-authored-by: gengjiaan <gengjiaan@360.cn> Co-authored-by: beliefer <beliefer@163.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
2afd1fb492
commit
ba974ea8e4
|
@ -363,6 +363,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
|REPAIR|non-reserved|non-reserved|non-reserved|
|
||||
|REPLACE|non-reserved|non-reserved|non-reserved|
|
||||
|RESET|non-reserved|non-reserved|non-reserved|
|
||||
|RESPECT|non-reserved|non-reserved|non-reserved|
|
||||
|RESTRICT|non-reserved|non-reserved|non-reserved|
|
||||
|REVOKE|non-reserved|non-reserved|reserved|
|
||||
|RIGHT|reserved|strict-non-reserved|reserved|
|
||||
|
|
|
@ -803,7 +803,8 @@ primaryExpression
|
|||
| '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
|
||||
| '(' query ')' #subqueryExpression
|
||||
| functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
|
||||
(FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)? #functionCall
|
||||
(FILTER '(' WHERE where=booleanExpression ')')?
|
||||
(nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall
|
||||
| identifier '->' expression #lambda
|
||||
| '(' identifier (',' identifier)+ ')' '->' expression #lambda
|
||||
| value=primaryExpression '[' index=valueExpression ']' #subscript
|
||||
|
@ -1143,6 +1144,7 @@ ansiNonReserved
|
|||
| REPAIR
|
||||
| REPLACE
|
||||
| RESET
|
||||
| RESPECT
|
||||
| RESTRICT
|
||||
| REVOKE
|
||||
| RLIKE
|
||||
|
@ -1397,6 +1399,7 @@ nonReserved
|
|||
| REPAIR
|
||||
| REPLACE
|
||||
| RESET
|
||||
| RESPECT
|
||||
| RESTRICT
|
||||
| REVOKE
|
||||
| RLIKE
|
||||
|
@ -1651,6 +1654,7 @@ RENAME: 'RENAME';
|
|||
REPAIR: 'REPAIR';
|
||||
REPLACE: 'REPLACE';
|
||||
RESET: 'RESET';
|
||||
RESPECT: 'RESPECT';
|
||||
RESTRICT: 'RESTRICT';
|
||||
REVOKE: 'REVOKE';
|
||||
RIGHT: 'RIGHT';
|
||||
|
|
|
@ -268,6 +268,10 @@ object QueryCompilationErrors {
|
|||
s"but $prettyName is not an aggregate function")
|
||||
}
|
||||
|
||||
def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = {
|
||||
new AnalysisException(s"Function $prettyName does not support IGNORE NULLS")
|
||||
}
|
||||
|
||||
def nonDeterministicFilterInAggregateError(): Throwable = {
|
||||
new AnalysisException("FILTER expression is non-deterministic, " +
|
||||
"it cannot be used in aggregate functions")
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.spark.sql.AnalysisException
|
|||
import org.apache.spark.sql.catalyst._
|
||||
import org.apache.spark.sql.catalyst.catalog._
|
||||
import org.apache.spark.sql.catalyst.encoders.OuterScopes
|
||||
import org.apache.spark.sql.catalyst.expressions._
|
||||
import org.apache.spark.sql.catalyst.expressions.{FrameLessOffsetWindowFunction, _}
|
||||
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
|
||||
import org.apache.spark.sql.catalyst.expressions.aggregate._
|
||||
import org.apache.spark.sql.catalyst.expressions.objects._
|
||||
|
@ -2113,7 +2113,7 @@ class Analyzer(override val catalogManager: CatalogManager)
|
|||
name, other.getClass.getCanonicalName)
|
||||
}
|
||||
}
|
||||
case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter) =>
|
||||
case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter, ignoreNulls) =>
|
||||
withPosition(u) {
|
||||
v1SessionCatalog.lookupFunction(funcId, arguments) match {
|
||||
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
|
||||
|
@ -2123,19 +2123,58 @@ class Analyzer(override val catalogManager: CatalogManager)
|
|||
if (isDistinct || filter.isDefined) {
|
||||
throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
|
||||
wf.prettyName)
|
||||
} else if (ignoreNulls) {
|
||||
wf match {
|
||||
case nthValue: NthValue =>
|
||||
nthValue.copy(ignoreNulls = ignoreNulls)
|
||||
case _ =>
|
||||
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
|
||||
wf.prettyName)
|
||||
}
|
||||
} else {
|
||||
wf
|
||||
}
|
||||
case owf: FrameLessOffsetWindowFunction =>
|
||||
if (isDistinct || filter.isDefined) {
|
||||
throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
|
||||
owf.prettyName)
|
||||
} else if (ignoreNulls) {
|
||||
owf match {
|
||||
case lead: Lead =>
|
||||
lead.copy(ignoreNulls = ignoreNulls)
|
||||
case lag: Lag =>
|
||||
lag.copy(ignoreNulls = ignoreNulls)
|
||||
case _ =>
|
||||
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
|
||||
owf.prettyName)
|
||||
}
|
||||
} else {
|
||||
owf
|
||||
}
|
||||
// We get an aggregate function, we need to wrap it in an AggregateExpression.
|
||||
case agg: AggregateFunction =>
|
||||
if (filter.isDefined && !filter.get.deterministic) {
|
||||
throw QueryCompilationErrors.nonDeterministicFilterInAggregateError
|
||||
}
|
||||
if (ignoreNulls) {
|
||||
val aggFunc = agg match {
|
||||
case first: First => first.copy(ignoreNulls = ignoreNulls)
|
||||
case last: Last => last.copy(ignoreNulls = ignoreNulls)
|
||||
case _ =>
|
||||
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
|
||||
agg.prettyName)
|
||||
}
|
||||
AggregateExpression(aggFunc, Complete, isDistinct, filter)
|
||||
} else {
|
||||
AggregateExpression(agg, Complete, isDistinct, filter)
|
||||
}
|
||||
// This function is not an aggregate function, just return the resolved one.
|
||||
case other if (isDistinct || filter.isDefined) =>
|
||||
throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
|
||||
other.prettyName)
|
||||
case other if (ignoreNulls) =>
|
||||
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
|
||||
other.prettyName)
|
||||
case e: String2TrimExpression if arguments.size == 2 =>
|
||||
if (trimWarningEnabled.get) {
|
||||
log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." +
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog
|
|||
import org.apache.spark.sql.catalyst.expressions._
|
||||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
||||
import org.apache.spark.sql.catalyst.rules.Rule
|
||||
import org.apache.spark.sql.errors.QueryCompilationErrors
|
||||
import org.apache.spark.sql.types.DataType
|
||||
|
||||
/**
|
||||
|
@ -32,13 +33,16 @@ import org.apache.spark.sql.types.DataType
|
|||
case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[LogicalPlan] {
|
||||
|
||||
override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions {
|
||||
case u @ UnresolvedFunction(fn, children, false, filter)
|
||||
case u @ UnresolvedFunction(fn, children, false, filter, ignoreNulls)
|
||||
if hasLambdaAndResolvedArguments(children) =>
|
||||
withPosition(u) {
|
||||
catalog.lookupFunction(fn, children) match {
|
||||
case func: HigherOrderFunction =>
|
||||
filter.foreach(_.failAnalysis("FILTER predicate specified, " +
|
||||
s"but ${func.prettyName} is not an aggregate function"))
|
||||
if (ignoreNulls) {
|
||||
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName)
|
||||
}
|
||||
func
|
||||
case other => other.failAnalysis(
|
||||
"A lambda function should only be used in a higher order function. However, " +
|
||||
|
|
|
@ -258,7 +258,8 @@ case class UnresolvedFunction(
|
|||
name: FunctionIdentifier,
|
||||
arguments: Seq[Expression],
|
||||
isDistinct: Boolean,
|
||||
filter: Option[Expression] = None)
|
||||
filter: Option[Expression] = None,
|
||||
ignoreNulls: Boolean = false)
|
||||
extends Expression with Unevaluable {
|
||||
|
||||
override def children: Seq[Expression] = arguments ++ filter.toSeq
|
||||
|
|
|
@ -1697,8 +1697,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
|
|||
expressions
|
||||
}
|
||||
val filter = Option(ctx.where).map(expression(_))
|
||||
val ignoreNulls =
|
||||
Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false)
|
||||
val function = UnresolvedFunction(
|
||||
getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter)
|
||||
getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter, ignoreNulls)
|
||||
|
||||
// Check if the function is evaluated in a windowed context.
|
||||
ctx.windowSpec match {
|
||||
|
|
|
@ -212,6 +212,26 @@ class AnalysisErrorSuite extends AnalysisTest {
|
|||
CatalystSqlParser.parsePlan("SELECT count(a) FILTER (WHERE rand(int(c)) > 1) FROM TaBlE2"),
|
||||
"FILTER expression is non-deterministic, it cannot be used in aggregate functions" :: Nil)
|
||||
|
||||
errorTest(
|
||||
"function don't support ignore nulls",
|
||||
CatalystSqlParser.parsePlan("SELECT hex(a) IGNORE NULLS FROM TaBlE2"),
|
||||
"Function hex does not support IGNORE NULLS" :: Nil)
|
||||
|
||||
errorTest(
|
||||
"some window function don't support ignore nulls",
|
||||
CatalystSqlParser.parsePlan("SELECT percent_rank(a) IGNORE NULLS FROM TaBlE2"),
|
||||
"Function percent_rank does not support IGNORE NULLS" :: Nil)
|
||||
|
||||
errorTest(
|
||||
"aggregate function don't support ignore nulls",
|
||||
CatalystSqlParser.parsePlan("SELECT count(a) IGNORE NULLS FROM TaBlE2"),
|
||||
"Function count does not support IGNORE NULLS" :: Nil)
|
||||
|
||||
errorTest(
|
||||
"higher order function don't support ignore nulls",
|
||||
CatalystSqlParser.parsePlan("SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) " +
|
||||
"IGNORE NULLS"), "Function aggregate does not support IGNORE NULLS" :: Nil)
|
||||
|
||||
errorTest(
|
||||
"nested aggregate functions",
|
||||
testRelation.groupBy($"a")(
|
||||
|
|
|
@ -36,6 +36,18 @@ CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES
|
|||
('Barry Jones','SCM',10586)
|
||||
AS basic_pays(employee_name, department, salary);
|
||||
|
||||
CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES
|
||||
('a', 0, null),
|
||||
('a', 1, 'x'),
|
||||
('b', 2, null),
|
||||
('c', 3, null),
|
||||
('a', 4, 'y'),
|
||||
('b', 5, null),
|
||||
('a', 6, 'z'),
|
||||
('a', 7, 'v'),
|
||||
('a', 8, null)
|
||||
AS test_ignore_null(content, id, v);
|
||||
|
||||
-- RowsBetween
|
||||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
|
||||
ORDER BY cate, val;
|
||||
|
@ -263,3 +275,137 @@ WINDOW
|
|||
w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
|
||||
w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY salary DESC;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
lead(v, 0) IGNORE NULLS OVER w lead_0,
|
||||
lead(v, 1) IGNORE NULLS OVER w lead_1,
|
||||
lead(v, 2) IGNORE NULLS OVER w lead_2,
|
||||
lead(v, 3) IGNORE NULLS OVER w lead_3,
|
||||
lag(v, 0) IGNORE NULLS OVER w lag_0,
|
||||
lag(v, 1) IGNORE NULLS OVER w lag_1,
|
||||
lag(v, 2) IGNORE NULLS OVER w lag_2,
|
||||
lag(v, 3) IGNORE NULLS OVER w lag_3,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id;
|
||||
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
|
||||
ORDER BY id;
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 36
|
||||
-- Number of queries: 46
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -46,6 +46,24 @@ struct<>
|
|||
|
||||
|
||||
|
||||
-- !query
|
||||
CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES
|
||||
('a', 0, null),
|
||||
('a', 1, 'x'),
|
||||
('b', 2, null),
|
||||
('c', 3, null),
|
||||
('a', 4, 'y'),
|
||||
('b', 5, null),
|
||||
('a', 6, 'z'),
|
||||
('a', 7, 'v'),
|
||||
('a', 8, null)
|
||||
AS test_ignore_null(content, id, v)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
|
||||
ORDER BY cate, val
|
||||
|
@ -776,3 +794,263 @@ WINDOW
|
|||
w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING),
|
||||
w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY salary DESC
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
lead(v, 0) IGNORE NULLS OVER w lead_0,
|
||||
lead(v, 1) IGNORE NULLS OVER w lead_1,
|
||||
lead(v, 2) IGNORE NULLS OVER w lead_2,
|
||||
lead(v, 3) IGNORE NULLS OVER w lead_3,
|
||||
lag(v, 0) IGNORE NULLS OVER w lag_0,
|
||||
lag(v, 1) IGNORE NULLS OVER w lag_1,
|
||||
lag(v, 2) IGNORE NULLS OVER w lag_2,
|
||||
lag(v, 3) IGNORE NULLS OVER w lag_3,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL
|
||||
a 1 x x y z v x NULL NULL NULL x NULL NULL x x
|
||||
b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x
|
||||
c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x
|
||||
a 4 y y z v NULL y x NULL NULL x y NULL x y
|
||||
b 5 NULL NULL z v NULL NULL y x NULL x y NULL x y
|
||||
a 6 z z v NULL NULL z y x NULL x y z x z
|
||||
a 7 v v NULL NULL NULL v z y x x y z x v
|
||||
a 8 NULL NULL NULL NULL NULL NULL v z y x y z x v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL NULL NULL NULL NULL NULL
|
||||
a 1 x x NULL NULL x x
|
||||
b 2 NULL x NULL NULL x x
|
||||
c 3 NULL x NULL NULL x x
|
||||
a 4 y x y NULL x y
|
||||
b 5 NULL x y NULL x y
|
||||
a 6 z x y z x z
|
||||
a 7 v x y z x v
|
||||
a 8 NULL x y z x v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL NULL NULL NULL NULL NULL
|
||||
a 1 x x NULL NULL x x
|
||||
b 2 NULL x NULL NULL x x
|
||||
c 3 NULL x NULL NULL x x
|
||||
a 4 y x y NULL x y
|
||||
b 5 NULL x y NULL x y
|
||||
a 6 z x y z x z
|
||||
a 7 v x y z x v
|
||||
a 8 NULL x y z x v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x NULL NULL x x
|
||||
a 1 x x NULL NULL x x
|
||||
b 2 NULL x y NULL x y
|
||||
c 3 NULL x y NULL x y
|
||||
a 4 y y z NULL y z
|
||||
b 5 NULL y z v y v
|
||||
a 6 z y z v y v
|
||||
a 7 v z v NULL z v
|
||||
a 8 NULL z v NULL z v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x NULL NULL x x
|
||||
a 1 x x NULL NULL x x
|
||||
b 2 NULL x y NULL x y
|
||||
c 3 NULL x y NULL x y
|
||||
a 4 y y z NULL y z
|
||||
b 5 NULL y z v y v
|
||||
a 6 z y z v y v
|
||||
a 7 v z v NULL z v
|
||||
a 8 NULL z v NULL z v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x y z x v
|
||||
a 1 x x y z x v
|
||||
b 2 NULL y z v y v
|
||||
c 3 NULL y z v y v
|
||||
a 4 y y z v y v
|
||||
b 5 NULL z v NULL z v
|
||||
a 6 z z v NULL z v
|
||||
a 7 v v NULL NULL v v
|
||||
a 8 NULL NULL NULL NULL NULL NULL
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x y z x v
|
||||
a 1 x x y z x v
|
||||
b 2 NULL x y z x v
|
||||
c 3 NULL x y z x v
|
||||
a 4 y x y z x v
|
||||
b 5 NULL x y z x v
|
||||
a 6 z x y z x v
|
||||
a 7 v x y z x v
|
||||
a 8 NULL x y z x v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x y z x v
|
||||
a 1 x x y z x v
|
||||
b 2 NULL x y z x v
|
||||
c 3 NULL x y z x v
|
||||
a 4 y x y z x v
|
||||
b 5 NULL x y z x v
|
||||
a 6 z x y z x v
|
||||
a 7 v x y z x v
|
||||
a 8 NULL x y z x v
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
content,
|
||||
id,
|
||||
v,
|
||||
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
|
||||
nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
|
||||
nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
|
||||
first_value(v) IGNORE NULLS OVER w first_value,
|
||||
last_value(v) IGNORE NULLS OVER w last_value
|
||||
FROM
|
||||
test_ignore_null
|
||||
WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
|
||||
ORDER BY id
|
||||
-- !query schema
|
||||
struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string>
|
||||
-- !query output
|
||||
a 0 NULL x NULL NULL x x
|
||||
a 1 x x NULL NULL x x
|
||||
b 2 NULL x NULL NULL x x
|
||||
c 3 NULL x y NULL x y
|
||||
a 4 y x y NULL x y
|
||||
b 5 NULL x y z x z
|
||||
a 6 z x y z x v
|
||||
a 7 v x y z x v
|
||||
a 8 NULL x y z x v
|
Loading…
Reference in a new issue