From 8b4cc90c44d561b59bcb042025eae337657f10f9 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 2 Sep 2021 22:32:31 +0800 Subject: [PATCH] [SPARK-36637][SQL] Provide proper error message when use undefined window frame ### What changes were proposed in this pull request? Two case of using undefined window frame as below should provide proper error message 1. For case using undefined window frame with window function ``` SELECT nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays; ``` origin error message is ``` Window function nth_value(employee_name#x, 2, false) requires an OVER clause. ``` It's confused that in use use a window frame `w` but it's not defined. Now the error message is ``` Window specification w is not defined in the WINDOW clause. ``` 2. For case using undefined window frame with aggregation function ``` SELECT SUM(salary) OVER w sum_salary FROM basic_pays; ``` origin error message is ``` Error in query: unresolved operator 'Aggregate [unresolvedwindowexpression(sum(salary#2), WindowSpecReference(w)) AS sum_salary#34] +- SubqueryAlias spark_catalog.default.basic_pays +- HiveTableRelation [`default`.`employees`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [name#0, dept#1, salary#2, age#3], Partition Cols: []] ``` In this case, when convert GlobalAggregate, should skip UnresolvedWindowExpression Now the error message is ``` Window specification w is not defined in the WINDOW clause. ``` ### Why are the changes needed? Provide proper error message ### Does this PR introduce _any_ user-facing change? Yes, error messages are improved as described in desc ### How was this patch tested? Added UT Closes #33892 from AngersZhuuuu/SPARK-36637. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan (cherry picked from commit 568ad6aa4435ce76ca3b5d9966e64259ea1f9b38) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 15 +++++++++-- .../expressions/windowExpressions.scala | 4 ++- .../sql/catalyst/trees/TreePatterns.scala | 1 + .../resources/sql-tests/inputs/window.sql | 12 ++++++++- .../sql-tests/results/window.sql.out | 26 ++++++++++++++++++- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 92018eb106..fa6b2471b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -437,8 +437,8 @@ class Analyzer(override val catalogManager: CatalogManager) * Substitute child plan with WindowSpecDefinitions. */ object WindowsSubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( - _.containsPattern(WITH_WINDOW_DEFINITION), ruleId) { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDownWithPruning( + _.containsAnyPattern(WITH_WINDOW_DEFINITION, UNRESOLVED_WINDOW_EXPRESSION), ruleId) { // Lookup WindowSpecDefinitions. This rule works with unresolved children. case WithWindowDefinition(windowDefinitions, child) => child.resolveExpressions { case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => @@ -446,6 +446,14 @@ class Analyzer(override val catalogManager: CatalogManager) throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowName)) WindowExpression(c, windowSpecDefinition) } + + case p @ Project(projectList, _) => + projectList.foreach(_.transformDownWithPruning( + _.containsPattern(UNRESOLVED_WINDOW_EXPRESSION), ruleId) { + case UnresolvedWindowExpression(_, windowSpec) => + throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowSpec.name) + }) + p } } @@ -2492,6 +2500,9 @@ class Analyzer(override val catalogManager: CatalogManager) expr.collect { case WindowExpression(ae: AggregateExpression, _) => ae case WindowExpression(e: PythonUDF, _) if PythonUDF.isGroupedAggPandasUDF(e) => e + case UnresolvedWindowExpression(ae: AggregateExpression, _) => ae + case UnresolvedWindowExpression(e: PythonUDF, _) + if PythonUDF.isGroupedAggPandasUDF(e) => e } }.toSet diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index fc2e4493c5..6396fde575 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, DeclarativeAggregate, NoOp} import org.apache.spark.sql.catalyst.trees.{BinaryLike, LeafLike, TernaryLike, UnaryLike} -import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, WINDOW_EXPRESSION} +import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_WINDOW_EXPRESSION, WINDOW_EXPRESSION} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ @@ -293,6 +293,8 @@ case class UnresolvedWindowExpression( override protected def withNewChildInternal(newChild: Expression): UnresolvedWindowExpression = copy(child = newChild) + + override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_WINDOW_EXPRESSION) } case class WindowExpression( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index a63278cdd3..7322121e84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -120,6 +120,7 @@ object TreePattern extends Enumeration { val UNRESOLVED_ORDINAL: Value = Value val UNRESOLVED_FUNCTION: Value = Value val UNRESOLVED_HINT: Value = Value + val UNRESOLVED_WINDOW_EXPRESSION: Value = Value // Unresolved Plan patterns (Alphabetically ordered) val UNRESOLVED_SUBQUERY_COLUMN_ALIAS: Value = Value diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 9766aafe46..666c0577f1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -429,4 +429,14 @@ SELECT FROM test_ignore_null WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -ORDER BY id; \ No newline at end of file +ORDER BY id; + +SELECT + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays; + +SELECT + SUM(salary) OVER w sum_salary +FROM + basic_pays; diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 999bcbe0fe..d781245227 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 53 +-- Number of queries: 55 -- !query @@ -1173,3 +1173,27 @@ b 5 NULL x y z x z a 6 z x y z x v a 7 v x y z x v a 8 NULL x y z x v + + +-- !query +SELECT + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Window specification w is not defined in the WINDOW clause. + + +-- !query +SELECT + SUM(salary) OVER w sum_salary +FROM + basic_pays +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Window specification w is not defined in the WINDOW clause.