From b037930952a341f4ed956a8f1839852992feaadc Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 4 Jan 2021 05:44:00 +0000 Subject: [PATCH] [SPARK-33951][SQL] Distinguish the error between filter and distinct ### What changes were proposed in this pull request? The error messages for specifying filter and distinct for the aggregate function are mixed together and should be separated. This can increase readability and ease of use. ### Why are the changes needed? increase readability and ease of use. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? Jenkins test Closes #30982 from beliefer/SPARK-33951. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 9 +--- .../sql/catalyst/analysis/Analyzer.scala | 45 +++++++++++-------- .../analysis/higherOrderFunctions.scala | 3 +- .../analysis/AnalysisErrorSuite.scala | 8 ++-- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index e4a1f3f8ef..f4c91327a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -263,13 +263,8 @@ object QueryCompilationErrors { s"its class is $classCanonicalName, which is not a generator.") } - def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): Throwable = { - new AnalysisException("DISTINCT or FILTER specified, " + - s"but $prettyName is not an aggregate function") - } - - def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = { - new AnalysisException(s"Function $prettyName does not support IGNORE NULLS") + def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): Throwable = { + new AnalysisException(s"Function $prettyName does not support $syntax") } def nonDeterministicFilterInAggregateError(): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5e86368f6f..fdd1cd0146 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2120,24 +2120,30 @@ class Analyzer(override val catalogManager: CatalogManager) // the context of a Window clause. They do not need to be wrapped in an // AggregateExpression. case wf: AggregateWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - wf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "FILTER clause") } else if (ignoreNulls) { wf match { case nthValue: NthValue => nthValue.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - wf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "IGNORE NULLS") } } else { wf } case owf: FrameLessOffsetWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - owf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "FILTER clause") } else if (ignoreNulls) { owf match { case lead: Lead => @@ -2145,8 +2151,8 @@ class Analyzer(override val catalogManager: CatalogManager) case lag: Lag => lag.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - owf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "IGNORE NULLS") } } else { owf @@ -2161,20 +2167,23 @@ class Analyzer(override val catalogManager: CatalogManager) case first: First => first.copy(ignoreNulls = ignoreNulls) case last: Last => last.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - agg.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + agg.prettyName, "IGNORE NULLS") } AggregateExpression(aggFunc, Complete, isDistinct, filter) } else { AggregateExpression(agg, Complete, isDistinct, filter) } // This function is not an aggregate function, just return the resolved one. - case other if (isDistinct || filter.isDefined) => - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - other.prettyName) - case other if (ignoreNulls) => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - other.prettyName) + case other if isDistinct => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "DISTINCT") + case other if filter.isDefined => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "FILTER clause") + case other if ignoreNulls => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "IGNORE NULLS") case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 6115b4ed5a..7d74c0d1cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -41,7 +41,8 @@ case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[Log filter.foreach(_.failAnalysis("FILTER predicate specified, " + s"but ${func.prettyName} is not an aggregate function")) if (ignoreNulls) { - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + func.prettyName, "IGNORE NULLS") } func case other => other.failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ec2a8a41bf..01d223d18b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -184,22 +184,22 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "distinct function", CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support DISTINCT" :: Nil) errorTest( "non aggregate function with filter predicate", CatalystSqlParser.parsePlan("SELECT hex(a) FILTER (WHERE c = 1) FROM TaBlE2"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support FILTER clause" :: Nil) errorTest( "distinct window function", CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) OVER () FROM TaBlE"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support DISTINCT" :: Nil) errorTest( "window function with filter predicate", CatalystSqlParser.parsePlan("SELECT percent_rank(a) FILTER (WHERE c > 1) OVER () FROM TaBlE2"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support FILTER clause" :: Nil) errorTest( "higher order function with filter predicate",