[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored
### What changes were proposed in this pull request? We should not silently ignore `DISTINCT` when they are not supported in the function arguments. This PR is to block these cases and issue the error messages. ### How was this patch tested? Added test cases for both regular functions and window functions Author: Xiao Li <gatorsmile@gmail.com> Closes #18340 from gatorsmile/firstCount.
This commit is contained in:
parent
ea542d29b2
commit
9413b84b5a
|
@ -1206,11 +1206,21 @@ class Analyzer(
|
||||||
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
|
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
|
||||||
// the context of a Window clause. They do not need to be wrapped in an
|
// the context of a Window clause. They do not need to be wrapped in an
|
||||||
// AggregateExpression.
|
// AggregateExpression.
|
||||||
case wf: AggregateWindowFunction => wf
|
case wf: AggregateWindowFunction =>
|
||||||
|
if (isDistinct) {
|
||||||
|
failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
|
||||||
|
} else {
|
||||||
|
wf
|
||||||
|
}
|
||||||
// We get an aggregate function, we need to wrap it in an AggregateExpression.
|
// We get an aggregate function, we need to wrap it in an AggregateExpression.
|
||||||
case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
|
case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
|
||||||
// This function is not an aggregate function, just return the resolved one.
|
// This function is not an aggregate function, just return the resolved one.
|
||||||
case other => other
|
case other =>
|
||||||
|
if (isDistinct) {
|
||||||
|
failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
|
||||||
|
} else {
|
||||||
|
other
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
|
||||||
import org.apache.spark.sql.catalyst.dsl.plans._
|
import org.apache.spark.sql.catalyst.dsl.plans._
|
||||||
import org.apache.spark.sql.catalyst.expressions._
|
import org.apache.spark.sql.catalyst.expressions._
|
||||||
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
|
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
|
||||||
import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter}
|
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
|
||||||
|
import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
|
||||||
import org.apache.spark.sql.catalyst.plans.logical._
|
import org.apache.spark.sql.catalyst.plans.logical._
|
||||||
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
|
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
|
@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
|
||||||
"not supported within a window function" :: Nil)
|
"not supported within a window function" :: Nil)
|
||||||
|
|
||||||
errorTest(
|
errorTest(
|
||||||
"distinct window function",
|
"distinct aggregate function in window",
|
||||||
testRelation2.select(
|
testRelation2.select(
|
||||||
WindowExpression(
|
WindowExpression(
|
||||||
AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
|
AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
|
||||||
|
@ -162,6 +163,16 @@ class AnalysisErrorSuite extends AnalysisTest {
|
||||||
UnspecifiedFrame)).as('window)),
|
UnspecifiedFrame)).as('window)),
|
||||||
"Distinct window functions are not supported" :: Nil)
|
"Distinct window functions are not supported" :: Nil)
|
||||||
|
|
||||||
|
errorTest(
|
||||||
|
"distinct function",
|
||||||
|
CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
|
||||||
|
"hex does not support the modifier DISTINCT" :: Nil)
|
||||||
|
|
||||||
|
errorTest(
|
||||||
|
"distinct window function",
|
||||||
|
CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
|
||||||
|
"percent_rank does not support the modifier DISTINCT" :: Nil)
|
||||||
|
|
||||||
errorTest(
|
errorTest(
|
||||||
"nested aggregate functions",
|
"nested aggregate functions",
|
||||||
testRelation.groupBy('a)(
|
testRelation.groupBy('a)(
|
||||||
|
|
|
@ -17,10 +17,11 @@
|
||||||
|
|
||||||
package org.apache.spark.sql.catalyst.analysis
|
package org.apache.spark.sql.catalyst.analysis
|
||||||
|
|
||||||
|
import java.net.URI
|
||||||
import java.util.Locale
|
import java.util.Locale
|
||||||
|
|
||||||
import org.apache.spark.sql.AnalysisException
|
import org.apache.spark.sql.AnalysisException
|
||||||
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
|
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
|
||||||
import org.apache.spark.sql.catalyst.plans.PlanTest
|
import org.apache.spark.sql.catalyst.plans.PlanTest
|
||||||
import org.apache.spark.sql.catalyst.plans.logical._
|
import org.apache.spark.sql.catalyst.plans.logical._
|
||||||
import org.apache.spark.sql.internal.SQLConf
|
import org.apache.spark.sql.internal.SQLConf
|
||||||
|
@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
|
||||||
|
|
||||||
private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
|
private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
|
||||||
val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
|
val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
|
||||||
val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
|
val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
|
||||||
|
catalog.createDatabase(
|
||||||
|
CatalogDatabase("default", "", new URI("loc"), Map.empty),
|
||||||
|
ignoreIfExists = false)
|
||||||
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
|
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
|
||||||
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
|
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
|
||||||
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
|
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
|
||||||
|
|
Loading…
Reference in a new issue