[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored

### What changes were proposed in this pull request?
We should not silently ignore `DISTINCT` when they are not supported in the function arguments. This PR is to block these cases and issue the error messages.

### How was this patch tested?
Added test cases for both regular functions and window functions

Author: Xiao Li <gatorsmile@gmail.com>

Closes #18340 from gatorsmile/firstCount.
This commit is contained in:
Xiao Li 2017-06-19 15:51:21 +08:00 committed by Wenchen Fan
parent ea542d29b2
commit 9413b84b5a
3 changed files with 31 additions and 6 deletions

View file

@ -1206,11 +1206,21 @@ class Analyzer(
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
// the context of a Window clause. They do not need to be wrapped in an // the context of a Window clause. They do not need to be wrapped in an
// AggregateExpression. // AggregateExpression.
case wf: AggregateWindowFunction => wf case wf: AggregateWindowFunction =>
if (isDistinct) {
failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
} else {
wf
}
// We get an aggregate function, we need to wrap it in an AggregateExpression. // We get an aggregate function, we need to wrap it in an AggregateExpression.
case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct) case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
// This function is not an aggregate function, just return the resolved one. // This function is not an aggregate function, just return the resolved one.
case other => other case other =>
if (isDistinct) {
failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
} else {
other
}
} }
} }
} }

View file

@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
"not supported within a window function" :: Nil) "not supported within a window function" :: Nil)
errorTest( errorTest(
"distinct window function", "distinct aggregate function in window",
testRelation2.select( testRelation2.select(
WindowExpression( WindowExpression(
AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true), AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
@ -162,6 +163,16 @@ class AnalysisErrorSuite extends AnalysisTest {
UnspecifiedFrame)).as('window)), UnspecifiedFrame)).as('window)),
"Distinct window functions are not supported" :: Nil) "Distinct window functions are not supported" :: Nil)
errorTest(
"distinct function",
CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
"hex does not support the modifier DISTINCT" :: Nil)
errorTest(
"distinct window function",
CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
"percent_rank does not support the modifier DISTINCT" :: Nil)
errorTest( errorTest(
"nested aggregate functions", "nested aggregate functions",
testRelation.groupBy('a)( testRelation.groupBy('a)(

View file

@ -17,10 +17,11 @@
package org.apache.spark.sql.catalyst.analysis package org.apache.spark.sql.catalyst.analysis
import java.net.URI
import java.util.Locale import java.util.Locale
import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf
@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
private def makeAnalyzer(caseSensitive: Boolean): Analyzer = { private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
catalog.createDatabase(
CatalogDatabase("default", "", new URI("loc"), Map.empty),
ignoreIfExists = false)
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true) catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true) catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true) catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)