[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored

### What changes were proposed in this pull request? We should not silently ignore `DISTINCT` when they are not supported in the function arguments. This PR is to block these cases and issue the error messages. ### How was this patch tested? Added test cases for both regular functions and window functions Author: Xiao Li <gatorsmile@gmail.com> Closes #18340 from gatorsmile/firstCount.
2017-06-19 15:51:21 +08:00 · 2017-06-19 15:51:21 +08:00 · 9413b84b5a
parent ea542d29b2
commit 9413b84b5a
3 changed files with 31 additions and 6 deletions
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@ -1206,11 +1206,21 @@ class Analyzer(
                // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
                // the context of a Window clause. They do not need to be wrapped in an
                // AggregateExpression.
-                case wf: AggregateWindowFunction => wf
+                case wf: AggregateWindowFunction =>
                  if (isDistinct) {
                    failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
                  } else {
                    wf
                  }
                // We get an aggregate function, we need to wrap it in an AggregateExpression.
                case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
                // This function is not an aggregate function, just return the resolved one.
-                case other => other
+                case other =>
                  if (isDistinct) {
                    failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
                  } else {
                    other
                  }
              }
            }
        }
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
-import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
 import org.apache.spark.sql.types._
@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
    "not supported within a window function" :: Nil)
  errorTest(
-    "distinct window function",
+    "distinct aggregate function in window",
    testRelation2.select(
      WindowExpression(
        AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
@ -162,6 +163,16 @@ class AnalysisErrorSuite extends AnalysisTest {
          UnspecifiedFrame)).as('window)),
    "Distinct window functions are not supported" :: Nil)
  errorTest(
    "distinct function",
    CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
    "hex does not support the modifier DISTINCT" :: Nil)
  errorTest(
    "distinct window function",
    CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
    "percent_rank does not support the modifier DISTINCT" :: Nil)
  errorTest(
    "nested aggregate functions",
    testRelation.groupBy('a)(
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
@ -17,10 +17,11 @@
 package org.apache.spark.sql.catalyst.analysis
 import java.net.URI
 import java.util.Locale
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
+import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.internal.SQLConf
@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
-    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
+    val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
    catalog.createDatabase(
      CatalogDatabase("default", "", new URI("loc"), Map.empty),
      ignoreIfExists = false)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
    catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)