[SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals

## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-21980 This PR fixes the issue in ResolveGroupingAnalytics rule, which indexes the column references in grouping functions without considering case sensitive configurations. The problem can be reproduced by: `val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b") df.cube("a").agg(grouping("A")).show()` ## How was this patch tested? unit tests Author: donnyzone <wellfengzhu@gmail.com> Closes #19202 from DonnyZone/ResolveGroupingAnalytics.
2017-09-13 10:06:53 -07:00 · 2017-09-13 10:06:53 -07:00 · 21c4450fb2
parent b6ef1f57bc
commit 21c4450fb2
2 changed files with 17 additions and 1 deletions
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@ -314,7 +314,7 @@ class Analyzer(
                s"grouping columns (${groupByExprs.mkString(",")})")
          }
        case e @ Grouping(col: Expression) =>
-          val idx = groupByExprs.indexOf(col)
+          val idx = groupByExprs.indexWhere(_.semanticEquals(col))
          if (idx >= 0) {
            Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)),
              Literal(1)), ByteType), toPrettySQL(e))()
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
    )
  }

+  test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") {
+    checkAnswer(
+      courseSales.cube("course", "year")
+        .agg(grouping("CouRse"), grouping("year")),
+      Row("Java", 2012, 0, 0) ::
+        Row("Java", 2013, 0, 0) ::
+        Row("Java", null, 0, 1) ::
+        Row("dotNET", 2012, 0, 0) ::
+        Row("dotNET", 2013, 0, 0) ::
+        Row("dotNET", null, 0, 1) ::
+        Row(null, 2012, 1, 0) ::
+        Row(null, 2013, 1, 0) ::
+        Row(null, null, 1, 1) :: Nil
+    )
+  }
+
  test("rollup overlapping columns") {
    checkAnswer(
      testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"),