[SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals

## What changes were proposed in this pull request?

https://issues.apache.org/jira/browse/SPARK-21980

This PR fixes an issue in the ResolveGroupingAnalytics rule, which looked up the column references inside grouping functions without taking the case-sensitivity configuration into account.

The problem can be reproduced by:

```scala
val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b")
df.cube("a").agg(grouping("A")).show()
```

## How was this patch tested?
Added a unit test to DataFrameAggregateSuite.

Author: donnyzone <wellfengzhu@gmail.com>

Closes #19202 from DonnyZone/ResolveGroupingAnalytics.
This commit is contained in:
donnyzone 2017-09-13 10:06:53 -07:00 committed by gatorsmile
parent b6ef1f57bc
commit 21c4450fb2
2 changed files with 17 additions and 1 deletions

View file

@ -314,7 +314,7 @@ class Analyzer(
s"grouping columns (${groupByExprs.mkString(",")})")
}
case e @ Grouping(col: Expression) =>
val idx = groupByExprs.indexOf(col)
val idx = groupByExprs.indexWhere(_.semanticEquals(col))
if (idx >= 0) {
Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)),
Literal(1)), ByteType), toPrettySQL(e))()

View file

@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
)
}
test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") {
  // Result columns: course, year, grouping(course), grouping(year).
  // In the expected data each grouping flag is 1 exactly in the rows where the
  // corresponding cube column is null, and 0 otherwise.
  val expectedRows = Seq(
    Row("Java", 2012, 0, 0),
    Row("Java", 2013, 0, 0),
    Row("Java", null, 0, 1),
    Row("dotNET", 2012, 0, 0),
    Row("dotNET", 2013, 0, 0),
    Row("dotNET", null, 0, 1),
    Row(null, 2012, 1, 0),
    Row(null, 2013, 1, 0),
    Row(null, null, 1, 1)
  )

  // "CouRse" is deliberately spelled with different letter case than the cube column
  // "course"; the grouping function must still be matched to that column.
  checkAnswer(
    courseSales
      .cube("course", "year")
      .agg(grouping("CouRse"), grouping("year")),
    expectedRows
  )
}
test("rollup overlapping columns") {
checkAnswer(
testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"),