[SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals
## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-21980 This PR fixes an issue in the ResolveGroupingAnalytics rule, which looks up the column references of grouping functions with plain equality (`indexOf`) and therefore does not take the case-sensitivity configuration into account. The problem can be reproduced by: `val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b"); df.cube("a").agg(grouping("A")).show()` ## How was this patch tested? Unit tests. Author: donnyzone <wellfengzhu@gmail.com> Closes #19202 from DonnyZone/ResolveGroupingAnalytics.
This commit is contained in:
parent
b6ef1f57bc
commit
21c4450fb2
|
@ -314,7 +314,7 @@ class Analyzer(
|
|||
s"grouping columns (${groupByExprs.mkString(",")})")
|
||||
}
|
||||
case e @ Grouping(col: Expression) =>
|
||||
val idx = groupByExprs.indexOf(col)
|
||||
val idx = groupByExprs.indexWhere(_.semanticEquals(col))
|
||||
if (idx >= 0) {
|
||||
Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)),
|
||||
Literal(1)), ByteType), toPrettySQL(e))()
|
||||
|
|
|
@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
|
|||
)
|
||||
}
|
||||
|
||||
test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") {
|
||||
checkAnswer(
|
||||
courseSales.cube("course", "year")
|
||||
.agg(grouping("CouRse"), grouping("year")),
|
||||
Row("Java", 2012, 0, 0) ::
|
||||
Row("Java", 2013, 0, 0) ::
|
||||
Row("Java", null, 0, 1) ::
|
||||
Row("dotNET", 2012, 0, 0) ::
|
||||
Row("dotNET", 2013, 0, 0) ::
|
||||
Row("dotNET", null, 0, 1) ::
|
||||
Row(null, 2012, 1, 0) ::
|
||||
Row(null, 2013, 1, 0) ::
|
||||
Row(null, null, 1, 1) :: Nil
|
||||
)
|
||||
}
|
||||
|
||||
test("rollup overlapping columns") {
|
||||
checkAnswer(
|
||||
testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"),
|
||||
|
|
Loading…
Reference in a new issue