[SPARK-36339][SQL] References to grouping that not part of aggregation should be replaced

### What changes were proposed in this pull request?

Currently, references to grouping sets are reported as errors after aggregated expressions, e.g.
```
SELECT count(name) c, name
FROM VALUES ('Alice'), ('Bob') people(name)
GROUP BY name GROUPING SETS(name);
```
Error in query: expression 'people.`name`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;;

### Why are the changes needed?

Fix the map anonymous function in the constructAggregateExprs function does not use underscores to avoid

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Unit tests.

Closes #33574 from gaoyajun02/SPARK-36339.

Lead-authored-by: gaoyajun02 <gaoyajun02@gmail.com>
Co-authored-by: gaoyajun02 <gaoyajun02@meituan.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 888f8f03c8)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
gaoyajun02 2021-08-06 16:34:37 +08:00 committed by Wenchen Fan
parent f3761bdb76
commit 33e4ce562a
2 changed files with 15 additions and 2 deletions

View file

@ -580,7 +580,7 @@ class Analyzer(override val catalogManager: CatalogManager)
aggregations: Seq[NamedExpression],
groupByAliases: Seq[Alias],
groupingAttrs: Seq[Expression],
gid: Attribute): Seq[NamedExpression] = aggregations.map {
gid: Attribute): Seq[NamedExpression] = aggregations.map { agg =>
// collect all the found AggregateExpression, so we can check an expression is part of
// any AggregateExpression or not.
val aggsBuffer = ArrayBuffer[Expression]()
@ -588,7 +588,7 @@ class Analyzer(override val catalogManager: CatalogManager)
def isPartOfAggregation(e: Expression): Boolean = {
aggsBuffer.exists(a => a.find(_ eq e).isDefined)
}
replaceGroupingFunc(_, groupByExprs, gid).transformDown {
replaceGroupingFunc(agg, groupByExprs, gid).transformDown {
// AggregateExpression should be computed on the unmodified value of its argument
// expressions, so we should not replace any references to grouping expression
// inside it.

View file

@ -3405,6 +3405,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
test("SPARK-36339: References to grouping attributes should be replaced") {
withTempView("t") {
Seq("a", "a", "b").toDF("x").createOrReplaceTempView("t")
checkAnswer(
sql(
"""
|select count(x) c, x from t
|group by x grouping sets(x)
""".stripMargin),
Seq(Row(2, "a"), Row(1, "b")))
}
}
test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
checkAnswer(
sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),