[SPARK-36339][SQL] References to grouping that not part of aggregation should be replaced
### What changes were proposed in this pull request?
Currently, references to grouping sets are reported as errors after aggregated expressions, e.g.
```
SELECT count(name) c, name
FROM VALUES ('Alice'), ('Bob') people(name)
GROUP BY name GROUPING SETS(name);
```
Error in query: expression 'people.`name`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;;
### Why are the changes needed?
Fix the map anonymous function in the constructAggregateExprs function does not use underscores to avoid
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests.
Closes #33574 from gaoyajun02/SPARK-36339.
Lead-authored-by: gaoyajun02 <gaoyajun02@gmail.com>
Co-authored-by: gaoyajun02 <gaoyajun02@meituan.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 888f8f03c8
)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
f3761bdb76
commit
33e4ce562a
|
@ -580,7 +580,7 @@ class Analyzer(override val catalogManager: CatalogManager)
|
||||||
aggregations: Seq[NamedExpression],
|
aggregations: Seq[NamedExpression],
|
||||||
groupByAliases: Seq[Alias],
|
groupByAliases: Seq[Alias],
|
||||||
groupingAttrs: Seq[Expression],
|
groupingAttrs: Seq[Expression],
|
||||||
gid: Attribute): Seq[NamedExpression] = aggregations.map {
|
gid: Attribute): Seq[NamedExpression] = aggregations.map { agg =>
|
||||||
// collect all the found AggregateExpression, so we can check an expression is part of
|
// collect all the found AggregateExpression, so we can check an expression is part of
|
||||||
// any AggregateExpression or not.
|
// any AggregateExpression or not.
|
||||||
val aggsBuffer = ArrayBuffer[Expression]()
|
val aggsBuffer = ArrayBuffer[Expression]()
|
||||||
|
@ -588,7 +588,7 @@ class Analyzer(override val catalogManager: CatalogManager)
|
||||||
def isPartOfAggregation(e: Expression): Boolean = {
|
def isPartOfAggregation(e: Expression): Boolean = {
|
||||||
aggsBuffer.exists(a => a.find(_ eq e).isDefined)
|
aggsBuffer.exists(a => a.find(_ eq e).isDefined)
|
||||||
}
|
}
|
||||||
replaceGroupingFunc(_, groupByExprs, gid).transformDown {
|
replaceGroupingFunc(agg, groupByExprs, gid).transformDown {
|
||||||
// AggregateExpression should be computed on the unmodified value of its argument
|
// AggregateExpression should be computed on the unmodified value of its argument
|
||||||
// expressions, so we should not replace any references to grouping expression
|
// expressions, so we should not replace any references to grouping expression
|
||||||
// inside it.
|
// inside it.
|
||||||
|
|
|
@ -3405,6 +3405,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-36339: References to grouping attributes should be replaced") {
|
||||||
|
withTempView("t") {
|
||||||
|
Seq("a", "a", "b").toDF("x").createOrReplaceTempView("t")
|
||||||
|
checkAnswer(
|
||||||
|
sql(
|
||||||
|
"""
|
||||||
|
|select count(x) c, x from t
|
||||||
|
|group by x grouping sets(x)
|
||||||
|
""".stripMargin),
|
||||||
|
Seq(Row(2, "a"), Row(1, "b")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
|
test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
|
||||||
checkAnswer(
|
checkAnswer(
|
||||||
sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),
|
sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),
|
||||||
|
|
Loading…
Reference in a new issue