[SPARK-36275][SQL] ResolveAggregateFunctions should work with nested fields

### What changes were proposed in this pull request?
This PR fixes an issue in `ResolveAggregateFunctions` where non-aggregated nested fields in ORDER BY and HAVING are not resolved correctly. This is because nested fields are resolved as aliases that fail to be semantically equal to any grouping/aggregate expressions.

### Why are the changes needed?
To fix an analyzer issue.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Unit tests added to `AnalysisSuite`.

Closes #33498 from allisonwang-db/spark-36275-resolve-agg-func.

Authored-by: allisonwang-db <allison.wang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 23a6ffa5dc)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
allisonwang-db 2021-07-28 13:35:17 +08:00 committed by Wenchen Fan
parent aea36aa977
commit 993ffafc3e
2 changed files with 30 additions and 2 deletions

View file

@ -2553,8 +2553,10 @@ class Analyzer(override val catalogManager: CatalogManager)
// a table `t` has two columns `c1` and `c2`, for query `SELECT ... FROM t
// GROUP BY c1 HAVING c2 = 0`, even though we can resolve column `c2` here, we
// should undo it later and fail with "Column c2 not found".
agg.child.resolve(u.nameParts, resolver).map(TempResolvedColumn(_, u.nameParts))
.getOrElse(u)
agg.child.resolve(u.nameParts, resolver).map({
case a: Alias => TempResolvedColumn(a.child, u.nameParts)
case o => TempResolvedColumn(o, u.nameParts)
}).getOrElse(u)
} catch {
case _: AnalysisException => u
}

View file

@ -1115,4 +1115,30 @@ class AnalysisSuite extends AnalysisTest with Matchers {
Seq("grouping_id() can only be used with GroupingSets/Cube/Rollup"),
false)
}
test("SPARK-36275: Resolve aggregate functions should work with nested fields") {
  // Every query below reads a single struct column `c` (fields `x` and `y`) from an
  // inline VALUES relation and groups by the nested field `c.x`.

  // HAVING references the grouped nested field: analysis is expected to succeed.
  val havingOnGroupedField = parsePlan(
    """
      |SELECT c.x, SUM(c.y)
      |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
      |GROUP BY c.x
      |HAVING c.x > 1
      |""".stripMargin)
  assertAnalysisSuccess(havingOnGroupedField)

  // ORDER BY references the grouped nested field: analysis is expected to succeed.
  val orderByGroupedField = parsePlan(
    """
      |SELECT c.x, SUM(c.y)
      |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
      |GROUP BY c.x
      |ORDER BY c.x
      |""".stripMargin)
  assertAnalysisSuccess(orderByGroupedField)

  // ORDER BY additionally references `c.y`, which is neither grouped nor aggregated:
  // analysis is expected to fail and report `c.y` as unresolvable.
  val orderByUngroupedField = parsePlan(
    """
      |SELECT c.x
      |FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
      |GROUP BY c.x
      |ORDER BY c.x + c.y
      |""".stripMargin)
  assertAnalysisError(
    orderByUngroupedField,
    Seq("cannot resolve 'c.y' given input columns: [x]"))
}
}