[SPARK-36275][SQL] ResolveAggregateFunctions should work with nested fields
### What changes were proposed in this pull request?
This PR fixes an issue in `ResolveAggregateFunctions` where non-aggregated nested fields in ORDER BY and HAVING are not resolved correctly. This is because a nested field is resolved as an `Alias`, and that alias is not semantically equal to any grouping/aggregate expression. The fix unwraps the alias and wraps its child in `TempResolvedColumn` instead.
### Why are the changes needed?
To fix an analyzer issue.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests.
Closes #33498 from allisonwang-db/spark-36275-resolve-agg-func.
Authored-by: allisonwang-db <allison.wang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 23a6ffa5dc)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
aea36aa977
commit
993ffafc3e
|
@ -2553,8 +2553,10 @@ class Analyzer(override val catalogManager: CatalogManager)
|
|||
// a table `t` has two columns `c1` and `c2`, for query `SELECT ... FROM t
|
||||
// GROUP BY c1 HAVING c2 = 0`, even though we can resolve column `c2` here, we
|
||||
// should undo it later and fail with "Column c2 not found".
|
||||
agg.child.resolve(u.nameParts, resolver).map(TempResolvedColumn(_, u.nameParts))
|
||||
.getOrElse(u)
|
||||
agg.child.resolve(u.nameParts, resolver).map({
|
||||
case a: Alias => TempResolvedColumn(a.child, u.nameParts)
|
||||
case o => TempResolvedColumn(o, u.nameParts)
|
||||
}).getOrElse(u)
|
||||
} catch {
|
||||
case _: AnalysisException => u
|
||||
}
|
||||
|
|
|
@ -1115,4 +1115,30 @@ class AnalysisSuite extends AnalysisTest with Matchers {
|
|||
Seq("grouping_id() can only be used with GroupingSets/Cube/Rollup"),
|
||||
false)
|
||||
}
|
||||
|
||||
test("SPARK-36275: Resolve aggregate functions should work with nested fields") {
|
||||
assertAnalysisSuccess(parsePlan(
|
||||
"""
|
||||
|SELECT c.x, SUM(c.y)
|
||||
|FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
|
||||
|GROUP BY c.x
|
||||
|HAVING c.x > 1
|
||||
|""".stripMargin))
|
||||
|
||||
assertAnalysisSuccess(parsePlan(
|
||||
"""
|
||||
|SELECT c.x, SUM(c.y)
|
||||
|FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
|
||||
|GROUP BY c.x
|
||||
|ORDER BY c.x
|
||||
|""".stripMargin))
|
||||
|
||||
assertAnalysisError(parsePlan(
|
||||
"""
|
||||
|SELECT c.x
|
||||
|FROM VALUES NAMED_STRUCT('x', 'A', 'y', 1), NAMED_STRUCT('x', 'A', 'y', 2) AS t(c)
|
||||
|GROUP BY c.x
|
||||
|ORDER BY c.x + c.y
|
||||
|""".stripMargin), "cannot resolve 'c.y' given input columns: [x]" :: Nil)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue