[SPARK-30870][SQL] Column pruning shouldn't alias a nested column if it means the whole structure
### What changes were proposed in this pull request?

This PR fixes a bug in nested column aliasing by taking the data type of the referenced nested fields into account when calculating the number of extracted columns.

After this PR, the following query runs without issues:

```
SELECT explodedvalue.*
FROM VALUES array(named_struct('nested', named_struct('a', 1, 'b', 2))) AS (value)
LATERAL VIEW explode(value) AS explodedvalue
```

This is a regression from Spark 2.4.

### Why are the changes needed?

To fix a bug.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Added a new unit test.

Closes #27675 from peter-toth/SPARK-30870.

Authored-by: Peter Toth <peter.toth@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
293e5364e5
commit
1a4e2423b2
|
@ -129,7 +129,9 @@ object NestedColumnAliasing {
|
|||
// If all nested fields of `attr` are used, we don't need to introduce new aliases.
|
||||
// By default, the ColumnPruning rule already uses `attr`.
|
||||
if (nestedFieldToAlias.nonEmpty &&
|
||||
nestedFieldToAlias.length < totalFieldNum(attr.dataType)) {
|
||||
nestedFieldToAlias
|
||||
.map { case (nestedField, _) => totalFieldNum(nestedField.dataType) }
|
||||
.sum < totalFieldNum(attr.dataType)) {
|
||||
Some(attr.exprId -> nestedFieldToAlias)
|
||||
} else {
|
||||
None
|
||||
|
|
|
@ -215,12 +215,7 @@ class NestedColumnAliasingSuite extends SchemaPruningTest {
|
|||
|
||||
val optimized = Optimize.execute(query)
|
||||
|
||||
val expected = nestedRelation
|
||||
.select(GetStructField('a, 0, Some("b")))
|
||||
.limit(5)
|
||||
.analyze
|
||||
|
||||
comparePlans(optimized, expected)
|
||||
comparePlans(optimized, query)
|
||||
}
|
||||
|
||||
test("nested field pruning for getting struct field in array of struct") {
|
||||
|
|
|
@ -3393,6 +3393,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-30870: Column pruning shouldn't alias a nested column if it means the whole " +
|
||||
"structure") {
|
||||
val df = sql(
|
||||
"""
|
||||
|SELECT explodedvalue.field
|
||||
|FROM VALUES array(named_struct('field', named_struct('a', 1, 'b', 2))) AS (value)
|
||||
|LATERAL VIEW explode(value) AS explodedvalue
|
||||
""".stripMargin)
|
||||
checkAnswer(df, Row(Row(1, 2)) :: Nil)
|
||||
}
|
||||
}
|
||||
|
||||
case class Foo(bar: Option[String])
|
||||
|
|
Loading…
Reference in a new issue