[SPARK-30870][SQL] Column pruning shouldn't alias a nested column if it means the whole structure

### What changes were proposed in this pull request?
This PR fixes a bug in nested column aliasing by taking the data type of the referenced nested fields into account when calculating the number of extracted columns. After this PR, the following query runs without issues:
```
SELECT explodedvalue.*
FROM VALUES array(named_struct('nested', named_struct('a', 1, 'b', 2))) AS (value)
LATERAL VIEW explode(value) AS explodedvalue
```
This is a regression from Spark 2.4.

### Why are the changes needed?
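To fix a bug: the previous check compared the number of referenced nested fields with the total number of fields of the attribute, so a referenced field that is itself a struct was counted as a single column. As a result, a nested column could be aliased even when it already covered the whole structure.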

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Added a new unit test to `SQLQuerySuite` and updated an existing test in `NestedColumnAliasingSuite`.

Closes #27675 from peter-toth/SPARK-30870.

Authored-by: Peter Toth <peter.toth@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Peter Toth 2020-02-24 13:46:21 -08:00 committed by Dongjoon Hyun
parent 293e5364e5
commit 1a4e2423b2
3 changed files with 15 additions and 7 deletions

View file

@ -129,7 +129,9 @@ object NestedColumnAliasing {
// If all nested fields of `attr` are used, we don't need to introduce new aliases.
// By default, ColumnPruning rule uses `attr` already.
if (nestedFieldToAlias.nonEmpty &&
nestedFieldToAlias.length < totalFieldNum(attr.dataType)) {
nestedFieldToAlias
.map { case (nestedField, _) => totalFieldNum(nestedField.dataType) }
.sum < totalFieldNum(attr.dataType)) {
Some(attr.exprId -> nestedFieldToAlias)
} else {
None

View file

@ -215,12 +215,7 @@ class NestedColumnAliasingSuite extends SchemaPruningTest {
val optimized = Optimize.execute(query)
val expected = nestedRelation
.select(GetStructField('a, 0, Some("b")))
.limit(5)
.analyze
comparePlans(optimized, expected)
comparePlans(optimized, query)
}
test("nested field pruning for getting struct field in array of struct") {

View file

@ -3393,6 +3393,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
)
}
}
test("SPARK-30870: Column pruning shouldn't alias a nested column if it means the whole " +
  "structure") {
  // The exploded element has a single field, `field`, which is itself a struct, so
  // selecting `explodedvalue.field` reads the element's whole structure.
  val explodeQuery =
    """
      |SELECT explodedvalue.field
      |FROM VALUES array(named_struct('field', named_struct('a', 1, 'b', 2))) AS (value)
      |LATERAL VIEW explode(value) AS explodedvalue
      |""".stripMargin
  // One output row whose only column is the inner struct (a = 1, b = 2).
  val expectedRows = Row(Row(1, 2)) :: Nil
  checkAnswer(sql(explodeQuery), expectedRows)
}
}
/**
 * Case class with a single optional string field `bar`.
 * NOTE(review): presumably a fixture for tests in this suite; its usages are not visible here.
 */
case class Foo(bar: Option[String])