[SPARK-18604][SQL] Make sure CollapseWindow returns the attributes in the same order.
## What changes were proposed in this pull request? The `CollapseWindow` optimizer rule changes the order of output attributes. This modifies the output of the plan, which the optimizer must not do. This also breaks things like `collect()`, for which we use a `RowEncoder` that assumes that the output attributes of the executed plan are equal to those produced by the logical plan. ## How was this patch tested? I have updated an incorrect test in `CollapseWindowSuite`. Author: Herman van Hovell <hvanhovell@databricks.com> Closes #16027 from hvanhovell/SPARK-18604.
This commit is contained in:
parent
87141622ee
commit
454b804991
|
@ -545,7 +545,7 @@ object CollapseRepartition extends Rule[LogicalPlan] {
|
|||
object CollapseWindow extends Rule[LogicalPlan] {
|
||||
def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
|
||||
case w @ Window(we1, ps1, os1, Window(we2, ps2, os2, grandChild)) if ps1 == ps2 && os1 == os2 =>
|
||||
w.copy(windowExpressions = we1 ++ we2, child = grandChild)
|
||||
w.copy(windowExpressions = we2 ++ we1, child = grandChild)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,12 +46,15 @@ class CollapseWindowSuite extends PlanTest {
|
|||
.window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1)
|
||||
.window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1)
|
||||
|
||||
val optimized = Optimize.execute(query.analyze)
|
||||
val analyzed = query.analyze
|
||||
val optimized = Optimize.execute(analyzed)
|
||||
assert(analyzed.output === optimized.output)
|
||||
|
||||
val correctAnswer = testRelation.window(Seq(
|
||||
avg(b).as('avg_b),
|
||||
sum(b).as('sum_b),
|
||||
max(a).as('max_a),
|
||||
min(a).as('min_a)), partitionSpec1, orderSpec1)
|
||||
min(a).as('min_a),
|
||||
max(a).as('max_a),
|
||||
sum(b).as('sum_b),
|
||||
avg(b).as('avg_b)), partitionSpec1, orderSpec1)
|
||||
|
||||
comparePlans(optimized, correctAnswer)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue