diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b6c89d484c..c87817e0fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -516,10 +516,34 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { * Remove no-op operators from the query plan that do not make any modifications. */ object RemoveNoopOperators extends Rule[LogicalPlan] { + def restoreOriginalOutputNames( + projectList: Seq[NamedExpression], + originalNames: Seq[String]): Seq[NamedExpression] = { + projectList.zip(originalNames).map { + case (attr: Attribute, name) => attr.withName(name) + case (alias: Alias, name) => alias.withName(name) + case (other, _) => other + } + } + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning( _.containsAnyPattern(PROJECT, WINDOW), ruleId) { // Eliminate no-op Projects - case p @ Project(_, child) if child.sameOutput(p) => child + case p @ Project(projectList, child) if child.sameOutput(p) => + val newChild = child match { + case p: Project => + p.copy(projectList = restoreOriginalOutputNames(p.projectList, projectList.map(_.name))) + case agg: Aggregate => + agg.copy(aggregateExpressions = + restoreOriginalOutputNames(agg.aggregateExpressions, projectList.map(_.name))) + case _ => + child + } + if (newChild.output.zip(projectList).forall { case (a1, a2) => a1.name == a2.name }) { + newChild + } else { + p + } // Eliminate no-op Window case w: Window if w.windowExpressions.isEmpty => w.child diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala index cedd21d2bf..943d207ddc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala @@ -54,4 +54,16 @@ class RemoveNoopOperatorsSuite extends PlanTest { comparePlans(optimized, testRelation) } + + test("SPARK-36353: RemoveNoopOperators should keep output schema") { + val query = testRelation + .select(('a + 'b).as("c")) + .analyze + val originalQuery = Project(Seq(query.output.head.withName("C")), query) + val optimized = Optimize.execute(originalQuery.analyze) + val result = testRelation + .select(('a + 'b).as("C")) + .analyze + comparePlans(optimized, result) + } }