[SPARK-23802][SQL] PropagateEmptyRelation can leave query plan in unresolved state
## What changes were proposed in this pull request? Add cast to nulls introduced by PropagateEmptyRelation so in cases they're part of coalesce they will not break its type checking rules ## How was this patch tested? Added unit test Author: Robert Kruszewski <robertk@palantir.com> Closes #20914 from robert3005/rk/propagate-empty-fix.
This commit is contained in:
parent
359375eff7
commit
5cfd5fabcd
|
@ -17,10 +17,12 @@
|
|||
|
||||
package org.apache.spark.sql.catalyst.optimizer
|
||||
|
||||
import org.apache.spark.sql.catalyst.analysis.CastSupport
|
||||
import org.apache.spark.sql.catalyst.expressions._
|
||||
import org.apache.spark.sql.catalyst.plans._
|
||||
import org.apache.spark.sql.catalyst.plans.logical._
|
||||
import org.apache.spark.sql.catalyst.rules._
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
|
||||
/**
|
||||
* Collapse plans consisting empty local relations generated by [[PruneFilters]].
|
||||
|
@ -32,7 +34,7 @@ import org.apache.spark.sql.catalyst.rules._
|
|||
* - Aggregate with all empty children and at least one grouping expression.
|
||||
* - Generate(Explode) with all empty children. Others like Hive UDTF may return results.
|
||||
*/
|
||||
object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper {
|
||||
object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper with CastSupport {
|
||||
private def isEmptyLocalRelation(plan: LogicalPlan): Boolean = plan match {
|
||||
case p: LocalRelation => p.data.isEmpty
|
||||
case _ => false
|
||||
|
@ -43,7 +45,9 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper {
|
|||
|
||||
// Construct a project list from plan's output, while the value is always NULL.
|
||||
private def nullValueProjectList(plan: LogicalPlan): Seq[NamedExpression] =
|
||||
plan.output.map{ a => Alias(Literal(null), a.name)(a.exprId) }
|
||||
plan.output.map{ a => Alias(cast(Literal(null), a.dataType), a.name)(a.exprId) }
|
||||
|
||||
override def conf: SQLConf = SQLConf.get
|
||||
|
||||
def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
|
||||
case p: Union if p.children.forall(isEmptyLocalRelation) =>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal
|
|||
import org.apache.spark.sql.catalyst.plans._
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project}
|
||||
import org.apache.spark.sql.catalyst.rules.RuleExecutor
|
||||
import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.types.{IntegerType, StructType}
|
||||
|
||||
class PropagateEmptyRelationSuite extends PlanTest {
|
||||
object Optimize extends RuleExecutor[LogicalPlan] {
|
||||
|
@ -37,7 +37,8 @@ class PropagateEmptyRelationSuite extends PlanTest {
|
|||
ReplaceIntersectWithSemiJoin,
|
||||
PushDownPredicate,
|
||||
PruneFilters,
|
||||
PropagateEmptyRelation) :: Nil
|
||||
PropagateEmptyRelation,
|
||||
CollapseProject) :: Nil
|
||||
}
|
||||
|
||||
object OptimizeWithoutPropagateEmptyRelation extends RuleExecutor[LogicalPlan] {
|
||||
|
@ -48,7 +49,8 @@ class PropagateEmptyRelationSuite extends PlanTest {
|
|||
ReplaceExceptWithAntiJoin,
|
||||
ReplaceIntersectWithSemiJoin,
|
||||
PushDownPredicate,
|
||||
PruneFilters) :: Nil
|
||||
PruneFilters,
|
||||
CollapseProject) :: Nil
|
||||
}
|
||||
|
||||
val testRelation1 = LocalRelation.fromExternalRows(Seq('a.int), data = Seq(Row(1)))
|
||||
|
@ -79,9 +81,11 @@ class PropagateEmptyRelationSuite extends PlanTest {
|
|||
|
||||
(true, false, Inner, Some(LocalRelation('a.int, 'b.int))),
|
||||
(true, false, Cross, Some(LocalRelation('a.int, 'b.int))),
|
||||
(true, false, LeftOuter, Some(Project(Seq('a, Literal(null).as('b)), testRelation1).analyze)),
|
||||
(true, false, LeftOuter,
|
||||
Some(Project(Seq('a, Literal(null).cast(IntegerType).as('b)), testRelation1).analyze)),
|
||||
(true, false, RightOuter, Some(LocalRelation('a.int, 'b.int))),
|
||||
(true, false, FullOuter, Some(Project(Seq('a, Literal(null).as('b)), testRelation1).analyze)),
|
||||
(true, false, FullOuter,
|
||||
Some(Project(Seq('a, Literal(null).cast(IntegerType).as('b)), testRelation1).analyze)),
|
||||
(true, false, LeftAnti, Some(testRelation1)),
|
||||
(true, false, LeftSemi, Some(LocalRelation('a.int))),
|
||||
|
||||
|
@ -89,8 +93,9 @@ class PropagateEmptyRelationSuite extends PlanTest {
|
|||
(false, true, Cross, Some(LocalRelation('a.int, 'b.int))),
|
||||
(false, true, LeftOuter, Some(LocalRelation('a.int, 'b.int))),
|
||||
(false, true, RightOuter,
|
||||
Some(Project(Seq(Literal(null).as('a), 'b), testRelation2).analyze)),
|
||||
(false, true, FullOuter, Some(Project(Seq(Literal(null).as('a), 'b), testRelation2).analyze)),
|
||||
Some(Project(Seq(Literal(null).cast(IntegerType).as('a), 'b), testRelation2).analyze)),
|
||||
(false, true, FullOuter,
|
||||
Some(Project(Seq(Literal(null).cast(IntegerType).as('a), 'b), testRelation2).analyze)),
|
||||
(false, true, LeftAnti, Some(LocalRelation('a.int))),
|
||||
(false, true, LeftSemi, Some(LocalRelation('a.int))),
|
||||
|
||||
|
@ -209,4 +214,11 @@ class PropagateEmptyRelationSuite extends PlanTest {
|
|||
|
||||
comparePlans(optimized, correctAnswer)
|
||||
}
|
||||
|
||||
test("propagate empty relation keeps the plan resolved") {
|
||||
val query = testRelation1.join(
|
||||
LocalRelation('a.int, 'b.int), UsingJoin(FullOuter, "a" :: Nil), None)
|
||||
val optimized = Optimize.execute(query.analyze)
|
||||
assert(optimized.resolved)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue