[SPARK-36020][SQL] Check logical link in remove redundant projects
### What changes were proposed in this pull request? The `RemoveRedundantProjects` feature can conflict with the AQE broadcast threshold ([PR](https://github.com/apache/spark/pull/32391)) sometimes. After removing the project, the physical-plan-to-logical-plan link can be changed and we may have a `Project` above `LogicalQueryStage`. This breaks the AQE broadcast threshold, because the stats of `Project` do not have the `isRuntime = true` flag, and thus still use the normal broadcast threshold. This PR updates `RemoveRedundantProjects` to not remove a `ProjectExec` that has a different logical plan link than its child. ### Why are the changes needed? Make the AQE broadcast threshold work in more cases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests. Closes #33222 from cloud-fan/aqe2. Lead-authored-by: Wenchen Fan <wenchen@databricks.com> Co-authored-by: Wenchen Fan <cloud0fan@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
2fff060b6d
commit
6b3ab8262f
|
@ -48,10 +48,8 @@ object RemoveRedundantProjects extends Rule[SparkPlan] {
|
||||||
private def removeProject(plan: SparkPlan, requireOrdering: Boolean): SparkPlan = {
|
private def removeProject(plan: SparkPlan, requireOrdering: Boolean): SparkPlan = {
|
||||||
plan match {
|
plan match {
|
||||||
case p @ ProjectExec(_, child) =>
|
case p @ ProjectExec(_, child) =>
|
||||||
if (isRedundant(p, child, requireOrdering)) {
|
if (isRedundant(p, child, requireOrdering) && canRemove(p, child)) {
|
||||||
val newPlan = removeProject(child, requireOrdering)
|
removeProject(child, requireOrdering)
|
||||||
newPlan.setLogicalLink(child.logicalLink.get)
|
|
||||||
newPlan
|
|
||||||
} else {
|
} else {
|
||||||
p.mapChildren(removeProject(_, false))
|
p.mapChildren(removeProject(_, false))
|
||||||
}
|
}
|
||||||
|
@ -110,4 +108,11 @@ object RemoveRedundantProjects extends Rule[SparkPlan] {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SPARK-36020: Currently a project can only be removed if (1) its logical link is empty or (2)
|
||||||
|
// its logical link is the same as the child's logical link. This is to ensure the physical
|
||||||
|
// plan node can correctly map to its logical plan node in AQE.
|
||||||
|
private def canRemove(project: ProjectExec, child: SparkPlan): Boolean = {
|
||||||
|
project.logicalLink.isEmpty || project.logicalLink.exists(child.logicalLink.contains)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
== Physical Plan ==
|
== Physical Plan ==
|
||||||
TakeOrderedAndProject (24)
|
TakeOrderedAndProject (25)
|
||||||
+- * HashAggregate (23)
|
+- * HashAggregate (24)
|
||||||
+- Exchange (22)
|
+- Exchange (23)
|
||||||
+- * HashAggregate (21)
|
+- * HashAggregate (22)
|
||||||
+- * Expand (20)
|
+- * Expand (21)
|
||||||
|
+- * Project (20)
|
||||||
+- * BroadcastNestedLoopJoin Inner BuildRight (19)
|
+- * BroadcastNestedLoopJoin Inner BuildRight (19)
|
||||||
:- * Project (15)
|
:- * Project (15)
|
||||||
: +- * SortMergeJoin Inner (14)
|
: +- * SortMergeJoin Inner (14)
|
||||||
|
@ -40,7 +41,7 @@ Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
||||||
Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
||||||
Condition : isnotnull(inv_item_sk#1)
|
Condition : isnotnull(inv_item_sk#1)
|
||||||
|
|
||||||
(4) ReusedExchange [Reuses operator id: 29]
|
(4) ReusedExchange [Reuses operator id: 30]
|
||||||
Output [1]: [d_date_sk#5]
|
Output [1]: [d_date_sk#5]
|
||||||
|
|
||||||
(5) BroadcastHashJoin [codegen id : 2]
|
(5) BroadcastHashJoin [codegen id : 2]
|
||||||
|
@ -107,61 +108,65 @@ Arguments: IdentityBroadcastMode, [id=#13]
|
||||||
(19) BroadcastNestedLoopJoin [codegen id : 7]
|
(19) BroadcastNestedLoopJoin [codegen id : 7]
|
||||||
Join condition: None
|
Join condition: None
|
||||||
|
|
||||||
(20) Expand [codegen id : 7]
|
(20) Project [codegen id : 7]
|
||||||
|
Output [5]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10]
|
||||||
Input [5]: [inv_quantity_on_hand#2, i_brand#8, i_class#9, i_category#10, i_product_name#11]
|
Input [5]: [inv_quantity_on_hand#2, i_brand#8, i_class#9, i_category#10, i_product_name#11]
|
||||||
|
|
||||||
|
(21) Expand [codegen id : 7]
|
||||||
|
Input [5]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10]
|
||||||
Arguments: [[inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#2, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
Arguments: [[inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#2, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
||||||
|
|
||||||
(21) HashAggregate [codegen id : 7]
|
(22) HashAggregate [codegen id : 7]
|
||||||
Input [6]: [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
Input [6]: [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
||||||
Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
||||||
Functions [1]: [partial_avg(inv_quantity_on_hand#2)]
|
Functions [1]: [partial_avg(inv_quantity_on_hand#2)]
|
||||||
Aggregate Attributes [2]: [sum#19, count#20]
|
Aggregate Attributes [2]: [sum#19, count#20]
|
||||||
Results [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
Results [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
||||||
|
|
||||||
(22) Exchange
|
(23) Exchange
|
||||||
Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
||||||
Arguments: hashpartitioning(i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, 5), ENSURE_REQUIREMENTS, [id=#23]
|
Arguments: hashpartitioning(i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, 5), ENSURE_REQUIREMENTS, [id=#23]
|
||||||
|
|
||||||
(23) HashAggregate [codegen id : 8]
|
(24) HashAggregate [codegen id : 8]
|
||||||
Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22]
|
||||||
Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18]
|
||||||
Functions [1]: [avg(inv_quantity_on_hand#2)]
|
Functions [1]: [avg(inv_quantity_on_hand#2)]
|
||||||
Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#24]
|
Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#24]
|
||||||
Results [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, avg(inv_quantity_on_hand#2)#24 AS qoh#25]
|
Results [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, avg(inv_quantity_on_hand#2)#24 AS qoh#25]
|
||||||
|
|
||||||
(24) TakeOrderedAndProject
|
(25) TakeOrderedAndProject
|
||||||
Input [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25]
|
Input [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25]
|
||||||
Arguments: 100, [qoh#25 ASC NULLS FIRST, i_product_name#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, i_class#16 ASC NULLS FIRST, i_category#17 ASC NULLS FIRST], [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25]
|
Arguments: 100, [qoh#25 ASC NULLS FIRST, i_product_name#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, i_class#16 ASC NULLS FIRST, i_category#17 ASC NULLS FIRST], [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25]
|
||||||
|
|
||||||
===== Subqueries =====
|
===== Subqueries =====
|
||||||
|
|
||||||
Subquery:1 Hosting operator id = 1 Hosting Expression = inv_date_sk#3 IN dynamicpruning#4
|
Subquery:1 Hosting operator id = 1 Hosting Expression = inv_date_sk#3 IN dynamicpruning#4
|
||||||
BroadcastExchange (29)
|
BroadcastExchange (30)
|
||||||
+- * Project (28)
|
+- * Project (29)
|
||||||
+- * Filter (27)
|
+- * Filter (28)
|
||||||
+- * ColumnarToRow (26)
|
+- * ColumnarToRow (27)
|
||||||
+- Scan parquet default.date_dim (25)
|
+- Scan parquet default.date_dim (26)
|
||||||
|
|
||||||
|
|
||||||
(25) Scan parquet default.date_dim
|
(26) Scan parquet default.date_dim
|
||||||
Output [2]: [d_date_sk#5, d_month_seq#26]
|
Output [2]: [d_date_sk#5, d_month_seq#26]
|
||||||
Batched: true
|
Batched: true
|
||||||
Location [not included in comparison]/{warehouse_dir}/date_dim]
|
Location [not included in comparison]/{warehouse_dir}/date_dim]
|
||||||
PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)]
|
PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)]
|
||||||
ReadSchema: struct<d_date_sk:int,d_month_seq:int>
|
ReadSchema: struct<d_date_sk:int,d_month_seq:int>
|
||||||
|
|
||||||
(26) ColumnarToRow [codegen id : 1]
|
(27) ColumnarToRow [codegen id : 1]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#26]
|
Input [2]: [d_date_sk#5, d_month_seq#26]
|
||||||
|
|
||||||
(27) Filter [codegen id : 1]
|
(28) Filter [codegen id : 1]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#26]
|
Input [2]: [d_date_sk#5, d_month_seq#26]
|
||||||
Condition : (((isnotnull(d_month_seq#26) AND (d_month_seq#26 >= 1200)) AND (d_month_seq#26 <= 1211)) AND isnotnull(d_date_sk#5))
|
Condition : (((isnotnull(d_month_seq#26) AND (d_month_seq#26 >= 1200)) AND (d_month_seq#26 <= 1211)) AND isnotnull(d_date_sk#5))
|
||||||
|
|
||||||
(28) Project [codegen id : 1]
|
(29) Project [codegen id : 1]
|
||||||
Output [1]: [d_date_sk#5]
|
Output [1]: [d_date_sk#5]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#26]
|
Input [2]: [d_date_sk#5, d_month_seq#26]
|
||||||
|
|
||||||
(29) BroadcastExchange
|
(30) BroadcastExchange
|
||||||
Input [1]: [d_date_sk#5]
|
Input [1]: [d_date_sk#5]
|
||||||
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27]
|
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27]
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category]
|
||||||
WholeStageCodegen (7)
|
WholeStageCodegen (7)
|
||||||
HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count]
|
HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count]
|
||||||
Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
||||||
|
Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
||||||
BroadcastNestedLoopJoin
|
BroadcastNestedLoopJoin
|
||||||
Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name]
|
Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name]
|
||||||
SortMergeJoin [inv_item_sk,i_item_sk]
|
SortMergeJoin [inv_item_sk,i_item_sk]
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
== Physical Plan ==
|
== Physical Plan ==
|
||||||
TakeOrderedAndProject (21)
|
TakeOrderedAndProject (22)
|
||||||
+- * HashAggregate (20)
|
+- * HashAggregate (21)
|
||||||
+- Exchange (19)
|
+- Exchange (20)
|
||||||
+- * HashAggregate (18)
|
+- * HashAggregate (19)
|
||||||
+- * Expand (17)
|
+- * Expand (18)
|
||||||
|
+- * Project (17)
|
||||||
+- * BroadcastNestedLoopJoin Inner BuildRight (16)
|
+- * BroadcastNestedLoopJoin Inner BuildRight (16)
|
||||||
:- * Project (12)
|
:- * Project (12)
|
||||||
: +- * BroadcastHashJoin Inner BuildRight (11)
|
: +- * BroadcastHashJoin Inner BuildRight (11)
|
||||||
|
@ -37,7 +38,7 @@ Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
||||||
Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3]
|
||||||
Condition : isnotnull(inv_item_sk#1)
|
Condition : isnotnull(inv_item_sk#1)
|
||||||
|
|
||||||
(4) ReusedExchange [Reuses operator id: 26]
|
(4) ReusedExchange [Reuses operator id: 27]
|
||||||
Output [1]: [d_date_sk#5]
|
Output [1]: [d_date_sk#5]
|
||||||
|
|
||||||
(5) BroadcastHashJoin [codegen id : 4]
|
(5) BroadcastHashJoin [codegen id : 4]
|
||||||
|
@ -92,61 +93,65 @@ Arguments: IdentityBroadcastMode, [id=#12]
|
||||||
(16) BroadcastNestedLoopJoin [codegen id : 4]
|
(16) BroadcastNestedLoopJoin [codegen id : 4]
|
||||||
Join condition: None
|
Join condition: None
|
||||||
|
|
||||||
(17) Expand [codegen id : 4]
|
(17) Project [codegen id : 4]
|
||||||
|
Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9]
|
||||||
Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10]
|
Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10]
|
||||||
|
|
||||||
|
(18) Expand [codegen id : 4]
|
||||||
|
Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9]
|
||||||
Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
||||||
|
|
||||||
(18) HashAggregate [codegen id : 4]
|
(19) HashAggregate [codegen id : 4]
|
||||||
Input [6]: [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
Input [6]: [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
||||||
Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
||||||
Functions [1]: [partial_avg(inv_quantity_on_hand#2)]
|
Functions [1]: [partial_avg(inv_quantity_on_hand#2)]
|
||||||
Aggregate Attributes [2]: [sum#18, count#19]
|
Aggregate Attributes [2]: [sum#18, count#19]
|
||||||
Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
||||||
|
|
||||||
(19) Exchange
|
(20) Exchange
|
||||||
Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
||||||
Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [id=#22]
|
Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [id=#22]
|
||||||
|
|
||||||
(20) HashAggregate [codegen id : 5]
|
(21) HashAggregate [codegen id : 5]
|
||||||
Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21]
|
||||||
Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17]
|
||||||
Functions [1]: [avg(inv_quantity_on_hand#2)]
|
Functions [1]: [avg(inv_quantity_on_hand#2)]
|
||||||
Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#23]
|
Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#23]
|
||||||
Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#2)#23 AS qoh#24]
|
Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#2)#23 AS qoh#24]
|
||||||
|
|
||||||
(21) TakeOrderedAndProject
|
(22) TakeOrderedAndProject
|
||||||
Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24]
|
Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24]
|
||||||
Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24]
|
Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24]
|
||||||
|
|
||||||
===== Subqueries =====
|
===== Subqueries =====
|
||||||
|
|
||||||
Subquery:1 Hosting operator id = 1 Hosting Expression = inv_date_sk#3 IN dynamicpruning#4
|
Subquery:1 Hosting operator id = 1 Hosting Expression = inv_date_sk#3 IN dynamicpruning#4
|
||||||
BroadcastExchange (26)
|
BroadcastExchange (27)
|
||||||
+- * Project (25)
|
+- * Project (26)
|
||||||
+- * Filter (24)
|
+- * Filter (25)
|
||||||
+- * ColumnarToRow (23)
|
+- * ColumnarToRow (24)
|
||||||
+- Scan parquet default.date_dim (22)
|
+- Scan parquet default.date_dim (23)
|
||||||
|
|
||||||
|
|
||||||
(22) Scan parquet default.date_dim
|
(23) Scan parquet default.date_dim
|
||||||
Output [2]: [d_date_sk#5, d_month_seq#25]
|
Output [2]: [d_date_sk#5, d_month_seq#25]
|
||||||
Batched: true
|
Batched: true
|
||||||
Location [not included in comparison]/{warehouse_dir}/date_dim]
|
Location [not included in comparison]/{warehouse_dir}/date_dim]
|
||||||
PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)]
|
PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)]
|
||||||
ReadSchema: struct<d_date_sk:int,d_month_seq:int>
|
ReadSchema: struct<d_date_sk:int,d_month_seq:int>
|
||||||
|
|
||||||
(23) ColumnarToRow [codegen id : 1]
|
(24) ColumnarToRow [codegen id : 1]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#25]
|
Input [2]: [d_date_sk#5, d_month_seq#25]
|
||||||
|
|
||||||
(24) Filter [codegen id : 1]
|
(25) Filter [codegen id : 1]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#25]
|
Input [2]: [d_date_sk#5, d_month_seq#25]
|
||||||
Condition : (((isnotnull(d_month_seq#25) AND (d_month_seq#25 >= 1200)) AND (d_month_seq#25 <= 1211)) AND isnotnull(d_date_sk#5))
|
Condition : (((isnotnull(d_month_seq#25) AND (d_month_seq#25 >= 1200)) AND (d_month_seq#25 <= 1211)) AND isnotnull(d_date_sk#5))
|
||||||
|
|
||||||
(25) Project [codegen id : 1]
|
(26) Project [codegen id : 1]
|
||||||
Output [1]: [d_date_sk#5]
|
Output [1]: [d_date_sk#5]
|
||||||
Input [2]: [d_date_sk#5, d_month_seq#25]
|
Input [2]: [d_date_sk#5, d_month_seq#25]
|
||||||
|
|
||||||
(26) BroadcastExchange
|
(27) BroadcastExchange
|
||||||
Input [1]: [d_date_sk#5]
|
Input [1]: [d_date_sk#5]
|
||||||
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26]
|
Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26]
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category]
|
||||||
WholeStageCodegen (4)
|
WholeStageCodegen (4)
|
||||||
HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count]
|
HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count]
|
||||||
Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
||||||
|
Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category]
|
||||||
BroadcastNestedLoopJoin
|
BroadcastNestedLoopJoin
|
||||||
Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name]
|
Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name]
|
||||||
BroadcastHashJoin [inv_item_sk,i_item_sk]
|
BroadcastHashJoin [inv_item_sk,i_item_sk]
|
||||||
|
|
|
@ -131,7 +131,7 @@ class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite with DisableAdaptiv
|
||||||
}
|
}
|
||||||
|
|
||||||
private def getLogicalPlan(node: SparkPlan): LogicalPlan = {
|
private def getLogicalPlan(node: SparkPlan): LogicalPlan = {
|
||||||
node.getTagValue(SparkPlan.LOGICAL_PLAN_TAG).getOrElse {
|
node.logicalLink.getOrElse {
|
||||||
fail(node.getClass.getSimpleName + " does not have a logical plan link")
|
fail(node.getClass.getSimpleName + " does not have a logical plan link")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -216,8 +216,23 @@ abstract class RemoveRedundantProjectsSuiteBase
|
||||||
|ORDER BY t1.key, t2.key, s1, s2
|
|ORDER BY t1.key, t2.key, s1, s2
|
||||||
|LIMIT 10
|
|LIMIT 10
|
||||||
|""".stripMargin
|
|""".stripMargin
|
||||||
assertProjectExec(query, 0, 3)
|
// The Project above the Expand is not removed due to SPARK-36020.
|
||||||
|
assertProjectExec(query, 1, 3)
|
||||||
|
}
|
||||||
|
|
||||||
|
test("SPARK-36020: Project should not be removed when child's logical link is different") {
|
||||||
|
val query =
|
||||||
|
"""
|
||||||
|
|WITH t AS (
|
||||||
|
| SELECT key, a, b, c, explode(d) AS d FROM testView
|
||||||
|
|)
|
||||||
|
|SELECT t1.key, t1.d, t2.key
|
||||||
|
|FROM (SELECT d, key FROM t) t1
|
||||||
|
|JOIN testView t2 ON t1.key = t2.key
|
||||||
|
|""".stripMargin
|
||||||
|
// The ProjectExec above the GenerateExec should not be removed because
|
||||||
|
// they have different logical links.
|
||||||
|
assertProjectExec(query, enabled = 2, disabled = 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
Seq("true", "false").foreach { codegenEnabled =>
|
Seq("true", "false").foreach { codegenEnabled =>
|
||||||
|
|
|
@ -1929,6 +1929,29 @@ class AdaptiveQueryExecSuite
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-36020: Check logical link in remove redundant projects") {
|
||||||
|
withTempView("t") {
|
||||||
|
spark.range(10).selectExpr("id % 10 as key", "cast(id * 2 as int) as a",
|
||||||
|
"cast(id * 3 as int) as b", "array(id, id + 1, id + 3) as c").createOrReplaceTempView("t")
|
||||||
|
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
|
||||||
|
SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD.key -> "800") {
|
||||||
|
val query =
|
||||||
|
"""
|
||||||
|
|WITH tt AS (
|
||||||
|
| SELECT key, a, b, explode(c) AS c FROM t
|
||||||
|
|)
|
||||||
|
|SELECT t1.key, t1.c, t2.key, t2.c
|
||||||
|
|FROM (SELECT a, b, c, key FROM tt WHERE a > 1) t1
|
||||||
|
|JOIN (SELECT a, b, c, key FROM tt) t2
|
||||||
|
| ON t1.key = t2.key
|
||||||
|
|""".stripMargin
|
||||||
|
val (origin, adaptive) = runAdaptiveAndVerifyResult(query)
|
||||||
|
assert(findTopLevelSortMergeJoin(origin).size == 1)
|
||||||
|
assert(findTopLevelBroadcastHashJoin(adaptive).size == 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue