[SPARK-34593][SQL] Preserve broadcast nested loop join partitioning and ordering

### What changes were proposed in this pull request?

`BroadcastNestedLoopJoinExec` currently does not preserve `outputPartitioning` or `outputOrdering`, but it can preserve the streamed side's partitioning and ordering when possible. This can help avoid shuffles and sorts in later stages when the query contains joins and aggregations. See the example queries in the unit test added to `JoinSuite.scala`.

In addition, clean up several minor spots in `BroadcastNestedLoopJoinExec.scala` for better style and readability.

### Why are the changes needed?

This avoids shuffles and sorts for certain complicated query shapes, so better query performance can be achieved.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added unit test in `JoinSuite.scala`.

Closes #31708 from c21/nested-join.

Authored-by: Cheng Su <chengsu@fb.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Cheng Su 2021-03-03 04:32:28 +00:00 committed by Wenchen Fan
parent 4e43819611
commit 5362f08125
10 changed files with 701 additions and 634 deletions

View file

@ -41,7 +41,7 @@ case class BroadcastNestedLoopJoinExec(
override lazy val metrics = Map(
"numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
/** BuildRight means the right relation <=> the broadcast relation. */
/** BuildRight means the right relation is the broadcast relation. */
private val (streamed, broadcast) = buildSide match {
case BuildRight => (left, right)
case BuildLeft => (right, left)
@ -49,7 +49,7 @@ case class BroadcastNestedLoopJoinExec(
override def simpleStringWithNodeId(): String = {
val opId = ExplainUtils.getOpId(this)
s"$nodeName $joinType ${buildSide} ($opId)".trim
s"$nodeName $joinType $buildSide ($opId)".trim
}
override def requiredChildDistribution: Seq[Distribution] = buildSide match {
@ -59,10 +59,22 @@ case class BroadcastNestedLoopJoinExec(
UnspecifiedDistribution :: BroadcastDistribution(IdentityBroadcastMode) :: Nil
}
/**
 * Partitioning reported for the join output.
 *
 * For the (join type, build side) combinations matched by the first case, this node reports
 * the streamed child's partitioning unchanged. For every other combination it makes no claim
 * of its own and defers to the parent class's default.
 */
override def outputPartitioning: Partitioning = (joinType, buildSide) match {
  case (_: InnerLike, _) | (LeftOuter, BuildRight) | (RightOuter, BuildLeft) |
       (LeftSemi, BuildRight) | (LeftAnti, BuildRight) => streamed.outputPartitioning
  // Do not advertise a partition count borrowed from `left` here: with BuildLeft, `left` is
  // the broadcast child rather than the streamed one, so its partition count says nothing
  // about the output of this node. A misleading count (in particular 1) could let the planner
  // believe a single-partition requirement is already met and skip a needed shuffle.
  // NOTE(review): assumes the inherited default is an unknown partitioning that satisfies no
  // specific distribution (SparkPlan's UnknownPartitioning(0)) -- confirm.
  case _ => super.outputPartitioning
}
/**
 * Ordering reported for the join output.
 *
 * Returns the streamed child's ordering for the (join type, build side) combinations listed
 * below, and no ordering (`Nil`) for every other combination.
 */
override def outputOrdering: Seq[SortOrder] = {
  val keepsStreamedOrdering = (joinType, buildSide) match {
    case (_: InnerLike, _) => true
    case (LeftOuter, BuildRight) | (RightOuter, BuildLeft) => true
    case (LeftSemi, BuildRight) | (LeftAnti, BuildRight) => true
    case _ => false
  }
  if (keepsStreamedOrdering) streamed.outputOrdering else Nil
}
private[this] def genResultProjection: UnsafeProjection = joinType match {
case LeftExistence(j) =>
case LeftExistence(_) =>
UnsafeProjection.create(output, output)
case other =>
case _ =>
// Always put the stream side on left to simplify implementation
// both of left and right side could be null
UnsafeProjection.create(
@ -183,7 +195,7 @@ case class BroadcastNestedLoopJoinExec(
* The implementation for these joins:
*
* LeftSemi with BuildRight
* Anti with BuildRight
* LeftAnti with BuildRight
*/
private def leftExistenceJoin(
relation: Broadcast[Array[InternalRow]],
@ -238,7 +250,6 @@ case class BroadcastNestedLoopJoinExec(
* ExistenceJoin with BuildLeft
*/
private def defaultJoin(relation: Broadcast[Array[InternalRow]]): RDD[InternalRow] = {
/** All rows that either match both-way, or rows from streamed joined with nulls. */
val streamRdd = streamed.execute()
val matchedBuildRows = streamRdd.mapPartitionsInternal { streamedIter =>
@ -275,7 +286,7 @@ case class BroadcastNestedLoopJoinExec(
i += 1
}
return sparkContext.makeRDD(buf)
case j: ExistenceJoin =>
case _: ExistenceJoin =>
val buf: CompactBuffer[InternalRow] = new CompactBuffer()
var i = 0
val rel = relation.value
@ -296,7 +307,7 @@ case class BroadcastNestedLoopJoinExec(
i += 1
}
return sparkContext.makeRDD(notMatched)
case o =>
case _ =>
}
val notMatchedBroadcastRows: Seq[InternalRow] = {
@ -358,7 +369,7 @@ case class BroadcastNestedLoopJoinExec(
leftExistenceJoin(broadcastedRelation, exists = true)
case (LeftAnti, BuildRight) =>
leftExistenceJoin(broadcastedRelation, exists = false)
case (j: ExistenceJoin, BuildRight) =>
case (_: ExistenceJoin, BuildRight) =>
existenceJoin(broadcastedRelation)
case _ =>
/**

View file

@ -1,74 +1,73 @@
== Physical Plan ==
* Sort (70)
+- Exchange (69)
+- * Project (68)
+- BroadcastNestedLoopJoin Inner BuildRight (67)
:- * HashAggregate (47)
: +- Exchange (46)
: +- * HashAggregate (45)
: +- * Project (44)
: +- * BroadcastHashJoin Inner BuildRight (43)
: :- * Project (31)
: : +- * BroadcastHashJoin Inner BuildRight (30)
: : :- * Project (24)
: : : +- * BroadcastHashJoin Inner BuildRight (23)
: : : :- * Project (17)
: : : : +- * BroadcastHashJoin Inner BuildRight (16)
: : : : :- * Project (10)
: : : : : +- * BroadcastHashJoin Inner BuildRight (9)
: : : : : :- * Filter (3)
: : : : : : +- * ColumnarToRow (2)
: : : : : : +- Scan parquet default.store_sales (1)
: : : : : +- BroadcastExchange (8)
: : : : : +- * Project (7)
: : : : : +- * Filter (6)
: : : : : +- * ColumnarToRow (5)
: : : : : +- Scan parquet default.date_dim (4)
: : : : +- BroadcastExchange (15)
: : : : +- * Project (14)
: : : : +- * Filter (13)
: : : : +- * ColumnarToRow (12)
: : : : +- Scan parquet default.item (11)
: : : +- BroadcastExchange (22)
: : : +- * Project (21)
: : : +- * Filter (20)
: : : +- * ColumnarToRow (19)
: : : +- Scan parquet default.promotion (18)
: : +- BroadcastExchange (29)
: : +- * Project (28)
: : +- * Filter (27)
: : +- * ColumnarToRow (26)
: : +- Scan parquet default.store (25)
: +- BroadcastExchange (42)
: +- * Project (41)
: +- * BroadcastHashJoin Inner BuildRight (40)
: :- * Filter (34)
: : +- * ColumnarToRow (33)
: : +- Scan parquet default.customer (32)
: +- BroadcastExchange (39)
: +- * Project (38)
: +- * Filter (37)
: +- * ColumnarToRow (36)
: +- Scan parquet default.customer_address (35)
+- BroadcastExchange (66)
+- * HashAggregate (65)
+- Exchange (64)
+- * HashAggregate (63)
+- * Project (62)
+- * BroadcastHashJoin Inner BuildRight (61)
:- * Project (59)
: +- * BroadcastHashJoin Inner BuildRight (58)
: :- * Project (56)
: : +- * BroadcastHashJoin Inner BuildRight (55)
: : :- * Project (53)
: : : +- * BroadcastHashJoin Inner BuildRight (52)
: : : :- * Filter (50)
: : : : +- * ColumnarToRow (49)
: : : : +- Scan parquet default.store_sales (48)
: : : +- ReusedExchange (51)
: : +- ReusedExchange (54)
: +- ReusedExchange (57)
+- ReusedExchange (60)
* Sort (69)
+- * Project (68)
+- BroadcastNestedLoopJoin Inner BuildRight (67)
:- * HashAggregate (47)
: +- Exchange (46)
: +- * HashAggregate (45)
: +- * Project (44)
: +- * BroadcastHashJoin Inner BuildRight (43)
: :- * Project (31)
: : +- * BroadcastHashJoin Inner BuildRight (30)
: : :- * Project (24)
: : : +- * BroadcastHashJoin Inner BuildRight (23)
: : : :- * Project (17)
: : : : +- * BroadcastHashJoin Inner BuildRight (16)
: : : : :- * Project (10)
: : : : : +- * BroadcastHashJoin Inner BuildRight (9)
: : : : : :- * Filter (3)
: : : : : : +- * ColumnarToRow (2)
: : : : : : +- Scan parquet default.store_sales (1)
: : : : : +- BroadcastExchange (8)
: : : : : +- * Project (7)
: : : : : +- * Filter (6)
: : : : : +- * ColumnarToRow (5)
: : : : : +- Scan parquet default.date_dim (4)
: : : : +- BroadcastExchange (15)
: : : : +- * Project (14)
: : : : +- * Filter (13)
: : : : +- * ColumnarToRow (12)
: : : : +- Scan parquet default.item (11)
: : : +- BroadcastExchange (22)
: : : +- * Project (21)
: : : +- * Filter (20)
: : : +- * ColumnarToRow (19)
: : : +- Scan parquet default.promotion (18)
: : +- BroadcastExchange (29)
: : +- * Project (28)
: : +- * Filter (27)
: : +- * ColumnarToRow (26)
: : +- Scan parquet default.store (25)
: +- BroadcastExchange (42)
: +- * Project (41)
: +- * BroadcastHashJoin Inner BuildRight (40)
: :- * Filter (34)
: : +- * ColumnarToRow (33)
: : +- Scan parquet default.customer (32)
: +- BroadcastExchange (39)
: +- * Project (38)
: +- * Filter (37)
: +- * ColumnarToRow (36)
: +- Scan parquet default.customer_address (35)
+- BroadcastExchange (66)
+- * HashAggregate (65)
+- Exchange (64)
+- * HashAggregate (63)
+- * Project (62)
+- * BroadcastHashJoin Inner BuildRight (61)
:- * Project (59)
: +- * BroadcastHashJoin Inner BuildRight (58)
: :- * Project (56)
: : +- * BroadcastHashJoin Inner BuildRight (55)
: : :- * Project (53)
: : : +- * BroadcastHashJoin Inner BuildRight (52)
: : : :- * Filter (50)
: : : : +- * ColumnarToRow (49)
: : : : +- Scan parquet default.store_sales (48)
: : : +- ReusedExchange (51)
: : +- ReusedExchange (54)
: +- ReusedExchange (57)
+- ReusedExchange (60)
(1) Scan parquet default.store_sales
@ -121,7 +120,7 @@ Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext
Output [2]: [i_item_sk#12, i_category#13]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)]
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)]
ReadSchema: struct<i_item_sk:int,i_category:string>
(12) ColumnarToRow [codegen id : 2]
@ -129,7 +128,7 @@ Input [2]: [i_item_sk#12, i_category#13]
(13) Filter [codegen id : 2]
Input [2]: [i_item_sk#12, i_category#13]
Condition : ((isnotnull(i_category#13) AND (i_category#13 = Jewelry)) AND isnotnull(i_item_sk#12))
Condition : ((isnotnull(i_category#13) AND (i_category#13 = Jewelry )) AND isnotnull(i_item_sk#12))
(14) Project [codegen id : 2]
Output [1]: [i_item_sk#12]
@ -378,21 +377,17 @@ Join condition: None
Output [3]: [promotions#33, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#33 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Input [2]: [promotions#33, total#38]
(69) Exchange
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Arguments: rangepartitioning(promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#41]
(70) Sort [codegen id : 17]
(69) Sort [codegen id : 16]
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Arguments: [promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST], true, 0
===== Subqueries =====
Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
ReusedExchange (71)
ReusedExchange (70)
(71) ReusedExchange [Reuses operator id: 8]
(70) ReusedExchange [Reuses operator id: 8]
Output [1]: [d_date_sk#8]
Subquery:2 Hosting operator id = 48 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7

View file

@ -1,107 +1,104 @@
WholeStageCodegen (17)
WholeStageCodegen (16)
Sort [promotions,total]
InputAdapter
Exchange [promotions,total] #1
WholeStageCodegen (16)
Project [promotions,total]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (8)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
InputAdapter
Exchange #2
WholeStageCodegen (7)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
SubqueryBroadcast [d_date_sk] #1
ReusedExchange [d_date_sk] #3
InputAdapter
BroadcastExchange #3
WholeStageCodegen (1)
Project [d_date_sk]
Filter [d_year,d_moy,d_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (2)
Project [i_item_sk]
Filter [i_category,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_item_sk,i_category]
InputAdapter
BroadcastExchange #5
WholeStageCodegen (3)
Project [p_promo_sk]
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
InputAdapter
BroadcastExchange #6
WholeStageCodegen (4)
Project [s_store_sk]
Filter [s_gmt_offset,s_store_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store [s_store_sk,s_gmt_offset]
InputAdapter
BroadcastExchange #7
WholeStageCodegen (6)
Project [c_customer_sk]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Filter [c_customer_sk,c_current_addr_sk]
Project [promotions,total]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (8)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
InputAdapter
Exchange #1
WholeStageCodegen (7)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
SubqueryBroadcast [d_date_sk] #1
ReusedExchange [d_date_sk] #2
InputAdapter
BroadcastExchange #8
WholeStageCodegen (5)
Project [ca_address_sk]
Filter [ca_gmt_offset,ca_address_sk]
BroadcastExchange #2
WholeStageCodegen (1)
Project [d_date_sk]
Filter [d_year,d_moy,d_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
BroadcastExchange #9
WholeStageCodegen (15)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
InputAdapter
Exchange #10
WholeStageCodegen (14)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
InputAdapter
BroadcastExchange #3
WholeStageCodegen (2)
Project [i_item_sk]
Filter [i_category,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
ReusedSubquery [d_date_sk] #1
InputAdapter
ReusedExchange [d_date_sk] #3
InputAdapter
ReusedExchange [i_item_sk] #4
InputAdapter
ReusedExchange [s_store_sk] #6
Scan parquet default.item [i_item_sk,i_category]
InputAdapter
ReusedExchange [c_customer_sk] #7
BroadcastExchange #4
WholeStageCodegen (3)
Project [p_promo_sk]
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
InputAdapter
BroadcastExchange #5
WholeStageCodegen (4)
Project [s_store_sk]
Filter [s_gmt_offset,s_store_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store [s_store_sk,s_gmt_offset]
InputAdapter
BroadcastExchange #6
WholeStageCodegen (6)
Project [c_customer_sk]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Filter [c_customer_sk,c_current_addr_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
InputAdapter
BroadcastExchange #7
WholeStageCodegen (5)
Project [ca_address_sk]
Filter [ca_gmt_offset,ca_address_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
BroadcastExchange #8
WholeStageCodegen (15)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
InputAdapter
Exchange #9
WholeStageCodegen (14)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
ReusedSubquery [d_date_sk] #1
InputAdapter
ReusedExchange [d_date_sk] #2
InputAdapter
ReusedExchange [i_item_sk] #3
InputAdapter
ReusedExchange [s_store_sk] #5
InputAdapter
ReusedExchange [c_customer_sk] #6

View file

@ -1,77 +1,76 @@
== Physical Plan ==
* Sort (73)
+- Exchange (72)
+- * Project (71)
+- BroadcastNestedLoopJoin Inner BuildRight (70)
:- * HashAggregate (47)
: +- Exchange (46)
: +- * HashAggregate (45)
: +- * Project (44)
: +- * BroadcastHashJoin Inner BuildRight (43)
: :- * Project (37)
: : +- * BroadcastHashJoin Inner BuildRight (36)
: : :- * Project (30)
: : : +- * BroadcastHashJoin Inner BuildRight (29)
: : : :- * Project (24)
: : : : +- * BroadcastHashJoin Inner BuildRight (23)
: : : : :- * Project (17)
: : : : : +- * BroadcastHashJoin Inner BuildRight (16)
: : : : : :- * Project (10)
: : : : : : +- * BroadcastHashJoin Inner BuildRight (9)
: : : : : : :- * Filter (3)
: : : : : : : +- * ColumnarToRow (2)
: : : : : : : +- Scan parquet default.store_sales (1)
: : : : : : +- BroadcastExchange (8)
: : : : : : +- * Project (7)
: : : : : : +- * Filter (6)
: : : : : : +- * ColumnarToRow (5)
: : : : : : +- Scan parquet default.store (4)
: : : : : +- BroadcastExchange (15)
: : : : : +- * Project (14)
: : : : : +- * Filter (13)
: : : : : +- * ColumnarToRow (12)
: : : : : +- Scan parquet default.promotion (11)
: : : : +- BroadcastExchange (22)
: : : : +- * Project (21)
: : : : +- * Filter (20)
: : : : +- * ColumnarToRow (19)
: : : : +- Scan parquet default.date_dim (18)
: : : +- BroadcastExchange (28)
: : : +- * Filter (27)
: : : +- * ColumnarToRow (26)
: : : +- Scan parquet default.customer (25)
: : +- BroadcastExchange (35)
: : +- * Project (34)
: : +- * Filter (33)
: : +- * ColumnarToRow (32)
: : +- Scan parquet default.customer_address (31)
: +- BroadcastExchange (42)
: +- * Project (41)
: +- * Filter (40)
: +- * ColumnarToRow (39)
: +- Scan parquet default.item (38)
+- BroadcastExchange (69)
+- * HashAggregate (68)
+- Exchange (67)
+- * HashAggregate (66)
+- * Project (65)
+- * BroadcastHashJoin Inner BuildRight (64)
:- * Project (62)
: +- * BroadcastHashJoin Inner BuildRight (61)
: :- * Project (59)
: : +- * BroadcastHashJoin Inner BuildRight (58)
: : :- * Project (56)
: : : +- * BroadcastHashJoin Inner BuildRight (55)
: : : :- * Project (53)
: : : : +- * BroadcastHashJoin Inner BuildRight (52)
: : : : :- * Filter (50)
: : : : : +- * ColumnarToRow (49)
: : : : : +- Scan parquet default.store_sales (48)
: : : : +- ReusedExchange (51)
: : : +- ReusedExchange (54)
: : +- ReusedExchange (57)
: +- ReusedExchange (60)
+- ReusedExchange (63)
* Sort (72)
+- * Project (71)
+- BroadcastNestedLoopJoin Inner BuildRight (70)
:- * HashAggregate (47)
: +- Exchange (46)
: +- * HashAggregate (45)
: +- * Project (44)
: +- * BroadcastHashJoin Inner BuildRight (43)
: :- * Project (37)
: : +- * BroadcastHashJoin Inner BuildRight (36)
: : :- * Project (30)
: : : +- * BroadcastHashJoin Inner BuildRight (29)
: : : :- * Project (24)
: : : : +- * BroadcastHashJoin Inner BuildRight (23)
: : : : :- * Project (17)
: : : : : +- * BroadcastHashJoin Inner BuildRight (16)
: : : : : :- * Project (10)
: : : : : : +- * BroadcastHashJoin Inner BuildRight (9)
: : : : : : :- * Filter (3)
: : : : : : : +- * ColumnarToRow (2)
: : : : : : : +- Scan parquet default.store_sales (1)
: : : : : : +- BroadcastExchange (8)
: : : : : : +- * Project (7)
: : : : : : +- * Filter (6)
: : : : : : +- * ColumnarToRow (5)
: : : : : : +- Scan parquet default.store (4)
: : : : : +- BroadcastExchange (15)
: : : : : +- * Project (14)
: : : : : +- * Filter (13)
: : : : : +- * ColumnarToRow (12)
: : : : : +- Scan parquet default.promotion (11)
: : : : +- BroadcastExchange (22)
: : : : +- * Project (21)
: : : : +- * Filter (20)
: : : : +- * ColumnarToRow (19)
: : : : +- Scan parquet default.date_dim (18)
: : : +- BroadcastExchange (28)
: : : +- * Filter (27)
: : : +- * ColumnarToRow (26)
: : : +- Scan parquet default.customer (25)
: : +- BroadcastExchange (35)
: : +- * Project (34)
: : +- * Filter (33)
: : +- * ColumnarToRow (32)
: : +- Scan parquet default.customer_address (31)
: +- BroadcastExchange (42)
: +- * Project (41)
: +- * Filter (40)
: +- * ColumnarToRow (39)
: +- Scan parquet default.item (38)
+- BroadcastExchange (69)
+- * HashAggregate (68)
+- Exchange (67)
+- * HashAggregate (66)
+- * Project (65)
+- * BroadcastHashJoin Inner BuildRight (64)
:- * Project (62)
: +- * BroadcastHashJoin Inner BuildRight (61)
: :- * Project (59)
: : +- * BroadcastHashJoin Inner BuildRight (58)
: : :- * Project (56)
: : : +- * BroadcastHashJoin Inner BuildRight (55)
: : : :- * Project (53)
: : : : +- * BroadcastHashJoin Inner BuildRight (52)
: : : : :- * Filter (50)
: : : : : +- * ColumnarToRow (49)
: : : : : +- Scan parquet default.store_sales (48)
: : : : +- ReusedExchange (51)
: : : +- ReusedExchange (54)
: : +- ReusedExchange (57)
: +- ReusedExchange (60)
+- ReusedExchange (63)
(1) Scan parquet default.store_sales
@ -244,7 +243,7 @@ Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#21, ca_address
Output [2]: [i_item_sk#26, i_category#27]
Batched: true
Location [not included in comparison]/{warehouse_dir}/item]
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)]
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)]
ReadSchema: struct<i_item_sk:int,i_category:string>
(39) ColumnarToRow [codegen id : 6]
@ -252,7 +251,7 @@ Input [2]: [i_item_sk#26, i_category#27]
(40) Filter [codegen id : 6]
Input [2]: [i_item_sk#26, i_category#27]
Condition : ((isnotnull(i_category#27) AND (i_category#27 = Jewelry)) AND isnotnull(i_item_sk#26))
Condition : ((isnotnull(i_category#27) AND (i_category#27 = Jewelry )) AND isnotnull(i_item_sk#26))
(41) Project [codegen id : 6]
Output [1]: [i_item_sk#26]
@ -393,21 +392,17 @@ Join condition: None
Output [3]: [promotions#33, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#33 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Input [2]: [promotions#33, total#38]
(72) Exchange
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Arguments: rangepartitioning(promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#41]
(73) Sort [codegen id : 17]
(72) Sort [codegen id : 16]
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
Arguments: [promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST], true, 0
===== Subqueries =====
Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
ReusedExchange (74)
ReusedExchange (73)
(74) ReusedExchange [Reuses operator id: 22]
(73) ReusedExchange [Reuses operator id: 22]
Output [1]: [d_date_sk#16]
Subquery:2 Hosting operator id = 48 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7

View file

@ -1,111 +1,108 @@
WholeStageCodegen (17)
WholeStageCodegen (16)
Sort [promotions,total]
InputAdapter
Exchange [promotions,total] #1
WholeStageCodegen (16)
Project [promotions,total]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (8)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
InputAdapter
Exchange #2
WholeStageCodegen (7)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_ext_sales_price]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
SubqueryBroadcast [d_date_sk] #1
ReusedExchange [d_date_sk] #3
InputAdapter
BroadcastExchange #4
WholeStageCodegen (1)
Project [s_store_sk]
Filter [s_gmt_offset,s_store_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store [s_store_sk,s_gmt_offset]
InputAdapter
BroadcastExchange #5
WholeStageCodegen (2)
Project [p_promo_sk]
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
InputAdapter
BroadcastExchange #3
WholeStageCodegen (3)
Project [d_date_sk]
Filter [d_year,d_moy,d_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
InputAdapter
BroadcastExchange #6
WholeStageCodegen (4)
Filter [c_customer_sk,c_current_addr_sk]
Project [promotions,total]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (8)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
InputAdapter
Exchange #1
WholeStageCodegen (7)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_ext_sales_price]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
InputAdapter
BroadcastExchange #7
WholeStageCodegen (5)
Project [ca_address_sk]
Filter [ca_gmt_offset,ca_address_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
InputAdapter
BroadcastExchange #8
WholeStageCodegen (6)
Project [i_item_sk]
Filter [i_category,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_item_sk,i_category]
BroadcastExchange #9
WholeStageCodegen (15)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
InputAdapter
Exchange #10
WholeStageCodegen (14)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_ext_sales_price]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
SubqueryBroadcast [d_date_sk] #1
ReusedExchange [d_date_sk] #2
InputAdapter
BroadcastExchange #3
WholeStageCodegen (1)
Project [s_store_sk]
Filter [s_gmt_offset,s_store_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store [s_store_sk,s_gmt_offset]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (2)
Project [p_promo_sk]
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
ReusedSubquery [d_date_sk] #1
InputAdapter
ReusedExchange [s_store_sk] #4
InputAdapter
ReusedExchange [d_date_sk] #3
InputAdapter
ReusedExchange [c_customer_sk,c_current_addr_sk] #6
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
InputAdapter
ReusedExchange [ca_address_sk] #7
BroadcastExchange #2
WholeStageCodegen (3)
Project [d_date_sk]
Filter [d_year,d_moy,d_date_sk]
ColumnarToRow
InputAdapter
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
InputAdapter
ReusedExchange [i_item_sk] #8
BroadcastExchange #5
WholeStageCodegen (4)
Filter [c_customer_sk,c_current_addr_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
InputAdapter
BroadcastExchange #6
WholeStageCodegen (5)
Project [ca_address_sk]
Filter [ca_gmt_offset,ca_address_sk]
ColumnarToRow
InputAdapter
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
InputAdapter
BroadcastExchange #7
WholeStageCodegen (6)
Project [i_item_sk]
Filter [i_category,i_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.item [i_item_sk,i_category]
BroadcastExchange #8
WholeStageCodegen (15)
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
InputAdapter
Exchange #9
WholeStageCodegen (14)
HashAggregate [ss_ext_sales_price] [sum,sum]
Project [ss_ext_sales_price]
BroadcastHashJoin [ss_item_sk,i_item_sk]
Project [ss_item_sk,ss_ext_sales_price]
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
BroadcastHashJoin [ss_store_sk,s_store_sk]
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
ColumnarToRow
InputAdapter
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
ReusedSubquery [d_date_sk] #1
InputAdapter
ReusedExchange [s_store_sk] #3
InputAdapter
ReusedExchange [d_date_sk] #2
InputAdapter
ReusedExchange [c_customer_sk,c_current_addr_sk] #5
InputAdapter
ReusedExchange [ca_address_sk] #6
InputAdapter
ReusedExchange [i_item_sk] #7

View file

@ -1,57 +1,56 @@
== Physical Plan ==
* Sort (53)
+- Exchange (52)
+- * Project (51)
+- BroadcastNestedLoopJoin Inner BuildRight (50)
:- * HashAggregate (28)
: +- Exchange (27)
: +- * HashAggregate (26)
: +- * Project (25)
: +- * BroadcastHashJoin Inner BuildRight (24)
: :- * Project (18)
: : +- * BroadcastHashJoin Inner BuildRight (17)
: : :- * Project (11)
: : : +- * BroadcastHashJoin Inner BuildRight (10)
: : : :- * Project (4)
: : : : +- * Filter (3)
: : : : +- * ColumnarToRow (2)
: : : : +- Scan parquet default.web_sales (1)
: : : +- BroadcastExchange (9)
: : : +- * Project (8)
: : : +- * Filter (7)
: : : +- * ColumnarToRow (6)
: : : +- Scan parquet default.web_page (5)
: : +- BroadcastExchange (16)
: : +- * Project (15)
: : +- * Filter (14)
: : +- * ColumnarToRow (13)
: : +- Scan parquet default.household_demographics (12)
: +- BroadcastExchange (23)
: +- * Project (22)
: +- * Filter (21)
: +- * ColumnarToRow (20)
: +- Scan parquet default.time_dim (19)
+- BroadcastExchange (49)
+- * HashAggregate (48)
+- Exchange (47)
+- * HashAggregate (46)
+- * Project (45)
+- * BroadcastHashJoin Inner BuildRight (44)
:- * Project (38)
: +- * BroadcastHashJoin Inner BuildRight (37)
: :- * Project (35)
: : +- * BroadcastHashJoin Inner BuildRight (34)
: : :- * Project (32)
: : : +- * Filter (31)
: : : +- * ColumnarToRow (30)
: : : +- Scan parquet default.web_sales (29)
: : +- ReusedExchange (33)
: +- ReusedExchange (36)
+- BroadcastExchange (43)
+- * Project (42)
+- * Filter (41)
+- * ColumnarToRow (40)
+- Scan parquet default.time_dim (39)
* Sort (52)
+- * Project (51)
+- BroadcastNestedLoopJoin Inner BuildRight (50)
:- * HashAggregate (28)
: +- Exchange (27)
: +- * HashAggregate (26)
: +- * Project (25)
: +- * BroadcastHashJoin Inner BuildRight (24)
: :- * Project (18)
: : +- * BroadcastHashJoin Inner BuildRight (17)
: : :- * Project (11)
: : : +- * BroadcastHashJoin Inner BuildRight (10)
: : : :- * Project (4)
: : : : +- * Filter (3)
: : : : +- * ColumnarToRow (2)
: : : : +- Scan parquet default.web_sales (1)
: : : +- BroadcastExchange (9)
: : : +- * Project (8)
: : : +- * Filter (7)
: : : +- * ColumnarToRow (6)
: : : +- Scan parquet default.web_page (5)
: : +- BroadcastExchange (16)
: : +- * Project (15)
: : +- * Filter (14)
: : +- * ColumnarToRow (13)
: : +- Scan parquet default.household_demographics (12)
: +- BroadcastExchange (23)
: +- * Project (22)
: +- * Filter (21)
: +- * ColumnarToRow (20)
: +- Scan parquet default.time_dim (19)
+- BroadcastExchange (49)
+- * HashAggregate (48)
+- Exchange (47)
+- * HashAggregate (46)
+- * Project (45)
+- * BroadcastHashJoin Inner BuildRight (44)
:- * Project (38)
: +- * BroadcastHashJoin Inner BuildRight (37)
: :- * Project (35)
: : +- * BroadcastHashJoin Inner BuildRight (34)
: : :- * Project (32)
: : : +- * Filter (31)
: : : +- * ColumnarToRow (30)
: : : +- Scan parquet default.web_sales (29)
: : +- ReusedExchange (33)
: +- ReusedExchange (36)
+- BroadcastExchange (43)
+- * Project (42)
+- * Filter (41)
+- * ColumnarToRow (40)
+- Scan parquet default.time_dim (39)
(1) Scan parquet default.web_sales
@ -285,11 +284,7 @@ Join condition: None
Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#24 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#26]
Input [2]: [amc#18, pmc#24]
(52) Exchange
Input [1]: [am_pm_ratio#26]
Arguments: rangepartitioning(am_pm_ratio#26 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27]
(53) Sort [codegen id : 12]
(52) Sort [codegen id : 11]
Input [1]: [am_pm_ratio#26]
Arguments: [am_pm_ratio#26 ASC NULLS FIRST], true, 0

View file

@ -1,79 +1,76 @@
WholeStageCodegen (12)
WholeStageCodegen (11)
Sort [am_pm_ratio]
InputAdapter
Exchange [am_pm_ratio] #1
WholeStageCodegen (11)
Project [amc,pmc]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (5)
HashAggregate [count] [count(1),amc,count]
InputAdapter
Exchange #2
WholeStageCodegen (4)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
Project [amc,pmc]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (5)
HashAggregate [count] [count(1),amc,count]
InputAdapter
Exchange #1
WholeStageCodegen (4)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
BroadcastExchange #3
WholeStageCodegen (1)
Project [wp_web_page_sk]
Filter [wp_char_count,wp_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (2)
Project [hd_demo_sk]
Filter [hd_dep_count,hd_demo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
InputAdapter
BroadcastExchange #5
WholeStageCodegen (3)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
BroadcastExchange #6
WholeStageCodegen (10)
HashAggregate [count] [count(1),pmc,count]
InputAdapter
Exchange #7
WholeStageCodegen (9)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
ReusedExchange [wp_web_page_sk] #3
InputAdapter
ReusedExchange [hd_demo_sk] #4
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
BroadcastExchange #8
WholeStageCodegen (8)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
BroadcastExchange #2
WholeStageCodegen (1)
Project [wp_web_page_sk]
Filter [wp_char_count,wp_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
InputAdapter
BroadcastExchange #3
WholeStageCodegen (2)
Project [hd_demo_sk]
Filter [hd_dep_count,hd_demo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (3)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
BroadcastExchange #5
WholeStageCodegen (10)
HashAggregate [count] [count(1),pmc,count]
InputAdapter
Exchange #6
WholeStageCodegen (9)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
ReusedExchange [wp_web_page_sk] #2
InputAdapter
ReusedExchange [hd_demo_sk] #3
InputAdapter
BroadcastExchange #7
WholeStageCodegen (8)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]

View file

@ -1,57 +1,56 @@
== Physical Plan ==
* Sort (53)
+- Exchange (52)
+- * Project (51)
+- BroadcastNestedLoopJoin Inner BuildRight (50)
:- * HashAggregate (28)
: +- Exchange (27)
: +- * HashAggregate (26)
: +- * Project (25)
: +- * BroadcastHashJoin Inner BuildRight (24)
: :- * Project (18)
: : +- * BroadcastHashJoin Inner BuildRight (17)
: : :- * Project (11)
: : : +- * BroadcastHashJoin Inner BuildRight (10)
: : : :- * Project (4)
: : : : +- * Filter (3)
: : : : +- * ColumnarToRow (2)
: : : : +- Scan parquet default.web_sales (1)
: : : +- BroadcastExchange (9)
: : : +- * Project (8)
: : : +- * Filter (7)
: : : +- * ColumnarToRow (6)
: : : +- Scan parquet default.household_demographics (5)
: : +- BroadcastExchange (16)
: : +- * Project (15)
: : +- * Filter (14)
: : +- * ColumnarToRow (13)
: : +- Scan parquet default.time_dim (12)
: +- BroadcastExchange (23)
: +- * Project (22)
: +- * Filter (21)
: +- * ColumnarToRow (20)
: +- Scan parquet default.web_page (19)
+- BroadcastExchange (49)
+- * HashAggregate (48)
+- Exchange (47)
+- * HashAggregate (46)
+- * Project (45)
+- * BroadcastHashJoin Inner BuildRight (44)
:- * Project (42)
: +- * BroadcastHashJoin Inner BuildRight (41)
: :- * Project (35)
: : +- * BroadcastHashJoin Inner BuildRight (34)
: : :- * Project (32)
: : : +- * Filter (31)
: : : +- * ColumnarToRow (30)
: : : +- Scan parquet default.web_sales (29)
: : +- ReusedExchange (33)
: +- BroadcastExchange (40)
: +- * Project (39)
: +- * Filter (38)
: +- * ColumnarToRow (37)
: +- Scan parquet default.time_dim (36)
+- ReusedExchange (43)
* Sort (52)
+- * Project (51)
+- BroadcastNestedLoopJoin Inner BuildRight (50)
:- * HashAggregate (28)
: +- Exchange (27)
: +- * HashAggregate (26)
: +- * Project (25)
: +- * BroadcastHashJoin Inner BuildRight (24)
: :- * Project (18)
: : +- * BroadcastHashJoin Inner BuildRight (17)
: : :- * Project (11)
: : : +- * BroadcastHashJoin Inner BuildRight (10)
: : : :- * Project (4)
: : : : +- * Filter (3)
: : : : +- * ColumnarToRow (2)
: : : : +- Scan parquet default.web_sales (1)
: : : +- BroadcastExchange (9)
: : : +- * Project (8)
: : : +- * Filter (7)
: : : +- * ColumnarToRow (6)
: : : +- Scan parquet default.household_demographics (5)
: : +- BroadcastExchange (16)
: : +- * Project (15)
: : +- * Filter (14)
: : +- * ColumnarToRow (13)
: : +- Scan parquet default.time_dim (12)
: +- BroadcastExchange (23)
: +- * Project (22)
: +- * Filter (21)
: +- * ColumnarToRow (20)
: +- Scan parquet default.web_page (19)
+- BroadcastExchange (49)
+- * HashAggregate (48)
+- Exchange (47)
+- * HashAggregate (46)
+- * Project (45)
+- * BroadcastHashJoin Inner BuildRight (44)
:- * Project (42)
: +- * BroadcastHashJoin Inner BuildRight (41)
: :- * Project (35)
: : +- * BroadcastHashJoin Inner BuildRight (34)
: : :- * Project (32)
: : : +- * Filter (31)
: : : +- * ColumnarToRow (30)
: : : +- Scan parquet default.web_sales (29)
: : +- ReusedExchange (33)
: +- BroadcastExchange (40)
: +- * Project (39)
: +- * Filter (38)
: +- * ColumnarToRow (37)
: +- Scan parquet default.time_dim (36)
+- ReusedExchange (43)
(1) Scan parquet default.web_sales
@ -285,11 +284,7 @@ Join condition: None
Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#24 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#26]
Input [2]: [amc#18, pmc#24]
(52) Exchange
Input [1]: [am_pm_ratio#26]
Arguments: rangepartitioning(am_pm_ratio#26 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27]
(53) Sort [codegen id : 12]
(52) Sort [codegen id : 11]
Input [1]: [am_pm_ratio#26]
Arguments: [am_pm_ratio#26 ASC NULLS FIRST], true, 0

View file

@ -1,79 +1,76 @@
WholeStageCodegen (12)
WholeStageCodegen (11)
Sort [am_pm_ratio]
InputAdapter
Exchange [am_pm_ratio] #1
WholeStageCodegen (11)
Project [amc,pmc]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (5)
HashAggregate [count] [count(1),amc,count]
InputAdapter
Exchange #2
WholeStageCodegen (4)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_web_page_sk]
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk,ws_web_page_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
Project [amc,pmc]
InputAdapter
BroadcastNestedLoopJoin
WholeStageCodegen (5)
HashAggregate [count] [count(1),amc,count]
InputAdapter
Exchange #1
WholeStageCodegen (4)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_web_page_sk]
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk,ws_web_page_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
BroadcastExchange #2
WholeStageCodegen (1)
Project [hd_demo_sk]
Filter [hd_dep_count,hd_demo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
BroadcastExchange #3
WholeStageCodegen (1)
Project [hd_demo_sk]
Filter [hd_dep_count,hd_demo_sk]
ColumnarToRow
InputAdapter
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
InputAdapter
BroadcastExchange #3
WholeStageCodegen (2)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (3)
Project [wp_web_page_sk]
Filter [wp_char_count,wp_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
BroadcastExchange #5
WholeStageCodegen (10)
HashAggregate [count] [count(1),pmc,count]
InputAdapter
Exchange #6
WholeStageCodegen (9)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_web_page_sk]
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk,ws_web_page_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
BroadcastExchange #4
WholeStageCodegen (2)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
ReusedExchange [hd_demo_sk] #2
InputAdapter
BroadcastExchange #5
WholeStageCodegen (3)
Project [wp_web_page_sk]
Filter [wp_char_count,wp_web_page_sk]
BroadcastExchange #7
WholeStageCodegen (7)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
BroadcastExchange #6
WholeStageCodegen (10)
HashAggregate [count] [count(1),pmc,count]
InputAdapter
Exchange #7
WholeStageCodegen (9)
HashAggregate [count,count]
Project
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
Project [ws_web_page_sk]
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
Project [ws_sold_time_sk,ws_web_page_sk]
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
ColumnarToRow
InputAdapter
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
InputAdapter
ReusedExchange [hd_demo_sk] #3
InputAdapter
BroadcastExchange #8
WholeStageCodegen (7)
Project [t_time_sk]
Filter [t_hour,t_time_sk]
ColumnarToRow
InputAdapter
Scan parquet default.time_dim [t_time_sk,t_hour]
InputAdapter
ReusedExchange [wp_web_page_sk] #5
Scan parquet default.time_dim [t_time_sk,t_hour]
InputAdapter
ReusedExchange [wp_web_page_sk] #4

View file

@ -1296,4 +1296,92 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
}
}
}
test("SPARK-34593: Preserve broadcast nested loop join partitioning and ordering") {
  withTable("t1", "t2", "t3", "t4", "t5") {
    // t1 and t2 are bucketed on `k` (4 buckets each); t3, t4 and t5 are not bucketed.
    spark.range(15).toDF("k").write.bucketBy(4, "k").saveAsTable("t1")
    spark.range(6).toDF("k").write.bucketBy(4, "k").saveAsTable("t2")
    spark.range(8).toDF("k").write.saveAsTable("t3")
    spark.range(9).toDF("k").write.saveAsTable("t4")
    spark.range(11).toDF("k").write.saveAsTable("t5")

    // Builds an aggregation on `k` over a condition-less join between t1 and t2.
    def aggOverJoin(projection: String, join: String): String = {
      s"""
        |SELECT k, COUNT(*)
        |FROM (SELECT $projection FROM t1 $join JOIN t2)
        |GROUP BY k
        |""".stripMargin
    }

    // Plans the aggregation query, then checks that the executed plan holds exactly one
    // broadcast nested loop join and the expected number of shuffle exchanges.
    def checkShuffles(join: String, projection: String, shuffles: Int): Unit = {
      val executed = sql(aggOverJoin(projection, join)).queryExecution.executedPlan
      assert(collect(executed) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
      assert(collect(executed) { case _: ShuffleExchangeExec => true }.size === shuffles)
    }

    // Test output partitioning is preserved: no extra shuffle before aggregation
    for (join <- Seq("INNER", "LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI")) {
      val projection = join match {
        case "RIGHT OUTER" => "/*+ BROADCAST(t1) */ t2.k AS k"
        case _ => "/*+ BROADCAST(t2) */ t1.k as k"
      }
      checkShuffles(join, projection, shuffles = 0)
    }

    // Test output partitioning is not preserved: have shuffle before aggregation
    for (join <- Seq("LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI", "FULL OUTER")) {
      val projection = join match {
        case "RIGHT OUTER" => "/*+ BROADCAST(t2) */ t1.k AS k"
        case _ => "/*+ BROADCAST(t1) */ t1.k as k"
      }
      checkShuffles(join, projection, shuffles = 1)
    }

    // Builds a query with three MERGE-hinted joins: t1 with t4, t2 with t5, and the
    // condition-less join of those two sub-queries with t3 on the projected column `k0`.
    def mergeJoinOverJoin(projection: String, join: String): String = {
      s"""
        |SELECT /*+ MERGE(t3) */ t3.k
        |FROM
        |(
        | SELECT $projection
        | FROM
        | (SELECT /*+ MERGE(t4) */ t1.k AS k1 FROM t1 JOIN t4 ON t1.k = t4.k) AS left_t
        | $join JOIN
        | (SELECT /*+ MERGE(t5) */ t2.k AS k2 FROM t2 JOIN t5 ON t2.k = t5.k) AS right_t
        |)
        |JOIN t3
        |ON t3.k = k0
        |""".stripMargin
    }

    // Plans the join query, then checks that the executed plan holds exactly one broadcast
    // nested loop join, three sort merge joins and the expected number of sorts.
    def checkSorts(join: String, projection: String, sorts: Int): Unit = {
      val executed = sql(mergeJoinOverJoin(projection, join)).queryExecution.executedPlan
      assert(collect(executed) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
      assert(collect(executed) { case _: SortMergeJoinExec => true }.size === 3)
      assert(collect(executed) { case _: SortExec => true }.size === sorts)
    }

    // Test output ordering is preserved: no extra sort on left side before last
    // sort merge join
    for (join <- Seq("INNER", "LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI")) {
      val projection = join match {
        case "RIGHT OUTER" => "/*+ BROADCAST(left_t) */ k2 AS k0"
        case _ => "/*+ BROADCAST(right_t) */ k1 as k0"
      }
      checkSorts(join, projection, sorts = 5)
    }

    // Test output ordering is not preserved: have sort on left side before last
    // sort merge join
    for (join <- Seq("LEFT OUTER", "FULL OUTER")) {
      checkSorts(join, "/*+ BROADCAST(left_t) */ k1 as k0", sorts = 6)
    }
  }
}
}