[SPARK-34593][SQL] Preserve broadcast nested loop join partitioning and ordering
### What changes were proposed in this pull request? `BroadcastNestedLoopJoinExec` does not currently preserve `outputPartitioning` and `outputOrdering`, but it can preserve the streamed side's partitioning and ordering when possible. This can help avoid a shuffle and sort in later stages when the query contains a join and aggregation. See the example queries in the unit test added to `JoinSuite.scala`. In addition, fix a number of minor places in `BroadcastNestedLoopJoinExec.scala` for better style and readability. ### Why are the changes needed? To avoid shuffle and sort for certain complicated query shapes, so that better query performance can be achieved. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test in `JoinSuite.scala`. Closes #31708 from c21/nested-join. Authored-by: Cheng Su <chengsu@fb.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
4e43819611
commit
5362f08125
|
@ -41,7 +41,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
override lazy val metrics = Map(
|
||||
"numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
|
||||
|
||||
/** BuildRight means the right relation <=> the broadcast relation. */
|
||||
/** BuildRight means the right relation is the broadcast relation. */
|
||||
private val (streamed, broadcast) = buildSide match {
|
||||
case BuildRight => (left, right)
|
||||
case BuildLeft => (right, left)
|
||||
|
@ -49,7 +49,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
|
||||
override def simpleStringWithNodeId(): String = {
|
||||
val opId = ExplainUtils.getOpId(this)
|
||||
s"$nodeName $joinType ${buildSide} ($opId)".trim
|
||||
s"$nodeName $joinType $buildSide ($opId)".trim
|
||||
}
|
||||
|
||||
override def requiredChildDistribution: Seq[Distribution] = buildSide match {
|
||||
|
@ -59,10 +59,22 @@ case class BroadcastNestedLoopJoinExec(
|
|||
UnspecifiedDistribution :: BroadcastDistribution(IdentityBroadcastMode) :: Nil
|
||||
}
|
||||
|
||||
/**
 * Partitioning reported for the join output, chosen from the (joinType, buildSide) pair.
 * The combinations listed in the first case report the streamed child's partitioning
 * unchanged; every other combination reports an UnknownPartitioning.
 */
override def outputPartitioning: Partitioning = (joinType, buildSide) match {
|
||||
case (_: InnerLike, _) | (LeftOuter, BuildRight) | (RightOuter, BuildLeft) |
|
||||
(LeftSemi, BuildRight) | (LeftAnti, BuildRight) => streamed.outputPartitioning
|
||||
// NOTE(review): the fallback takes its partition count from `left` regardless of build side,
// so with BuildLeft this is the broadcast child's count - confirm that is intended.
case _ => UnknownPartitioning(left.outputPartitioning.numPartitions)
|
||||
}
|
||||
|
||||
/**
 * Ordering reported for the join output, chosen from the (joinType, buildSide) pair.
 * The combinations listed in the first case report the streamed child's ordering unchanged;
 * every other combination reports no ordering (Nil).
 */
override def outputOrdering: Seq[SortOrder] = (joinType, buildSide) match {
|
||||
case (_: InnerLike, _) | (LeftOuter, BuildRight) | (RightOuter, BuildLeft) |
|
||||
(LeftSemi, BuildRight) | (LeftAnti, BuildRight) => streamed.outputOrdering
|
||||
case _ => Nil
|
||||
}
|
||||
|
||||
private[this] def genResultProjection: UnsafeProjection = joinType match {
|
||||
case LeftExistence(j) =>
|
||||
case LeftExistence(_) =>
|
||||
UnsafeProjection.create(output, output)
|
||||
case other =>
|
||||
case _ =>
|
||||
// Always put the stream side on left to simplify implementation
|
||||
// both of left and right side could be null
|
||||
UnsafeProjection.create(
|
||||
|
@ -183,7 +195,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
* The implementation for these joins:
|
||||
*
|
||||
* LeftSemi with BuildRight
|
||||
* Anti with BuildRight
|
||||
* LeftAnti with BuildRight
|
||||
*/
|
||||
private def leftExistenceJoin(
|
||||
relation: Broadcast[Array[InternalRow]],
|
||||
|
@ -238,7 +250,6 @@ case class BroadcastNestedLoopJoinExec(
|
|||
* ExistenceJoin with BuildLeft
|
||||
*/
|
||||
private def defaultJoin(relation: Broadcast[Array[InternalRow]]): RDD[InternalRow] = {
|
||||
/** All rows that either match both-way, or rows from streamed joined with nulls. */
|
||||
val streamRdd = streamed.execute()
|
||||
|
||||
val matchedBuildRows = streamRdd.mapPartitionsInternal { streamedIter =>
|
||||
|
@ -275,7 +286,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
i += 1
|
||||
}
|
||||
return sparkContext.makeRDD(buf)
|
||||
case j: ExistenceJoin =>
|
||||
case _: ExistenceJoin =>
|
||||
val buf: CompactBuffer[InternalRow] = new CompactBuffer()
|
||||
var i = 0
|
||||
val rel = relation.value
|
||||
|
@ -296,7 +307,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
i += 1
|
||||
}
|
||||
return sparkContext.makeRDD(notMatched)
|
||||
case o =>
|
||||
case _ =>
|
||||
}
|
||||
|
||||
val notMatchedBroadcastRows: Seq[InternalRow] = {
|
||||
|
@ -358,7 +369,7 @@ case class BroadcastNestedLoopJoinExec(
|
|||
leftExistenceJoin(broadcastedRelation, exists = true)
|
||||
case (LeftAnti, BuildRight) =>
|
||||
leftExistenceJoin(broadcastedRelation, exists = false)
|
||||
case (j: ExistenceJoin, BuildRight) =>
|
||||
case (_: ExistenceJoin, BuildRight) =>
|
||||
existenceJoin(broadcastedRelation)
|
||||
case _ =>
|
||||
/**
|
||||
|
|
|
@ -1,74 +1,73 @@
|
|||
== Physical Plan ==
|
||||
* Sort (70)
|
||||
+- Exchange (69)
|
||||
+- * Project (68)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (67)
|
||||
:- * HashAggregate (47)
|
||||
: +- Exchange (46)
|
||||
: +- * HashAggregate (45)
|
||||
: +- * Project (44)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (43)
|
||||
: :- * Project (31)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (30)
|
||||
: : :- * Project (24)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (23)
|
||||
: : : :- * Project (17)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (16)
|
||||
: : : : :- * Project (10)
|
||||
: : : : : +- * BroadcastHashJoin Inner BuildRight (9)
|
||||
: : : : : :- * Filter (3)
|
||||
: : : : : : +- * ColumnarToRow (2)
|
||||
: : : : : : +- Scan parquet default.store_sales (1)
|
||||
: : : : : +- BroadcastExchange (8)
|
||||
: : : : : +- * Project (7)
|
||||
: : : : : +- * Filter (6)
|
||||
: : : : : +- * ColumnarToRow (5)
|
||||
: : : : : +- Scan parquet default.date_dim (4)
|
||||
: : : : +- BroadcastExchange (15)
|
||||
: : : : +- * Project (14)
|
||||
: : : : +- * Filter (13)
|
||||
: : : : +- * ColumnarToRow (12)
|
||||
: : : : +- Scan parquet default.item (11)
|
||||
: : : +- BroadcastExchange (22)
|
||||
: : : +- * Project (21)
|
||||
: : : +- * Filter (20)
|
||||
: : : +- * ColumnarToRow (19)
|
||||
: : : +- Scan parquet default.promotion (18)
|
||||
: : +- BroadcastExchange (29)
|
||||
: : +- * Project (28)
|
||||
: : +- * Filter (27)
|
||||
: : +- * ColumnarToRow (26)
|
||||
: : +- Scan parquet default.store (25)
|
||||
: +- BroadcastExchange (42)
|
||||
: +- * Project (41)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (40)
|
||||
: :- * Filter (34)
|
||||
: : +- * ColumnarToRow (33)
|
||||
: : +- Scan parquet default.customer (32)
|
||||
: +- BroadcastExchange (39)
|
||||
: +- * Project (38)
|
||||
: +- * Filter (37)
|
||||
: +- * ColumnarToRow (36)
|
||||
: +- Scan parquet default.customer_address (35)
|
||||
+- BroadcastExchange (66)
|
||||
+- * HashAggregate (65)
|
||||
+- Exchange (64)
|
||||
+- * HashAggregate (63)
|
||||
+- * Project (62)
|
||||
+- * BroadcastHashJoin Inner BuildRight (61)
|
||||
:- * Project (59)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (58)
|
||||
: :- * Project (56)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (55)
|
||||
: : :- * Project (53)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (52)
|
||||
: : : :- * Filter (50)
|
||||
: : : : +- * ColumnarToRow (49)
|
||||
: : : : +- Scan parquet default.store_sales (48)
|
||||
: : : +- ReusedExchange (51)
|
||||
: : +- ReusedExchange (54)
|
||||
: +- ReusedExchange (57)
|
||||
+- ReusedExchange (60)
|
||||
* Sort (69)
|
||||
+- * Project (68)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (67)
|
||||
:- * HashAggregate (47)
|
||||
: +- Exchange (46)
|
||||
: +- * HashAggregate (45)
|
||||
: +- * Project (44)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (43)
|
||||
: :- * Project (31)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (30)
|
||||
: : :- * Project (24)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (23)
|
||||
: : : :- * Project (17)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (16)
|
||||
: : : : :- * Project (10)
|
||||
: : : : : +- * BroadcastHashJoin Inner BuildRight (9)
|
||||
: : : : : :- * Filter (3)
|
||||
: : : : : : +- * ColumnarToRow (2)
|
||||
: : : : : : +- Scan parquet default.store_sales (1)
|
||||
: : : : : +- BroadcastExchange (8)
|
||||
: : : : : +- * Project (7)
|
||||
: : : : : +- * Filter (6)
|
||||
: : : : : +- * ColumnarToRow (5)
|
||||
: : : : : +- Scan parquet default.date_dim (4)
|
||||
: : : : +- BroadcastExchange (15)
|
||||
: : : : +- * Project (14)
|
||||
: : : : +- * Filter (13)
|
||||
: : : : +- * ColumnarToRow (12)
|
||||
: : : : +- Scan parquet default.item (11)
|
||||
: : : +- BroadcastExchange (22)
|
||||
: : : +- * Project (21)
|
||||
: : : +- * Filter (20)
|
||||
: : : +- * ColumnarToRow (19)
|
||||
: : : +- Scan parquet default.promotion (18)
|
||||
: : +- BroadcastExchange (29)
|
||||
: : +- * Project (28)
|
||||
: : +- * Filter (27)
|
||||
: : +- * ColumnarToRow (26)
|
||||
: : +- Scan parquet default.store (25)
|
||||
: +- BroadcastExchange (42)
|
||||
: +- * Project (41)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (40)
|
||||
: :- * Filter (34)
|
||||
: : +- * ColumnarToRow (33)
|
||||
: : +- Scan parquet default.customer (32)
|
||||
: +- BroadcastExchange (39)
|
||||
: +- * Project (38)
|
||||
: +- * Filter (37)
|
||||
: +- * ColumnarToRow (36)
|
||||
: +- Scan parquet default.customer_address (35)
|
||||
+- BroadcastExchange (66)
|
||||
+- * HashAggregate (65)
|
||||
+- Exchange (64)
|
||||
+- * HashAggregate (63)
|
||||
+- * Project (62)
|
||||
+- * BroadcastHashJoin Inner BuildRight (61)
|
||||
:- * Project (59)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (58)
|
||||
: :- * Project (56)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (55)
|
||||
: : :- * Project (53)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (52)
|
||||
: : : :- * Filter (50)
|
||||
: : : : +- * ColumnarToRow (49)
|
||||
: : : : +- Scan parquet default.store_sales (48)
|
||||
: : : +- ReusedExchange (51)
|
||||
: : +- ReusedExchange (54)
|
||||
: +- ReusedExchange (57)
|
||||
+- ReusedExchange (60)
|
||||
|
||||
|
||||
(1) Scan parquet default.store_sales
|
||||
|
@ -121,7 +120,7 @@ Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext
|
|||
Output [2]: [i_item_sk#12, i_category#13]
|
||||
Batched: true
|
||||
Location [not included in comparison]/{warehouse_dir}/item]
|
||||
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)]
|
||||
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)]
|
||||
ReadSchema: struct<i_item_sk:int,i_category:string>
|
||||
|
||||
(12) ColumnarToRow [codegen id : 2]
|
||||
|
@ -129,7 +128,7 @@ Input [2]: [i_item_sk#12, i_category#13]
|
|||
|
||||
(13) Filter [codegen id : 2]
|
||||
Input [2]: [i_item_sk#12, i_category#13]
|
||||
Condition : ((isnotnull(i_category#13) AND (i_category#13 = Jewelry)) AND isnotnull(i_item_sk#12))
|
||||
Condition : ((isnotnull(i_category#13) AND (i_category#13 = Jewelry )) AND isnotnull(i_item_sk#12))
|
||||
|
||||
(14) Project [codegen id : 2]
|
||||
Output [1]: [i_item_sk#12]
|
||||
|
@ -378,21 +377,17 @@ Join condition: None
|
|||
Output [3]: [promotions#33, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#33 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Input [2]: [promotions#33, total#38]
|
||||
|
||||
(69) Exchange
|
||||
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Arguments: rangepartitioning(promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#41]
|
||||
|
||||
(70) Sort [codegen id : 17]
|
||||
(69) Sort [codegen id : 16]
|
||||
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Arguments: [promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST], true, 0
|
||||
|
||||
===== Subqueries =====
|
||||
|
||||
Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
|
||||
ReusedExchange (71)
|
||||
ReusedExchange (70)
|
||||
|
||||
|
||||
(71) ReusedExchange [Reuses operator id: 8]
|
||||
(70) ReusedExchange [Reuses operator id: 8]
|
||||
Output [1]: [d_date_sk#8]
|
||||
|
||||
Subquery:2 Hosting operator id = 48 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
|
||||
|
|
|
@ -1,107 +1,104 @@
|
|||
WholeStageCodegen (17)
|
||||
WholeStageCodegen (16)
|
||||
Sort [promotions,total]
|
||||
InputAdapter
|
||||
Exchange [promotions,total] #1
|
||||
WholeStageCodegen (16)
|
||||
Project [promotions,total]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (8)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
|
||||
InputAdapter
|
||||
Exchange #2
|
||||
WholeStageCodegen (7)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
SubqueryBroadcast [d_date_sk] #1
|
||||
ReusedExchange [d_date_sk] #3
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (1)
|
||||
Project [d_date_sk]
|
||||
Filter [d_year,d_moy,d_date_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (2)
|
||||
Project [i_item_sk]
|
||||
Filter [i_category,i_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.item [i_item_sk,i_category]
|
||||
InputAdapter
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (3)
|
||||
Project [p_promo_sk]
|
||||
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
|
||||
InputAdapter
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (4)
|
||||
Project [s_store_sk]
|
||||
Filter [s_gmt_offset,s_store_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store [s_store_sk,s_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (6)
|
||||
Project [c_customer_sk]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Filter [c_customer_sk,c_current_addr_sk]
|
||||
Project [promotions,total]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (8)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
|
||||
InputAdapter
|
||||
Exchange #1
|
||||
WholeStageCodegen (7)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
SubqueryBroadcast [d_date_sk] #1
|
||||
ReusedExchange [d_date_sk] #2
|
||||
InputAdapter
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (5)
|
||||
Project [ca_address_sk]
|
||||
Filter [ca_gmt_offset,ca_address_sk]
|
||||
BroadcastExchange #2
|
||||
WholeStageCodegen (1)
|
||||
Project [d_date_sk]
|
||||
Filter [d_year,d_moy,d_date_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
|
||||
BroadcastExchange #9
|
||||
WholeStageCodegen (15)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
|
||||
InputAdapter
|
||||
Exchange #10
|
||||
WholeStageCodegen (14)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
|
||||
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (2)
|
||||
Project [i_item_sk]
|
||||
Filter [i_category,i_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
ReusedSubquery [d_date_sk] #1
|
||||
InputAdapter
|
||||
ReusedExchange [d_date_sk] #3
|
||||
InputAdapter
|
||||
ReusedExchange [i_item_sk] #4
|
||||
InputAdapter
|
||||
ReusedExchange [s_store_sk] #6
|
||||
Scan parquet default.item [i_item_sk,i_category]
|
||||
InputAdapter
|
||||
ReusedExchange [c_customer_sk] #7
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (3)
|
||||
Project [p_promo_sk]
|
||||
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
|
||||
InputAdapter
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (4)
|
||||
Project [s_store_sk]
|
||||
Filter [s_gmt_offset,s_store_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store [s_store_sk,s_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (6)
|
||||
Project [c_customer_sk]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Filter [c_customer_sk,c_current_addr_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (5)
|
||||
Project [ca_address_sk]
|
||||
Filter [ca_gmt_offset,ca_address_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (15)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
|
||||
InputAdapter
|
||||
Exchange #9
|
||||
WholeStageCodegen (14)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
ReusedSubquery [d_date_sk] #1
|
||||
InputAdapter
|
||||
ReusedExchange [d_date_sk] #2
|
||||
InputAdapter
|
||||
ReusedExchange [i_item_sk] #3
|
||||
InputAdapter
|
||||
ReusedExchange [s_store_sk] #5
|
||||
InputAdapter
|
||||
ReusedExchange [c_customer_sk] #6
|
||||
|
|
|
@ -1,77 +1,76 @@
|
|||
== Physical Plan ==
|
||||
* Sort (73)
|
||||
+- Exchange (72)
|
||||
+- * Project (71)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (70)
|
||||
:- * HashAggregate (47)
|
||||
: +- Exchange (46)
|
||||
: +- * HashAggregate (45)
|
||||
: +- * Project (44)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (43)
|
||||
: :- * Project (37)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (36)
|
||||
: : :- * Project (30)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (29)
|
||||
: : : :- * Project (24)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (23)
|
||||
: : : : :- * Project (17)
|
||||
: : : : : +- * BroadcastHashJoin Inner BuildRight (16)
|
||||
: : : : : :- * Project (10)
|
||||
: : : : : : +- * BroadcastHashJoin Inner BuildRight (9)
|
||||
: : : : : : :- * Filter (3)
|
||||
: : : : : : : +- * ColumnarToRow (2)
|
||||
: : : : : : : +- Scan parquet default.store_sales (1)
|
||||
: : : : : : +- BroadcastExchange (8)
|
||||
: : : : : : +- * Project (7)
|
||||
: : : : : : +- * Filter (6)
|
||||
: : : : : : +- * ColumnarToRow (5)
|
||||
: : : : : : +- Scan parquet default.store (4)
|
||||
: : : : : +- BroadcastExchange (15)
|
||||
: : : : : +- * Project (14)
|
||||
: : : : : +- * Filter (13)
|
||||
: : : : : +- * ColumnarToRow (12)
|
||||
: : : : : +- Scan parquet default.promotion (11)
|
||||
: : : : +- BroadcastExchange (22)
|
||||
: : : : +- * Project (21)
|
||||
: : : : +- * Filter (20)
|
||||
: : : : +- * ColumnarToRow (19)
|
||||
: : : : +- Scan parquet default.date_dim (18)
|
||||
: : : +- BroadcastExchange (28)
|
||||
: : : +- * Filter (27)
|
||||
: : : +- * ColumnarToRow (26)
|
||||
: : : +- Scan parquet default.customer (25)
|
||||
: : +- BroadcastExchange (35)
|
||||
: : +- * Project (34)
|
||||
: : +- * Filter (33)
|
||||
: : +- * ColumnarToRow (32)
|
||||
: : +- Scan parquet default.customer_address (31)
|
||||
: +- BroadcastExchange (42)
|
||||
: +- * Project (41)
|
||||
: +- * Filter (40)
|
||||
: +- * ColumnarToRow (39)
|
||||
: +- Scan parquet default.item (38)
|
||||
+- BroadcastExchange (69)
|
||||
+- * HashAggregate (68)
|
||||
+- Exchange (67)
|
||||
+- * HashAggregate (66)
|
||||
+- * Project (65)
|
||||
+- * BroadcastHashJoin Inner BuildRight (64)
|
||||
:- * Project (62)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (61)
|
||||
: :- * Project (59)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (58)
|
||||
: : :- * Project (56)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (55)
|
||||
: : : :- * Project (53)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (52)
|
||||
: : : : :- * Filter (50)
|
||||
: : : : : +- * ColumnarToRow (49)
|
||||
: : : : : +- Scan parquet default.store_sales (48)
|
||||
: : : : +- ReusedExchange (51)
|
||||
: : : +- ReusedExchange (54)
|
||||
: : +- ReusedExchange (57)
|
||||
: +- ReusedExchange (60)
|
||||
+- ReusedExchange (63)
|
||||
* Sort (72)
|
||||
+- * Project (71)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (70)
|
||||
:- * HashAggregate (47)
|
||||
: +- Exchange (46)
|
||||
: +- * HashAggregate (45)
|
||||
: +- * Project (44)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (43)
|
||||
: :- * Project (37)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (36)
|
||||
: : :- * Project (30)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (29)
|
||||
: : : :- * Project (24)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (23)
|
||||
: : : : :- * Project (17)
|
||||
: : : : : +- * BroadcastHashJoin Inner BuildRight (16)
|
||||
: : : : : :- * Project (10)
|
||||
: : : : : : +- * BroadcastHashJoin Inner BuildRight (9)
|
||||
: : : : : : :- * Filter (3)
|
||||
: : : : : : : +- * ColumnarToRow (2)
|
||||
: : : : : : : +- Scan parquet default.store_sales (1)
|
||||
: : : : : : +- BroadcastExchange (8)
|
||||
: : : : : : +- * Project (7)
|
||||
: : : : : : +- * Filter (6)
|
||||
: : : : : : +- * ColumnarToRow (5)
|
||||
: : : : : : +- Scan parquet default.store (4)
|
||||
: : : : : +- BroadcastExchange (15)
|
||||
: : : : : +- * Project (14)
|
||||
: : : : : +- * Filter (13)
|
||||
: : : : : +- * ColumnarToRow (12)
|
||||
: : : : : +- Scan parquet default.promotion (11)
|
||||
: : : : +- BroadcastExchange (22)
|
||||
: : : : +- * Project (21)
|
||||
: : : : +- * Filter (20)
|
||||
: : : : +- * ColumnarToRow (19)
|
||||
: : : : +- Scan parquet default.date_dim (18)
|
||||
: : : +- BroadcastExchange (28)
|
||||
: : : +- * Filter (27)
|
||||
: : : +- * ColumnarToRow (26)
|
||||
: : : +- Scan parquet default.customer (25)
|
||||
: : +- BroadcastExchange (35)
|
||||
: : +- * Project (34)
|
||||
: : +- * Filter (33)
|
||||
: : +- * ColumnarToRow (32)
|
||||
: : +- Scan parquet default.customer_address (31)
|
||||
: +- BroadcastExchange (42)
|
||||
: +- * Project (41)
|
||||
: +- * Filter (40)
|
||||
: +- * ColumnarToRow (39)
|
||||
: +- Scan parquet default.item (38)
|
||||
+- BroadcastExchange (69)
|
||||
+- * HashAggregate (68)
|
||||
+- Exchange (67)
|
||||
+- * HashAggregate (66)
|
||||
+- * Project (65)
|
||||
+- * BroadcastHashJoin Inner BuildRight (64)
|
||||
:- * Project (62)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (61)
|
||||
: :- * Project (59)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (58)
|
||||
: : :- * Project (56)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (55)
|
||||
: : : :- * Project (53)
|
||||
: : : : +- * BroadcastHashJoin Inner BuildRight (52)
|
||||
: : : : :- * Filter (50)
|
||||
: : : : : +- * ColumnarToRow (49)
|
||||
: : : : : +- Scan parquet default.store_sales (48)
|
||||
: : : : +- ReusedExchange (51)
|
||||
: : : +- ReusedExchange (54)
|
||||
: : +- ReusedExchange (57)
|
||||
: +- ReusedExchange (60)
|
||||
+- ReusedExchange (63)
|
||||
|
||||
|
||||
(1) Scan parquet default.store_sales
|
||||
|
@ -244,7 +243,7 @@ Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#21, ca_address
|
|||
Output [2]: [i_item_sk#26, i_category#27]
|
||||
Batched: true
|
||||
Location [not included in comparison]/{warehouse_dir}/item]
|
||||
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)]
|
||||
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)]
|
||||
ReadSchema: struct<i_item_sk:int,i_category:string>
|
||||
|
||||
(39) ColumnarToRow [codegen id : 6]
|
||||
|
@ -252,7 +251,7 @@ Input [2]: [i_item_sk#26, i_category#27]
|
|||
|
||||
(40) Filter [codegen id : 6]
|
||||
Input [2]: [i_item_sk#26, i_category#27]
|
||||
Condition : ((isnotnull(i_category#27) AND (i_category#27 = Jewelry)) AND isnotnull(i_item_sk#26))
|
||||
Condition : ((isnotnull(i_category#27) AND (i_category#27 = Jewelry )) AND isnotnull(i_item_sk#26))
|
||||
|
||||
(41) Project [codegen id : 6]
|
||||
Output [1]: [i_item_sk#26]
|
||||
|
@ -393,21 +392,17 @@ Join condition: None
|
|||
Output [3]: [promotions#33, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#33 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Input [2]: [promotions#33, total#38]
|
||||
|
||||
(72) Exchange
|
||||
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Arguments: rangepartitioning(promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#41]
|
||||
|
||||
(73) Sort [codegen id : 17]
|
||||
(72) Sort [codegen id : 16]
|
||||
Input [3]: [promotions#33, total#38, ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40]
|
||||
Arguments: [promotions#33 ASC NULLS FIRST, total#38 ASC NULLS FIRST], true, 0
|
||||
|
||||
===== Subqueries =====
|
||||
|
||||
Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
|
||||
ReusedExchange (74)
|
||||
ReusedExchange (73)
|
||||
|
||||
|
||||
(74) ReusedExchange [Reuses operator id: 22]
|
||||
(73) ReusedExchange [Reuses operator id: 22]
|
||||
Output [1]: [d_date_sk#16]
|
||||
|
||||
Subquery:2 Hosting operator id = 48 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7
|
||||
|
|
|
@ -1,111 +1,108 @@
|
|||
WholeStageCodegen (17)
|
||||
WholeStageCodegen (16)
|
||||
Sort [promotions,total]
|
||||
InputAdapter
|
||||
Exchange [promotions,total] #1
|
||||
WholeStageCodegen (16)
|
||||
Project [promotions,total]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (8)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
|
||||
InputAdapter
|
||||
Exchange #2
|
||||
WholeStageCodegen (7)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
SubqueryBroadcast [d_date_sk] #1
|
||||
ReusedExchange [d_date_sk] #3
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (1)
|
||||
Project [s_store_sk]
|
||||
Filter [s_gmt_offset,s_store_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store [s_store_sk,s_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (2)
|
||||
Project [p_promo_sk]
|
||||
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (3)
|
||||
Project [d_date_sk]
|
||||
Filter [d_year,d_moy,d_date_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
|
||||
InputAdapter
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (4)
|
||||
Filter [c_customer_sk,c_current_addr_sk]
|
||||
Project [promotions,total]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (8)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum]
|
||||
InputAdapter
|
||||
Exchange #1
|
||||
WholeStageCodegen (7)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_promo_sk,p_promo_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (5)
|
||||
Project [ca_address_sk]
|
||||
Filter [ca_gmt_offset,ca_address_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (6)
|
||||
Project [i_item_sk]
|
||||
Filter [i_category,i_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.item [i_item_sk,i_category]
|
||||
BroadcastExchange #9
|
||||
WholeStageCodegen (15)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
|
||||
InputAdapter
|
||||
Exchange #10
|
||||
WholeStageCodegen (14)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
SubqueryBroadcast [d_date_sk] #1
|
||||
ReusedExchange [d_date_sk] #2
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (1)
|
||||
Project [s_store_sk]
|
||||
Filter [s_gmt_offset,s_store_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store [s_store_sk,s_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (2)
|
||||
Project [p_promo_sk]
|
||||
Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
ReusedSubquery [d_date_sk] #1
|
||||
InputAdapter
|
||||
ReusedExchange [s_store_sk] #4
|
||||
InputAdapter
|
||||
ReusedExchange [d_date_sk] #3
|
||||
InputAdapter
|
||||
ReusedExchange [c_customer_sk,c_current_addr_sk] #6
|
||||
Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv]
|
||||
InputAdapter
|
||||
ReusedExchange [ca_address_sk] #7
|
||||
BroadcastExchange #2
|
||||
WholeStageCodegen (3)
|
||||
Project [d_date_sk]
|
||||
Filter [d_year,d_moy,d_date_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.date_dim [d_date_sk,d_year,d_moy]
|
||||
InputAdapter
|
||||
ReusedExchange [i_item_sk] #8
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (4)
|
||||
Filter [c_customer_sk,c_current_addr_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer [c_customer_sk,c_current_addr_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (5)
|
||||
Project [ca_address_sk]
|
||||
Filter [ca_gmt_offset,ca_address_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset]
|
||||
InputAdapter
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (6)
|
||||
Project [i_item_sk]
|
||||
Filter [i_category,i_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.item [i_item_sk,i_category]
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (15)
|
||||
HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum]
|
||||
InputAdapter
|
||||
Exchange #9
|
||||
WholeStageCodegen (14)
|
||||
HashAggregate [ss_ext_sales_price] [sum,sum]
|
||||
Project [ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_item_sk,i_item_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [c_current_addr_sk,ca_address_sk]
|
||||
Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk]
|
||||
BroadcastHashJoin [ss_customer_sk,c_customer_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price]
|
||||
BroadcastHashJoin [ss_sold_date_sk,d_date_sk]
|
||||
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
BroadcastHashJoin [ss_store_sk,s_store_sk]
|
||||
Filter [ss_store_sk,ss_customer_sk,ss_item_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk]
|
||||
ReusedSubquery [d_date_sk] #1
|
||||
InputAdapter
|
||||
ReusedExchange [s_store_sk] #3
|
||||
InputAdapter
|
||||
ReusedExchange [d_date_sk] #2
|
||||
InputAdapter
|
||||
ReusedExchange [c_customer_sk,c_current_addr_sk] #5
|
||||
InputAdapter
|
||||
ReusedExchange [ca_address_sk] #6
|
||||
InputAdapter
|
||||
ReusedExchange [i_item_sk] #7
|
||||
|
|
|
@ -1,57 +1,56 @@
|
|||
== Physical Plan ==
|
||||
* Sort (53)
|
||||
+- Exchange (52)
|
||||
+- * Project (51)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (50)
|
||||
:- * HashAggregate (28)
|
||||
: +- Exchange (27)
|
||||
: +- * HashAggregate (26)
|
||||
: +- * Project (25)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (24)
|
||||
: :- * Project (18)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (17)
|
||||
: : :- * Project (11)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (10)
|
||||
: : : :- * Project (4)
|
||||
: : : : +- * Filter (3)
|
||||
: : : : +- * ColumnarToRow (2)
|
||||
: : : : +- Scan parquet default.web_sales (1)
|
||||
: : : +- BroadcastExchange (9)
|
||||
: : : +- * Project (8)
|
||||
: : : +- * Filter (7)
|
||||
: : : +- * ColumnarToRow (6)
|
||||
: : : +- Scan parquet default.web_page (5)
|
||||
: : +- BroadcastExchange (16)
|
||||
: : +- * Project (15)
|
||||
: : +- * Filter (14)
|
||||
: : +- * ColumnarToRow (13)
|
||||
: : +- Scan parquet default.household_demographics (12)
|
||||
: +- BroadcastExchange (23)
|
||||
: +- * Project (22)
|
||||
: +- * Filter (21)
|
||||
: +- * ColumnarToRow (20)
|
||||
: +- Scan parquet default.time_dim (19)
|
||||
+- BroadcastExchange (49)
|
||||
+- * HashAggregate (48)
|
||||
+- Exchange (47)
|
||||
+- * HashAggregate (46)
|
||||
+- * Project (45)
|
||||
+- * BroadcastHashJoin Inner BuildRight (44)
|
||||
:- * Project (38)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (37)
|
||||
: :- * Project (35)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (34)
|
||||
: : :- * Project (32)
|
||||
: : : +- * Filter (31)
|
||||
: : : +- * ColumnarToRow (30)
|
||||
: : : +- Scan parquet default.web_sales (29)
|
||||
: : +- ReusedExchange (33)
|
||||
: +- ReusedExchange (36)
|
||||
+- BroadcastExchange (43)
|
||||
+- * Project (42)
|
||||
+- * Filter (41)
|
||||
+- * ColumnarToRow (40)
|
||||
+- Scan parquet default.time_dim (39)
|
||||
* Sort (52)
|
||||
+- * Project (51)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (50)
|
||||
:- * HashAggregate (28)
|
||||
: +- Exchange (27)
|
||||
: +- * HashAggregate (26)
|
||||
: +- * Project (25)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (24)
|
||||
: :- * Project (18)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (17)
|
||||
: : :- * Project (11)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (10)
|
||||
: : : :- * Project (4)
|
||||
: : : : +- * Filter (3)
|
||||
: : : : +- * ColumnarToRow (2)
|
||||
: : : : +- Scan parquet default.web_sales (1)
|
||||
: : : +- BroadcastExchange (9)
|
||||
: : : +- * Project (8)
|
||||
: : : +- * Filter (7)
|
||||
: : : +- * ColumnarToRow (6)
|
||||
: : : +- Scan parquet default.web_page (5)
|
||||
: : +- BroadcastExchange (16)
|
||||
: : +- * Project (15)
|
||||
: : +- * Filter (14)
|
||||
: : +- * ColumnarToRow (13)
|
||||
: : +- Scan parquet default.household_demographics (12)
|
||||
: +- BroadcastExchange (23)
|
||||
: +- * Project (22)
|
||||
: +- * Filter (21)
|
||||
: +- * ColumnarToRow (20)
|
||||
: +- Scan parquet default.time_dim (19)
|
||||
+- BroadcastExchange (49)
|
||||
+- * HashAggregate (48)
|
||||
+- Exchange (47)
|
||||
+- * HashAggregate (46)
|
||||
+- * Project (45)
|
||||
+- * BroadcastHashJoin Inner BuildRight (44)
|
||||
:- * Project (38)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (37)
|
||||
: :- * Project (35)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (34)
|
||||
: : :- * Project (32)
|
||||
: : : +- * Filter (31)
|
||||
: : : +- * ColumnarToRow (30)
|
||||
: : : +- Scan parquet default.web_sales (29)
|
||||
: : +- ReusedExchange (33)
|
||||
: +- ReusedExchange (36)
|
||||
+- BroadcastExchange (43)
|
||||
+- * Project (42)
|
||||
+- * Filter (41)
|
||||
+- * ColumnarToRow (40)
|
||||
+- Scan parquet default.time_dim (39)
|
||||
|
||||
|
||||
(1) Scan parquet default.web_sales
|
||||
|
@ -285,11 +284,7 @@ Join condition: None
|
|||
Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#24 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#26]
|
||||
Input [2]: [amc#18, pmc#24]
|
||||
|
||||
(52) Exchange
|
||||
Input [1]: [am_pm_ratio#26]
|
||||
Arguments: rangepartitioning(am_pm_ratio#26 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27]
|
||||
|
||||
(53) Sort [codegen id : 12]
|
||||
(52) Sort [codegen id : 11]
|
||||
Input [1]: [am_pm_ratio#26]
|
||||
Arguments: [am_pm_ratio#26 ASC NULLS FIRST], true, 0
|
||||
|
||||
|
|
|
@ -1,79 +1,76 @@
|
|||
WholeStageCodegen (12)
|
||||
WholeStageCodegen (11)
|
||||
Sort [am_pm_ratio]
|
||||
InputAdapter
|
||||
Exchange [am_pm_ratio] #1
|
||||
WholeStageCodegen (11)
|
||||
Project [amc,pmc]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (5)
|
||||
HashAggregate [count] [count(1),amc,count]
|
||||
InputAdapter
|
||||
Exchange #2
|
||||
WholeStageCodegen (4)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
Project [amc,pmc]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (5)
|
||||
HashAggregate [count] [count(1),amc,count]
|
||||
InputAdapter
|
||||
Exchange #1
|
||||
WholeStageCodegen (4)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (1)
|
||||
Project [wp_web_page_sk]
|
||||
Filter [wp_char_count,wp_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (2)
|
||||
Project [hd_demo_sk]
|
||||
Filter [hd_dep_count,hd_demo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
|
||||
InputAdapter
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (3)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (10)
|
||||
HashAggregate [count] [count(1),pmc,count]
|
||||
InputAdapter
|
||||
Exchange #7
|
||||
WholeStageCodegen (9)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
ReusedExchange [wp_web_page_sk] #3
|
||||
InputAdapter
|
||||
ReusedExchange [hd_demo_sk] #4
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (8)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
BroadcastExchange #2
|
||||
WholeStageCodegen (1)
|
||||
Project [wp_web_page_sk]
|
||||
Filter [wp_char_count,wp_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (2)
|
||||
Project [hd_demo_sk]
|
||||
Filter [hd_dep_count,hd_demo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (3)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (10)
|
||||
HashAggregate [count] [count(1),pmc,count]
|
||||
InputAdapter
|
||||
Exchange #6
|
||||
WholeStageCodegen (9)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk]
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
ReusedExchange [wp_web_page_sk] #2
|
||||
InputAdapter
|
||||
ReusedExchange [hd_demo_sk] #3
|
||||
InputAdapter
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (8)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
|
|
|
@ -1,57 +1,56 @@
|
|||
== Physical Plan ==
|
||||
* Sort (53)
|
||||
+- Exchange (52)
|
||||
+- * Project (51)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (50)
|
||||
:- * HashAggregate (28)
|
||||
: +- Exchange (27)
|
||||
: +- * HashAggregate (26)
|
||||
: +- * Project (25)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (24)
|
||||
: :- * Project (18)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (17)
|
||||
: : :- * Project (11)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (10)
|
||||
: : : :- * Project (4)
|
||||
: : : : +- * Filter (3)
|
||||
: : : : +- * ColumnarToRow (2)
|
||||
: : : : +- Scan parquet default.web_sales (1)
|
||||
: : : +- BroadcastExchange (9)
|
||||
: : : +- * Project (8)
|
||||
: : : +- * Filter (7)
|
||||
: : : +- * ColumnarToRow (6)
|
||||
: : : +- Scan parquet default.household_demographics (5)
|
||||
: : +- BroadcastExchange (16)
|
||||
: : +- * Project (15)
|
||||
: : +- * Filter (14)
|
||||
: : +- * ColumnarToRow (13)
|
||||
: : +- Scan parquet default.time_dim (12)
|
||||
: +- BroadcastExchange (23)
|
||||
: +- * Project (22)
|
||||
: +- * Filter (21)
|
||||
: +- * ColumnarToRow (20)
|
||||
: +- Scan parquet default.web_page (19)
|
||||
+- BroadcastExchange (49)
|
||||
+- * HashAggregate (48)
|
||||
+- Exchange (47)
|
||||
+- * HashAggregate (46)
|
||||
+- * Project (45)
|
||||
+- * BroadcastHashJoin Inner BuildRight (44)
|
||||
:- * Project (42)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (41)
|
||||
: :- * Project (35)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (34)
|
||||
: : :- * Project (32)
|
||||
: : : +- * Filter (31)
|
||||
: : : +- * ColumnarToRow (30)
|
||||
: : : +- Scan parquet default.web_sales (29)
|
||||
: : +- ReusedExchange (33)
|
||||
: +- BroadcastExchange (40)
|
||||
: +- * Project (39)
|
||||
: +- * Filter (38)
|
||||
: +- * ColumnarToRow (37)
|
||||
: +- Scan parquet default.time_dim (36)
|
||||
+- ReusedExchange (43)
|
||||
* Sort (52)
|
||||
+- * Project (51)
|
||||
+- BroadcastNestedLoopJoin Inner BuildRight (50)
|
||||
:- * HashAggregate (28)
|
||||
: +- Exchange (27)
|
||||
: +- * HashAggregate (26)
|
||||
: +- * Project (25)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (24)
|
||||
: :- * Project (18)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (17)
|
||||
: : :- * Project (11)
|
||||
: : : +- * BroadcastHashJoin Inner BuildRight (10)
|
||||
: : : :- * Project (4)
|
||||
: : : : +- * Filter (3)
|
||||
: : : : +- * ColumnarToRow (2)
|
||||
: : : : +- Scan parquet default.web_sales (1)
|
||||
: : : +- BroadcastExchange (9)
|
||||
: : : +- * Project (8)
|
||||
: : : +- * Filter (7)
|
||||
: : : +- * ColumnarToRow (6)
|
||||
: : : +- Scan parquet default.household_demographics (5)
|
||||
: : +- BroadcastExchange (16)
|
||||
: : +- * Project (15)
|
||||
: : +- * Filter (14)
|
||||
: : +- * ColumnarToRow (13)
|
||||
: : +- Scan parquet default.time_dim (12)
|
||||
: +- BroadcastExchange (23)
|
||||
: +- * Project (22)
|
||||
: +- * Filter (21)
|
||||
: +- * ColumnarToRow (20)
|
||||
: +- Scan parquet default.web_page (19)
|
||||
+- BroadcastExchange (49)
|
||||
+- * HashAggregate (48)
|
||||
+- Exchange (47)
|
||||
+- * HashAggregate (46)
|
||||
+- * Project (45)
|
||||
+- * BroadcastHashJoin Inner BuildRight (44)
|
||||
:- * Project (42)
|
||||
: +- * BroadcastHashJoin Inner BuildRight (41)
|
||||
: :- * Project (35)
|
||||
: : +- * BroadcastHashJoin Inner BuildRight (34)
|
||||
: : :- * Project (32)
|
||||
: : : +- * Filter (31)
|
||||
: : : +- * ColumnarToRow (30)
|
||||
: : : +- Scan parquet default.web_sales (29)
|
||||
: : +- ReusedExchange (33)
|
||||
: +- BroadcastExchange (40)
|
||||
: +- * Project (39)
|
||||
: +- * Filter (38)
|
||||
: +- * ColumnarToRow (37)
|
||||
: +- Scan parquet default.time_dim (36)
|
||||
+- ReusedExchange (43)
|
||||
|
||||
|
||||
(1) Scan parquet default.web_sales
|
||||
|
@ -285,11 +284,7 @@ Join condition: None
|
|||
Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#24 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#26]
|
||||
Input [2]: [amc#18, pmc#24]
|
||||
|
||||
(52) Exchange
|
||||
Input [1]: [am_pm_ratio#26]
|
||||
Arguments: rangepartitioning(am_pm_ratio#26 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27]
|
||||
|
||||
(53) Sort [codegen id : 12]
|
||||
(52) Sort [codegen id : 11]
|
||||
Input [1]: [am_pm_ratio#26]
|
||||
Arguments: [am_pm_ratio#26 ASC NULLS FIRST], true, 0
|
||||
|
||||
|
|
|
@ -1,79 +1,76 @@
|
|||
WholeStageCodegen (12)
|
||||
WholeStageCodegen (11)
|
||||
Sort [am_pm_ratio]
|
||||
InputAdapter
|
||||
Exchange [am_pm_ratio] #1
|
||||
WholeStageCodegen (11)
|
||||
Project [amc,pmc]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (5)
|
||||
HashAggregate [count] [count(1),amc,count]
|
||||
InputAdapter
|
||||
Exchange #2
|
||||
WholeStageCodegen (4)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk,ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
Project [amc,pmc]
|
||||
InputAdapter
|
||||
BroadcastNestedLoopJoin
|
||||
WholeStageCodegen (5)
|
||||
HashAggregate [count] [count(1),amc,count]
|
||||
InputAdapter
|
||||
Exchange #1
|
||||
WholeStageCodegen (4)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk,ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #2
|
||||
WholeStageCodegen (1)
|
||||
Project [hd_demo_sk]
|
||||
Filter [hd_dep_count,hd_demo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (1)
|
||||
Project [hd_demo_sk]
|
||||
Filter [hd_dep_count,hd_demo_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
|
||||
Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count]
|
||||
InputAdapter
|
||||
BroadcastExchange #3
|
||||
WholeStageCodegen (2)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (3)
|
||||
Project [wp_web_page_sk]
|
||||
Filter [wp_char_count,wp_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (10)
|
||||
HashAggregate [count] [count(1),pmc,count]
|
||||
InputAdapter
|
||||
Exchange #6
|
||||
WholeStageCodegen (9)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk,ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
BroadcastExchange #4
|
||||
WholeStageCodegen (2)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
ReusedExchange [hd_demo_sk] #2
|
||||
InputAdapter
|
||||
BroadcastExchange #5
|
||||
WholeStageCodegen (3)
|
||||
Project [wp_web_page_sk]
|
||||
Filter [wp_char_count,wp_web_page_sk]
|
||||
BroadcastExchange #7
|
||||
WholeStageCodegen (7)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_page [wp_web_page_sk,wp_char_count]
|
||||
BroadcastExchange #6
|
||||
WholeStageCodegen (10)
|
||||
HashAggregate [count] [count(1),pmc,count]
|
||||
InputAdapter
|
||||
Exchange #7
|
||||
WholeStageCodegen (9)
|
||||
HashAggregate [count,count]
|
||||
Project
|
||||
BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
|
||||
Project [ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_sold_time_sk,t_time_sk]
|
||||
Project [ws_sold_time_sk,ws_web_page_sk]
|
||||
BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk]
|
||||
Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk]
|
||||
Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk]
|
||||
InputAdapter
|
||||
ReusedExchange [hd_demo_sk] #3
|
||||
InputAdapter
|
||||
BroadcastExchange #8
|
||||
WholeStageCodegen (7)
|
||||
Project [t_time_sk]
|
||||
Filter [t_hour,t_time_sk]
|
||||
ColumnarToRow
|
||||
InputAdapter
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
InputAdapter
|
||||
ReusedExchange [wp_web_page_sk] #5
|
||||
Scan parquet default.time_dim [t_time_sk,t_hour]
|
||||
InputAdapter
|
||||
ReusedExchange [wp_web_page_sk] #4
|
||||
|
|
|
@ -1296,4 +1296,92 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks, by counting operators in the executed plan, in which join-type / broadcast-hint
// combinations a BroadcastNestedLoopJoinExec lets the planner skip a shuffle (before an
// aggregate on the join output) or a sort (before a later sort merge join), and in which
// combinations the shuffle / sort is still planned.
test("SPARK-34593: Preserve broadcast nested loop join partitioning and ordering") {
  withTable("t1", "t2", "t3", "t4", "t5") {
    // t1 and t2 are bucketed on k into 4 buckets; t3, t4 and t5 are written without bucketing.
    spark.range(15).toDF("k").write.bucketBy(4, "k").saveAsTable("t1")
    spark.range(6).toDF("k").write.bucketBy(4, "k").saveAsTable("t2")
    spark.range(8).toDF("k").write.saveAsTable("t3")
    spark.range(9).toDF("k").write.saveAsTable("t4")
    spark.range(11).toDF("k").write.saveAsTable("t5")

    // Aggregation on top of a condition-less join of the two bucketed tables.
    def getAggQuery(selectExpr: String, joinType: String): String = {
      s"""
         |SELECT k, COUNT(*)
         |FROM (SELECT $selectExpr FROM t1 $joinType JOIN t2)
         |GROUP BY k
         |""".stripMargin
    }

    // Plans the aggregation query and checks that it contains exactly one broadcast nested
    // loop join and `expectedShuffles` shuffle exchanges. The clue names the failing
    // combination, since the same assertions run once per join type.
    def checkAggPlan(selectExpr: String, joinType: String, expectedShuffles: Int): Unit = {
      val plan = sql(getAggQuery(selectExpr, joinType)).queryExecution.executedPlan
      withClue(s"[joinType=$joinType, selectExpr=$selectExpr] ") {
        assert(collect(plan) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
        assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === expectedShuffles)
      }
    }

    // Test output partitioning is preserved
    for (joinType <- Seq("INNER", "LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI")) {
      val selectExpr = joinType match {
        case "RIGHT OUTER" => "/*+ BROADCAST(t1) */ t2.k AS k"
        case _ => "/*+ BROADCAST(t2) */ t1.k as k"
      }
      // No extra shuffle before aggregation
      checkAggPlan(selectExpr, joinType, expectedShuffles = 0)
    }

    // Test output partitioning is not preserved
    for (joinType <- Seq("LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI", "FULL OUTER")) {
      val selectExpr = joinType match {
        case "RIGHT OUTER" => "/*+ BROADCAST(t2) */ t1.k AS k"
        case _ => "/*+ BROADCAST(t1) */ t1.k as k"
      }
      // Have shuffle before aggregation
      checkAggPlan(selectExpr, joinType, expectedShuffles = 1)
    }

    // Two sort merge joins (left_t, right_t) are combined by the join under test, and the
    // result is joined with t3 through a third sort merge join on k0.
    def getJoinQuery(selectExpr: String, joinType: String): String = {
      s"""
         |SELECT /*+ MERGE(t3) */ t3.k
         |FROM
         |(
         | SELECT $selectExpr
         | FROM
         | (SELECT /*+ MERGE(t4) */ t1.k AS k1 FROM t1 JOIN t4 ON t1.k = t4.k) AS left_t
         | $joinType JOIN
         | (SELECT /*+ MERGE(t5) */ t2.k AS k2 FROM t2 JOIN t5 ON t2.k = t5.k) AS right_t
         |)
         |JOIN t3
         |ON t3.k = k0
         |""".stripMargin
    }

    // Plans the join query and checks that it contains exactly one broadcast nested loop
    // join, three sort merge joins and `expectedSorts` sort operators.
    def checkJoinPlan(selectExpr: String, joinType: String, expectedSorts: Int): Unit = {
      val plan = sql(getJoinQuery(selectExpr, joinType)).queryExecution.executedPlan
      withClue(s"[joinType=$joinType, selectExpr=$selectExpr] ") {
        assert(collect(plan) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
        assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 3)
        assert(collect(plan) { case _: SortExec => true }.size === expectedSorts)
      }
    }

    // Test output ordering is preserved
    for (joinType <- Seq("INNER", "LEFT OUTER", "RIGHT OUTER", "LEFT SEMI", "LEFT ANTI")) {
      val selectExpr = joinType match {
        case "RIGHT OUTER" => "/*+ BROADCAST(left_t) */ k2 AS k0"
        case _ => "/*+ BROADCAST(right_t) */ k1 as k0"
      }
      // No extra sort on left side before last sort merge join
      checkJoinPlan(selectExpr, joinType, expectedSorts = 5)
    }

    // Test output ordering is not preserved
    for (joinType <- Seq("LEFT OUTER", "FULL OUTER")) {
      val selectExpr = "/*+ BROADCAST(left_t) */ k1 as k0"
      // Have sort on left side before last sort merge join
      checkJoinPlan(selectExpr, joinType, expectedSorts = 6)
    }
  }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue