[SPARK-9853][CORE][FOLLOW-UP] Regularize all the shuffle configurations related to adaptive execution
### What changes were proposed in this pull request? 1. Regularize all the shuffle configurations related to adaptive execution. 2. Add a default value for `BlockStoreShuffleReader.shouldBatchFetch`. ### Why are the changes needed? This is a follow-up PR for #26040. It regularizes the existing `spark.sql.adaptive.shuffle` namespace in SQLConf. ### Does this PR introduce any user-facing change? It renames one released user config, `spark.sql.adaptive.minNumPostShufflePartitions`, to `spark.sql.adaptive.shuffle.minNumPostShufflePartitions`; the other changed configs have not been released yet. ### How was this patch tested? Existing UT. Closes #26147 from xuanyuanking/SPARK-9853. Authored-by: Yuanjian Li <xyliyuanjian@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
901ff92969
commit
8616109061
|
@ -36,7 +36,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
|
||||||
serializerManager: SerializerManager = SparkEnv.get.serializerManager,
|
serializerManager: SerializerManager = SparkEnv.get.serializerManager,
|
||||||
blockManager: BlockManager = SparkEnv.get.blockManager,
|
blockManager: BlockManager = SparkEnv.get.blockManager,
|
||||||
mapOutputTracker: MapOutputTracker = SparkEnv.get.mapOutputTracker,
|
mapOutputTracker: MapOutputTracker = SparkEnv.get.mapOutputTracker,
|
||||||
shouldBatchFetch: Boolean)
|
shouldBatchFetch: Boolean = false)
|
||||||
extends ShuffleReader[K, C] with Logging {
|
extends ShuffleReader[K, C] with Logging {
|
||||||
|
|
||||||
private val dep = handle.dependency
|
private val dep = handle.dependency
|
||||||
|
|
|
@ -138,8 +138,7 @@ class BlockStoreShuffleReaderSuite extends SparkFunSuite with LocalSparkContext
|
||||||
taskContext,
|
taskContext,
|
||||||
metrics,
|
metrics,
|
||||||
serializerManager,
|
serializerManager,
|
||||||
blockManager,
|
blockManager)
|
||||||
shouldBatchFetch = false)
|
|
||||||
|
|
||||||
assert(shuffleReader.read().length === keyValuePairsPerMap * numMaps)
|
assert(shuffleReader.read().length === keyValuePairsPerMap * numMaps)
|
||||||
|
|
||||||
|
|
|
@ -349,15 +349,19 @@ object SQLConf {
|
||||||
.checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
|
.checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
|
||||||
.createWithDefault(200)
|
.createWithDefault(200)
|
||||||
|
|
||||||
|
val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
|
||||||
|
.doc("When true, enable adaptive query execution.")
|
||||||
|
.booleanConf
|
||||||
|
.createWithDefault(false)
|
||||||
|
|
||||||
val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE =
|
val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE =
|
||||||
buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize")
|
buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize")
|
||||||
.doc("The target post-shuffle input size in bytes of a task.")
|
.doc("The target post-shuffle input size in bytes of a task.")
|
||||||
.bytesConf(ByteUnit.BYTE)
|
.bytesConf(ByteUnit.BYTE)
|
||||||
.createWithDefault(64 * 1024 * 1024)
|
.createWithDefault(64 * 1024 * 1024)
|
||||||
|
|
||||||
|
|
||||||
val FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED =
|
val FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED =
|
||||||
buildConf("spark.sql.adaptive.fetchShuffleBlocksInBatch.enabled")
|
buildConf("spark.sql.adaptive.shuffle.fetchShuffleBlocksInBatch.enabled")
|
||||||
.doc("Whether to fetch the continuous shuffle blocks in batch. Instead of fetching blocks " +
|
.doc("Whether to fetch the continuous shuffle blocks in batch. Instead of fetching blocks " +
|
||||||
"one by one, fetching continuous shuffle blocks for the same map task in batch can " +
|
"one by one, fetching continuous shuffle blocks for the same map task in batch can " +
|
||||||
"reduce IO and improve performance. Note, this feature also depends on a relocatable " +
|
"reduce IO and improve performance. Note, this feature also depends on a relocatable " +
|
||||||
|
@ -365,29 +369,15 @@ object SQLConf {
|
||||||
.booleanConf
|
.booleanConf
|
||||||
.createWithDefault(true)
|
.createWithDefault(true)
|
||||||
|
|
||||||
val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
|
|
||||||
.doc("When true, enable adaptive query execution.")
|
|
||||||
.booleanConf
|
|
||||||
.createWithDefault(false)
|
|
||||||
|
|
||||||
val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
|
|
||||||
buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
|
|
||||||
.doc("The relation with a non-empty partition ratio lower than this config will not be " +
|
|
||||||
"considered as the build side of a broadcast-hash join in adaptive execution regardless " +
|
|
||||||
"of its size.")
|
|
||||||
.doubleConf
|
|
||||||
.checkValue(_ >= 0, "The non-empty partition ratio must be positive number.")
|
|
||||||
.createWithDefault(0.2)
|
|
||||||
|
|
||||||
val REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED =
|
val REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED =
|
||||||
buildConf("spark.sql.adaptive.reducePostShufflePartitions.enabled")
|
buildConf("spark.sql.adaptive.shuffle.reducePostShufflePartitions.enabled")
|
||||||
.doc("When true and adaptive execution is enabled, this enables reducing the number of " +
|
.doc("When true and adaptive execution is enabled, this enables reducing the number of " +
|
||||||
"post-shuffle partitions based on map output statistics.")
|
"post-shuffle partitions based on map output statistics.")
|
||||||
.booleanConf
|
.booleanConf
|
||||||
.createWithDefault(true)
|
.createWithDefault(true)
|
||||||
|
|
||||||
val SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS =
|
val SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS =
|
||||||
buildConf("spark.sql.adaptive.minNumPostShufflePartitions")
|
buildConf("spark.sql.adaptive.shuffle.minNumPostShufflePartitions")
|
||||||
.doc("The advisory minimum number of post-shuffle partitions used in adaptive execution.")
|
.doc("The advisory minimum number of post-shuffle partitions used in adaptive execution.")
|
||||||
.intConf
|
.intConf
|
||||||
.checkValue(_ > 0, "The minimum shuffle partition number " +
|
.checkValue(_ > 0, "The minimum shuffle partition number " +
|
||||||
|
@ -395,7 +385,7 @@ object SQLConf {
|
||||||
.createWithDefault(1)
|
.createWithDefault(1)
|
||||||
|
|
||||||
val SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS =
|
val SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS =
|
||||||
buildConf("spark.sql.adaptive.maxNumPostShufflePartitions")
|
buildConf("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions")
|
||||||
.doc("The advisory maximum number of post-shuffle partitions used in adaptive execution. " +
|
.doc("The advisory maximum number of post-shuffle partitions used in adaptive execution. " +
|
||||||
"This is used as the initial number of pre-shuffle partitions. By default it equals to " +
|
"This is used as the initial number of pre-shuffle partitions. By default it equals to " +
|
||||||
"spark.sql.shuffle.partitions")
|
"spark.sql.shuffle.partitions")
|
||||||
|
@ -405,13 +395,22 @@ object SQLConf {
|
||||||
.createOptional
|
.createOptional
|
||||||
|
|
||||||
val OPTIMIZE_LOCAL_SHUFFLE_READER_ENABLED =
|
val OPTIMIZE_LOCAL_SHUFFLE_READER_ENABLED =
|
||||||
buildConf("spark.sql.adaptive.optimizedLocalShuffleReader.enabled")
|
buildConf("spark.sql.adaptive.shuffle.optimizedLocalShuffleReader.enabled")
|
||||||
.doc("When true and adaptive execution is enabled, this enables the optimization of" +
|
.doc("When true and adaptive execution is enabled, this enables the optimization of" +
|
||||||
" converting the shuffle reader to local shuffle reader for the shuffle exchange" +
|
" converting the shuffle reader to local shuffle reader for the shuffle exchange" +
|
||||||
" of the broadcast hash join in probe side.")
|
" of the broadcast hash join in probe side.")
|
||||||
.booleanConf
|
.booleanConf
|
||||||
.createWithDefault(true)
|
.createWithDefault(true)
|
||||||
|
|
||||||
|
val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
|
||||||
|
buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
|
||||||
|
.doc("The relation with a non-empty partition ratio lower than this config will not be " +
|
||||||
|
"considered as the build side of a broadcast-hash join in adaptive execution regardless " +
|
||||||
|
"of its size.")
|
||||||
|
.doubleConf
|
||||||
|
.checkValue(_ >= 0, "The non-empty partition ratio must be positive number.")
|
||||||
|
.createWithDefault(0.2)
|
||||||
|
|
||||||
val SUBEXPRESSION_ELIMINATION_ENABLED =
|
val SUBEXPRESSION_ELIMINATION_ENABLED =
|
||||||
buildConf("spark.sql.subexpressionElimination.enabled")
|
buildConf("spark.sql.subexpressionElimination.enabled")
|
||||||
.internal()
|
.internal()
|
||||||
|
@ -2148,21 +2147,18 @@ class SQLConf extends Serializable with Logging {
|
||||||
|
|
||||||
def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
|
def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
|
||||||
|
|
||||||
def targetPostShuffleInputSize: Long =
|
|
||||||
getConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
|
|
||||||
|
|
||||||
def fetchShuffleBlocksInBatchEnabled: Boolean =
|
|
||||||
getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED)
|
|
||||||
|
|
||||||
def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
|
def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
|
||||||
|
|
||||||
|
def targetPostShuffleInputSize: Long = getConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
|
||||||
|
|
||||||
|
def fetchShuffleBlocksInBatchEnabled: Boolean = getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED)
|
||||||
|
|
||||||
def nonEmptyPartitionRatioForBroadcastJoin: Double =
|
def nonEmptyPartitionRatioForBroadcastJoin: Double =
|
||||||
getConf(NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN)
|
getConf(NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN)
|
||||||
|
|
||||||
def reducePostShufflePartitionsEnabled: Boolean = getConf(REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED)
|
def reducePostShufflePartitionsEnabled: Boolean = getConf(REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED)
|
||||||
|
|
||||||
def minNumPostShufflePartitions: Int =
|
def minNumPostShufflePartitions: Int = getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
|
||||||
getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
|
|
||||||
|
|
||||||
def maxNumPostShufflePartitions: Int =
|
def maxNumPostShufflePartitions: Int =
|
||||||
getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS).getOrElse(numShufflePartitions)
|
getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS).getOrElse(numShufflePartitions)
|
||||||
|
|
Loading…
Reference in a new issue