[SPARK-1507][YARN] Specify # cores for ApplicationMaster

Built on top of the changes in https://github.com/apache/spark/pull/3806.

https://issues.apache.org/jira/browse/SPARK-1507

This adds `--driver-cores` and `spark.driver.cores` to set the driver's core count in all cluster modes, and `spark.yarn.am.cores` to set the Application Master's core count in yarn-client mode.
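
For illustration, hypothetical invocations (application jar, class name, and core counts are placeholders, not part of this patch):

```
# yarn-cluster mode: --driver-cores sizes the driver, which shares a JVM with the AM
./bin/spark-submit --master yarn-cluster --driver-cores 4 \
  --class org.example.MyApp my-app.jar

# yarn-client mode: the driver runs locally, so the AM is sized via a conf property
./bin/spark-submit --master yarn-client --conf spark.yarn.am.cores=2 \
  --class org.example.MyApp my-app.jar
```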

Author: WangTaoTheTonic <barneystinson@aliyun.com>
Author: WangTao <barneystinson@aliyun.com>

Closes #4018 from WangTaoTheTonic/SPARK-1507 and squashes the following commits:

01419d3 [WangTaoTheTonic] amend the args name
b255795 [WangTaoTheTonic] indet thing
d86557c [WangTaoTheTonic] some comments amend
43c9392 [WangTao] fix compile error
b39a100 [WangTao] specify # cores for ApplicationMaster
WangTaoTheTonic authored on 2015-01-16 09:16:56 -08:00, committed by Andrew Or
parent a79a9f923c
commit 2be82b1e66

7 changed files with 58 additions and 11 deletions

```diff
@@ -23,7 +23,7 @@ import scala.collection.mutable.ListBuffer

 import org.apache.log4j.Level

-import org.apache.spark.util.MemoryParam
+import org.apache.spark.util.{IntParam, MemoryParam}

 /**
  * Command-line parser for the driver client.
@@ -51,8 +51,8 @@ private[spark] class ClientArguments(args: Array[String]) {
   parse(args.toList)

   def parse(args: List[String]): Unit = args match {
-    case ("--cores" | "-c") :: value :: tail =>
-      cores = value.toInt
+    case ("--cores" | "-c") :: IntParam(value) :: tail =>
+      cores = value
       parse(tail)

     case ("--memory" | "-m") :: MemoryParam(value) :: tail =>
```
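
Aside: `IntParam` is Spark's extractor for integer arguments (from `org.apache.spark.util`); using it lets a malformed `--cores` value fall through to the error case instead of throwing `NumberFormatException` mid-match. A minimal sketch of such an extractor, assuming the real one differs in detail:

```scala
// Sketch of an IntParam-style extractor: matches only strings that
// parse cleanly as an Int, returning None (no match) otherwise.
object IntParam {
  def unapply(str: String): Option[Int] =
    try Some(str.toInt) catch { case _: NumberFormatException => None }
}
```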

```diff
@@ -200,6 +200,7 @@ object SparkSubmit {
     // Yarn cluster only
     OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"),
     OptionAssigner(args.driverMemory, YARN, CLUSTER, clOption = "--driver-memory"),
+    OptionAssigner(args.driverCores, YARN, CLUSTER, clOption = "--driver-cores"),
     OptionAssigner(args.queue, YARN, CLUSTER, clOption = "--queue"),
     OptionAssigner(args.numExecutors, YARN, CLUSTER, clOption = "--num-executors"),
     OptionAssigner(args.executorMemory, YARN, CLUSTER, clOption = "--executor-memory"),
```
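
For context, each `OptionAssigner` row forwards a parsed argument to the child process when the cluster manager and deploy mode match, so the new row is what turns `args.driverCores` into a `--driver-cores` flag for the YARN client. A simplified sketch of the shape (field names inferred from the calls above, not the exact Spark definition):

```scala
// Simplified sketch: a routing rule from a parsed argument to either a
// child CLI option or a system property, gated on manager and mode.
case class OptionAssigner(
    value: String,           // parsed value; null means "not set, skip"
    clusterManager: Int,     // bitmask, e.g. YARN | STANDALONE
    deployMode: Int,         // bitmask, e.g. CLIENT | CLUSTER
    clOption: String = null, // forwarded as a CLI flag to the child class
    sysProp: String = null)  // or injected as a Java system property
```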

```diff
@@ -108,6 +108,9 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
       .orElse(sparkProperties.get("spark.driver.memory"))
       .orElse(env.get("SPARK_DRIVER_MEMORY"))
       .orNull
+    driverCores = Option(driverCores)
+      .orElse(sparkProperties.get("spark.driver.cores"))
+      .orNull
     executorMemory = Option(executorMemory)
       .orElse(sparkProperties.get("spark.executor.memory"))
       .orElse(env.get("SPARK_EXECUTOR_MEMORY"))
@@ -406,6 +409,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
        |  --total-executor-cores NUM  Total cores for all executors.
        |
        | YARN-only:
+       |  --driver-cores NUM          Number of cores used by the driver, only in cluster mode
+       |                              (Default: 1).
        |  --executor-cores NUM        Number of cores per executor (Default: 1).
        |  --queue QUEUE_NAME          The YARN queue to submit to (Default: "default").
        |  --num-executors NUM         Number of executors to launch (Default: 2).
```
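
As with the memory settings above, the resolution order is: explicit `--driver-cores` flag first, then `spark.driver.cores` from the loaded properties, otherwise null. A standalone sketch of that `Option` chain (values made up):

```scala
// Precedence illustration: the CLI flag wins over the spark property.
val fromCli: String = null                              // no --driver-cores given
val sparkProperties = Map("spark.driver.cores" -> "4")  // e.g. from spark-defaults.conf

val driverCores = Option(fromCli)
  .orElse(sparkProperties.get("spark.driver.cores"))
  .orNull                                               // "4" here; null if neither is set
```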

```diff
@@ -102,11 +102,10 @@ of the most common options to set are:
   </td>
 </tr>
 <tr>
-  <td><code>spark.executor.memory</code></td>
-  <td>512m</td>
+  <td><code>spark.driver.cores</code></td>
+  <td>1</td>
   <td>
-    Amount of memory to use per executor process, in the same format as JVM memory strings
-    (e.g. <code>512m</code>, <code>2g</code>).
+    Number of cores to use for the driver process, only in cluster mode.
   </td>
 </tr>
 <tr>
@@ -117,6 +116,14 @@ of the most common options to set are:
     (e.g. <code>512m</code>, <code>2g</code>).
   </td>
 </tr>
+<tr>
+  <td><code>spark.executor.memory</code></td>
+  <td>512m</td>
+  <td>
+    Amount of memory to use per executor process, in the same format as JVM memory strings
+    (e.g. <code>512m</code>, <code>2g</code>).
+  </td>
+</tr>
 <tr>
   <td><code>spark.driver.maxResultSize</code></td>
   <td>1g</td>
```

```diff
@@ -29,6 +29,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
     In cluster mode, use <code>spark.driver.memory</code> instead.
   </td>
 </tr>
+<tr>
+  <td><code>spark.driver.cores</code></td>
+  <td>1</td>
+  <td>
+    Number of cores used by the driver in YARN cluster mode.
+    Since the driver is run in the same JVM as the YARN Application Master in cluster mode, this also controls the cores used by the YARN AM.
+    In client mode, use <code>spark.yarn.am.cores</code> to control the number of cores used by the YARN AM instead.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.am.cores</code></td>
+  <td>1</td>
+  <td>
+    Number of cores to use for the YARN Application Master in client mode.
+    In cluster mode, use <code>spark.driver.cores</code> instead.
+  </td>
+</tr>
 <tr>
   <td><code>spark.yarn.am.waitTime</code></td>
   <td>100000</td>
```
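
To make the cluster/client split above concrete, a hypothetical `spark-defaults.conf` fragment (values illustrative):

```
# yarn-cluster mode: driver and AM share one JVM, so this sizes both
spark.driver.cores   4
# yarn-client mode: the driver runs outside YARN; only the AM is sized here
spark.yarn.am.cores  2
```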

```diff
@@ -127,6 +127,7 @@ private[spark] class Client(
     }
     val capability = Records.newRecord(classOf[Resource])
     capability.setMemory(args.amMemory + amMemoryOverhead)
+    capability.setVirtualCores(args.amCores)
     appContext.setResource(capability)
     appContext
   }
```
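
The one-line change above is where the new setting actually reaches YARN. For a self-contained view, a sketch of how the AM container request is assembled with Hadoop's records API (memory and core values illustrative):

```scala
import org.apache.hadoop.yarn.api.records.Resource
import org.apache.hadoop.yarn.util.Records

// The AM container request now carries vcores alongside memory.
val capability = Records.newRecord(classOf[Resource])
capability.setMemory(512 + 384)  // amMemory + amMemoryOverhead, in MB
capability.setVirtualCores(2)    // the new amCores value
```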

```diff
@@ -36,14 +36,18 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
   var numExecutors = DEFAULT_NUMBER_EXECUTORS
   var amQueue = sparkConf.get("spark.yarn.queue", "default")
   var amMemory: Int = 512 // MB
+  var amCores: Int = 1
   var appName: String = "Spark"
   var priority = 0

   def isClusterMode: Boolean = userClass != null

   private var driverMemory: Int = 512 // MB
+  private var driverCores: Int = 1
   private val driverMemOverheadKey = "spark.yarn.driver.memoryOverhead"
   private val amMemKey = "spark.yarn.am.memory"
   private val amMemOverheadKey = "spark.yarn.am.memoryOverhead"
+  private val driverCoresKey = "spark.driver.cores"
+  private val amCoresKey = "spark.yarn.am.cores"
   private val isDynamicAllocationEnabled =
     sparkConf.getBoolean("spark.dynamicAllocation.enabled", false)
@@ -92,19 +96,25 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
         "You must specify at least 1 executor!\n" + getUsageMessage())
     }

     if (isClusterMode) {
-      for (key <- Seq(amMemKey, amMemOverheadKey)) {
+      for (key <- Seq(amMemKey, amMemOverheadKey, amCoresKey)) {
         if (sparkConf.contains(key)) {
           println(s"$key is set but does not apply in cluster mode.")
         }
       }
       amMemory = driverMemory
+      amCores = driverCores
     } else {
-      if (sparkConf.contains(driverMemOverheadKey)) {
-        println(s"$driverMemOverheadKey is set but does not apply in client mode.")
+      for (key <- Seq(driverMemOverheadKey, driverCoresKey)) {
+        if (sparkConf.contains(key)) {
+          println(s"$key is set but does not apply in client mode.")
+        }
       }
       sparkConf.getOption(amMemKey)
         .map(Utils.memoryStringToMb)
         .foreach { mem => amMemory = mem }
+      sparkConf.getOption(amCoresKey)
+        .map(_.toInt)
+        .foreach { cores => amCores = cores }
     }
   }
@@ -140,6 +150,10 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
           driverMemory = value
           args = tail

+        case ("--driver-cores") :: IntParam(value) :: tail =>
+          driverCores = value
+          args = tail
+
         case ("--num-workers" | "--num-executors") :: IntParam(value) :: tail =>
           if (args(0) == "--num-workers") {
             println("--num-workers is deprecated. Use --num-executors instead.")
@@ -198,7 +212,8 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)

   private def getUsageMessage(unknownParam: List[String] = null): String = {
     val message = if (unknownParam != null) s"Unknown/unsupported param $unknownParam\n" else ""
-    message + """
+    message +
+      """
       |Usage: org.apache.spark.deploy.yarn.Client [options]
       |Options:
       |  --jar JAR_PATH           Path to your application's JAR file (required in yarn-cluster
@@ -209,6 +224,7 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
       |  --num-executors NUM      Number of executors to start (Default: 2)
       |  --executor-cores NUM     Number of cores for the executors (Default: 1).
       |  --driver-memory MEM      Memory for driver (e.g. 1000M, 2G) (Default: 512 Mb)
+      |  --driver-cores NUM       Number of cores used by the driver (Default: 1).
       |  --executor-memory MEM    Memory per executor (e.g. 1000M, 2G) (Default: 1G)
       |  --name NAME              The name of your application (Default: Spark)
       |  --queue QUEUE            The hadoop queue to use for allocation requests (Default:
```