[SPARK-4697][YARN]System properties should override environment variables

I found some arguments in yarn module take environment variables before system properties while the latter override the former in core module.

Author: WangTaoTheTonic <barneystinson@aliyun.com>
Author: WangTao <barneystinson@aliyun.com>

Closes #3557 from WangTaoTheTonic/SPARK4697 and squashes the following commits:

836b9ef [WangTaoTheTonic] fix type mismatch
e3e486a [WangTaoTheTonic] remove the comma
1262d57 [WangTaoTheTonic] handle spark.app.name and SPARK_YARN_APP_NAME in SparkSubmitArguments
bee9447 [WangTaoTheTonic] wrong brace
81833bb [WangTaoTheTonic] rebase
40934b4 [WangTaoTheTonic] just switch blocks
5f43f45 [WangTao] System property can override environment variable
This commit is contained in:
WangTaoTheTonic 2015-01-13 09:43:48 -08:00 committed by Andrew Or
parent f7741a9a72
commit 9dea64e53a
3 changed files with 18 additions and 10 deletions

View file

@ -149,6 +149,11 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
// Global defaults. These should be keep to minimum to avoid confusing behavior.
master = Option(master).getOrElse("local[*]")
// In YARN mode, app name can be set via SPARK_YARN_APP_NAME (see SPARK-5222)
if (master.startsWith("yarn")) {
name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")).orNull
}
// Set name from main class if not given
name = Option(name).orElse(Option(mainClass)).orNull
if (name == null && primaryResource != null) {

View file

@ -64,12 +64,12 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
// For backward compatibility, SPARK_YARN_DIST_{ARCHIVES/FILES} should be resolved to hdfs://,
// while spark.yarn.dist.{archives/files} should be resolved to file:// (SPARK-2051).
files = Option(files)
.orElse(sys.env.get("SPARK_YARN_DIST_FILES"))
.orElse(sparkConf.getOption("spark.yarn.dist.files").map(p => Utils.resolveURIs(p)))
.orElse(sys.env.get("SPARK_YARN_DIST_FILES"))
.orNull
archives = Option(archives)
.orElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES"))
.orElse(sparkConf.getOption("spark.yarn.dist.archives").map(p => Utils.resolveURIs(p)))
.orElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES"))
.orNull
// If dynamic allocation is enabled, start at the max number of executors
if (isDynamicAllocationEnabled) {

View file

@ -74,8 +74,7 @@ private[spark] class YarnClientSchedulerBackend(
("--executor-memory", "SPARK_EXECUTOR_MEMORY", "spark.executor.memory"),
("--executor-cores", "SPARK_WORKER_CORES", "spark.executor.cores"),
("--executor-cores", "SPARK_EXECUTOR_CORES", "spark.executor.cores"),
("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue"),
("--name", "SPARK_YARN_APP_NAME", "spark.app.name")
("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue")
)
// Warn against the following deprecated environment variables: env var -> suggestion
val deprecatedEnvVars = Map(
@ -86,18 +85,22 @@ private[spark] class YarnClientSchedulerBackend(
// Do the same for deprecated properties: property -> suggestion
val deprecatedProps = Map("spark.master.memory" -> "--driver-memory through spark-submit")
optionTuples.foreach { case (optionName, envVar, sparkProp) =>
if (System.getenv(envVar) != null) {
extraArgs += (optionName, System.getenv(envVar))
if (deprecatedEnvVars.contains(envVar)) {
logWarning(s"NOTE: $envVar is deprecated. Use ${deprecatedEnvVars(envVar)} instead.")
}
} else if (sc.getConf.contains(sparkProp)) {
if (sc.getConf.contains(sparkProp)) {
extraArgs += (optionName, sc.getConf.get(sparkProp))
if (deprecatedProps.contains(sparkProp)) {
logWarning(s"NOTE: $sparkProp is deprecated. Use ${deprecatedProps(sparkProp)} instead.")
}
} else if (System.getenv(envVar) != null) {
extraArgs += (optionName, System.getenv(envVar))
if (deprecatedEnvVars.contains(envVar)) {
logWarning(s"NOTE: $envVar is deprecated. Use ${deprecatedEnvVars(envVar)} instead.")
}
}
}
// The app name is a special case because "spark.app.name" is required of all applications.
// As a result, the corresponding "SPARK_YARN_APP_NAME" is already handled preemptively in
// SparkSubmitArguments if "spark.app.name" is not explicitly set by the user. (SPARK-5222)
sc.getConf.getOption("spark.app.name").foreach(v => extraArgs += ("--name", v))
extraArgs
}