[SPARK-31092][YARN][DOC] Add version information to the configuration of Yarn

### What changes were proposed in this pull request?
Add version information to the configuration of `Yarn`.

I sorted out the relevant information, shown below.

Item name | Since version | JIRA ID | Commit ID | Note
-- | -- | -- | -- | --
spark.yarn.tags | 1.5.0 | SPARK-9782 | 9b731fad2b43ca18f3c5274062d4c7bc2622ab72#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.priority | 3.0.0 | SPARK-29603 | 4615769736f4c052ae1a2de26e715e229154cd2f#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.am.attemptFailuresValidityInterval | 1.6.0 | SPARK-10739 | f97e9323b526b3d0b0fee0ca03f4276f37bb5750#diff-b050df3f55b82065803d6e83453b9706 |
spark.yarn.executor.failuresValidityInterval | 2.0.0 | SPARK-6735 | 8b44bd52fa40c0fc7d34798c3654e31533fd3008#diff-14b8ed2ef4e3da985300b8d796a38fa9 |
spark.yarn.maxAppAttempts | 1.3.0 | SPARK-2165 | 8fdd48959c93b9cf809f03549e2ae6c4687d1fcd#diff-b050df3f55b82065803d6e83453b9706 |
spark.yarn.user.classpath.first | 1.3.0 | SPARK-5087 | 8d45834debc6986e61831d0d6e982d5528dccc51#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.config.gatewayPath | 1.5.0 | SPARK-8302 | 37bf76a2de2143ec6348a3d43b782227849520cc#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.config.replacementPath | 1.5.0 | SPARK-8302 | 37bf76a2de2143ec6348a3d43b782227849520cc#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.queue | 1.0.0 | SPARK-1126 | 1617816090e7b20124a512a43860a21232ebf511#diff-ae6a41a938a767e5bb97b5d738371a5b |  
spark.yarn.historyServer.address | 1.0.0 | SPARK-1408 | 0058b5d2c74147d24b127a5432f89ebc7050dc18#diff-923ae58523a12397f74dd590744b8b41 |  
spark.yarn.historyServer.allowTracking | 2.2.0 | SPARK-19554 | 4661d30b988bf773ab45a15b143efb2908d33743#diff-4804e0f83ca7f891183eb0db229b4b9a |
spark.yarn.archive | 2.0.0 | SPARK-13577 | 07f1c5447753a3d593cd6ececfcb03c11b1cf8ff#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.jars | 2.0.0 | SPARK-13577 | 07f1c5447753a3d593cd6ececfcb03c11b1cf8ff#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.dist.archives | 1.0.0 | SPARK-1126 | 1617816090e7b20124a512a43860a21232ebf511#diff-ae6a41a938a767e5bb97b5d738371a5b |  
spark.yarn.dist.files | 1.0.0 | SPARK-1126 | 1617816090e7b20124a512a43860a21232ebf511#diff-ae6a41a938a767e5bb97b5d738371a5b |  
spark.yarn.dist.jars | 2.0.0 | SPARK-12343 | 8ba2b7f28fee39c4839e5ea125bd25f5091a3a1e#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.preserve.staging.files | 1.1.0 | SPARK-2933 | b92d823ad13f6fcc325eeb99563bea543871c6aa#diff-85a1f4b2810b3e11b8434dcefac5bb85 |  
spark.yarn.submit.file.replication | 0.8.1 | None | 4668fcb9ff8f9c176c4866480d52dde5d67c8522#diff-b050df3f55b82065803d6e83453b9706 |
spark.yarn.submit.waitAppCompletion | 1.4.0 | SPARK-3591 | b65bad65c3500475b974ca0219f218eef296db2c#diff-b050df3f55b82065803d6e83453b9706 |
spark.yarn.report.interval | 0.9.0 | None | ebdfa6bb9766209bc5a3c4241fa47141c5e9c5cb#diff-e0a7ae95b6d8e04a67ebca0945d27b65 |  
spark.yarn.clientLaunchMonitorInterval | 2.3.0 | SPARK-16019 | 1cad31f00644d899d8e74d58c6eb4e9f72065473#diff-4804e0f83ca7f891183eb0db229b4b9a |
spark.yarn.am.waitTime | 1.3.0 | SPARK-3779 | 253b72b56fe908bbab5d621eae8a5f359c639dfd#diff-87125050a2e2eaf87ea83aac9c19b200 |  
spark.yarn.metrics.namespace | 2.4.0 | SPARK-24594 | d2436a85294a178398525c37833dae79d45c1452#diff-4804e0f83ca7f891183eb0db229b4b9a |
spark.yarn.am.nodeLabelExpression | 1.6.0 | SPARK-7173 | 7db3610327d0725ec2ad378bc873b127a59bb87a#diff-b050df3f55b82065803d6e83453b9706 |
spark.yarn.containerLauncherMaxThreads | 1.2.0 | SPARK-1713 | 1f4a648d4e30e837d6cf3ea8de1808e2254ad70b#diff-801a04f9e67321f3203399f7f59234c1 |  
spark.yarn.max.executor.failures | 1.0.0 | SPARK-1183 | 698373211ef3cdf841c82d48168cd5dbe00a57b4#diff-0c239e58b37779967e0841fb42f3415a |  
spark.yarn.scheduler.reporterThread.maxFailures | 1.2.0 | SPARK-3304 | 11c10df825419372df61a8d23c51e8c3cc78047f#diff-85a1f4b2810b3e11b8434dcefac5bb85 |  
spark.yarn.scheduler.heartbeat.interval-ms | 0.8.1 | None | ee22be0e6c302fb2cdb24f83365c2b8a43a1baab#diff-87125050a2e2eaf87ea83aac9c19b200 |  
spark.yarn.scheduler.initial-allocation.interval | 1.4.0 | SPARK-7533 | 3ddf051ee7256f642f8a17768d161c7b5f55c7e1#diff-87125050a2e2eaf87ea83aac9c19b200 |  
spark.yarn.am.finalMessageLimit | 2.4.0 | SPARK-25174 | f8346d2fc01f1e881e4e3f9c4499bf5f9e3ceb3f#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.am.cores | 1.3.0 | SPARK-1507 | 2be82b1e66cd188456bbf1e5abb13af04d1629d5#diff-746d34aa06bfa57adb9289011e725472 |  
spark.yarn.am.extraJavaOptions | 1.3.0 | SPARK-5087 | 8d45834debc6986e61831d0d6e982d5528dccc51#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.am.extraLibraryPath | 1.4.0 | SPARK-7281 | 7b5dd3e3c0030087eea5a8224789352c03717c1d#diff-b050df3f55b82065803d6e83453b9706 |  
spark.yarn.am.memoryOverhead | 1.3.0 | SPARK-1953 | e96645206006a009e5c1a23bbd177dcaf3ef9b83#diff-746d34aa06bfa57adb9289011e725472 |  
spark.yarn.am.memory | 1.3.0 | SPARK-1953 | e96645206006a009e5c1a23bbd177dcaf3ef9b83#diff-746d34aa06bfa57adb9289011e725472 |  
spark.driver.appUIAddress | 1.1.0 | SPARK-1291 | 72ea56da8e383c61c6f18eeefef03b9af00f5158#diff-2b4617e158e9c5999733759550440b96 |  
spark.yarn.executor.nodeLabelExpression | 1.4.0 | SPARK-6470 | 82fee9d9aad2c9ba2fb4bd658579fe99218cafac#diff-d4620cf162e045960d84c88b2e0aa428 |  
spark.yarn.unmanagedAM.enabled | 3.0.0 | SPARK-22404 | f06bc0cd1dee2a58e04ebf24bf719a2f7ef2dc4e#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.rolledLog.includePattern | 2.0.0 | SPARK-15990 | 272a2f78f3ff801b94a81fa8fcc6633190eaa2f4#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.rolledLog.excludePattern | 2.0.0 | SPARK-15990 | 272a2f78f3ff801b94a81fa8fcc6633190eaa2f4#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.user.jar | 1.1.0 | SPARK-1395 | e380767de344fd6898429de43da592658fd86a39#diff-50e237ea17ce94c3ccfc44143518a5f7 |  
spark.yarn.secondary.jars | 0.9.2 | SPARK-1870 | 1d3aab96120c6770399e78a72b5692cf8f61a144#diff-50b743cff4885220c828b16c44eeecfd |  
spark.yarn.cache.filenames | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.cache.sizes | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.cache.timestamps | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.cache.visibilities | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.cache.types | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.cache.confArchive | 2.0.0 | SPARK-14602 | f47dbf27fa034629fab12d0f3c89ab75edb03f86#diff-14b8ed2ef4e3da985300b8d796a38fa9 |  
spark.yarn.blacklist.executor.launch.blacklisting.enabled | 2.4.0 | SPARK-16630 | b56e9c613fb345472da3db1a567ee129621f6bf3#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.exclude.nodes | 3.0.0 | SPARK-26688 | caceaec93203edaea1d521b88e82ef67094cdea9#diff-4804e0f83ca7f891183eb0db229b4b9a |  
The following entries appear in the documentation (`running-on-yarn.md`) |   |   |   |  
spark.yarn.am.resource.{resource-type}.amount | 3.0.0 | SPARK-20327 | 3946de773498621f88009c309254b019848ed490#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.driver.resource.{resource-type}.amount | 3.0.0 | SPARK-20327 | 3946de773498621f88009c309254b019848ed490#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.executor.resource.{resource-type}.amount | 3.0.0 | SPARK-20327 | 3946de773498621f88009c309254b019848ed490#diff-4804e0f83ca7f891183eb0db229b4b9a |  
spark.yarn.appMasterEnv.[EnvironmentVariableName] | 1.1.0 | SPARK-1680 | 7b798e10e214cd407d3399e2cab9e3789f9a929e#diff-50e237ea17ce94c3ccfc44143518a5f7 |  
spark.yarn.kerberos.relogin.period | 2.3.0 | SPARK-22290 | dc2714da50ecba1bf1fdf555a82a4314f763a76e#diff-4804e0f83ca7f891183eb0db229b4b9a |  
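
The change itself is mechanical: each entry built in `config.scala` gains a `.version(...)` call recording the release that introduced it, and the documentation tables gain a matching `Since Version` column. Below is a minimal, self-contained sketch of that builder pattern; note that `SimpleConfigBuilder`, `TypedBuilder`, and `SimpleEntry` are stand-in names for illustration, not Spark's internal `ConfigBuilder` API.

```scala
// Hypothetical stand-in for Spark's internal ConfigBuilder/ConfigEntry,
// shown only to illustrate the pattern this PR applies to every entry.
final case class SimpleEntry[T](key: String, version: String, default: Option[T])

final class TypedBuilder[T](key: String, since: String) {
  def createWithDefault(value: T): SimpleEntry[T] = SimpleEntry(key, since, Some(value))
  def createOptional: SimpleEntry[T] = SimpleEntry(key, since, None)
}

final class SimpleConfigBuilder(key: String, since: String = "") {
  // Records the release in which this configuration first appeared.
  def version(v: String): SimpleConfigBuilder = new SimpleConfigBuilder(key, v)
  def booleanConf: TypedBuilder[Boolean] = new TypedBuilder[Boolean](key, since)
  def intConf: TypedBuilder[Int] = new TypedBuilder[Int](key, since)
}

object VersionedConfigDemo extends App {
  // Mirrors the shape of an entry touched by this diff.
  val unmanagedAm = new SimpleConfigBuilder("spark.yarn.unmanagedAM.enabled")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)

  println(s"${unmanagedAm.key} has been available since ${unmanagedAm.version}")
}
```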

### Why are the changes needed?
To supplement the configuration with version information.

### Does this PR introduce any user-facing change?
'No'.

### How was this patch tested?
Existing unit tests.
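
For reference, here is a hedged sketch of the style of check such tests perform: asserting that every registered entry carries a plausible `since` version. This is an illustration, not Spark's actual suite; the `entries` list is stand-in data.

```scala
import org.scalatest.funsuite.AnyFunSuite

// Sketch only: in Spark, the keys and versions would come from the internal
// ConfigEntry registry rather than a hand-written list.
class YarnConfigVersionSuite extends AnyFunSuite {
  private case class Entry(key: String, version: String)

  private val entries = Seq(
    Entry("spark.yarn.tags", "1.5.0"),
    Entry("spark.yarn.priority", "3.0.0"),
    Entry("spark.yarn.exclude.nodes", "3.0.0"))

  test("every YARN config entry declares the release it was added in") {
    entries.foreach { e =>
      assert(e.version.matches("""\d+\.\d+(\.\d+)?"""),
        s"${e.key} is missing a valid 'since' version: '${e.version}'")
    }
  }
}
```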

Closes #27856 from beliefer/add-version-to-yarn-config.

Authored-by: beliefer <beliefer@163.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>

docs/running-on-yarn.md

@ -130,7 +130,7 @@ To use a custom metrics.properties for the application master and executors, upd
#### Spark Properties
<table class="table">
<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
<tr>
<td><code>spark.yarn.am.memory</code></td>
<td><code>512m</code></td>
@ -140,6 +140,7 @@ To use a custom metrics.properties for the application master and executors, upd
<p/>
Use lower-case suffixes, e.g. <code>k</code>, <code>m</code>, <code>g</code>, <code>t</code>, and <code>p</code>, for kibi-, mebi-, gibi-, tebi-, and pebibytes, respectively.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.resource.{resource-type}.amount</code></td>
@ -153,6 +154,7 @@ To use a custom metrics.properties for the application master and executors, upd
Example:
To request GPU resources from YARN, use: <code>spark.yarn.am.resource.yarn.io/gpu.amount</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.driver.resource.{resource-type}.amount</code></td>
@ -165,18 +167,20 @@ To use a custom metrics.properties for the application master and executors, upd
Example:
To request GPU resources from YARN, use: <code>spark.yarn.driver.resource.yarn.io/gpu.amount</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.executor.resource.{resource-type}.amount</code></td>
<td><code>(none)</code></td>
<td>
Amount of resource to use per executor process.
Please note that this feature can be used only with YARN 3.0+
For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html
<p/>
Example:
To request GPU resources from YARN, use: <code>spark.yarn.executor.resource.yarn.io/gpu.amount</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.cores</code></td>
@ -185,6 +189,7 @@ To use a custom metrics.properties for the application master and executors, upd
Number of cores to use for the YARN Application Master in client mode.
In cluster mode, use <code>spark.driver.cores</code> instead.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.waitTime</code></td>
@ -193,6 +198,7 @@ To use a custom metrics.properties for the application master and executors, upd
Only used in <code>cluster</code> mode. Time for the YARN Application Master to wait for the
SparkContext to be initialized.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.submit.file.replication</code></td>
@ -200,6 +206,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
HDFS replication level for the files uploaded into HDFS for the application. These include things like the Spark jar, the app jar, and any distributed cache files/archives.
</td>
<td>0.8.1</td>
</tr>
<tr>
<td><code>spark.yarn.stagingDir</code></td>
@ -207,6 +214,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Staging directory used while submitting applications.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.preserve.staging.files</code></td>
@ -214,6 +222,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Set to <code>true</code> to preserve the staged files (Spark jar, app jar, distributed cache files) at the end of the job rather than delete them.
</td>
<td>1.1.0</td>
</tr>
<tr>
<td><code>spark.yarn.scheduler.heartbeat.interval-ms</code></td>
@ -223,6 +232,7 @@ To use a custom metrics.properties for the application master and executors, upd
The value is capped at half the value of YARN's configuration for the expiry interval, i.e.
<code>yarn.am.liveness-monitor.expiry-interval-ms</code>.
</td>
<td>0.8.1</td>
</tr>
<tr>
<td><code>spark.yarn.scheduler.initial-allocation.interval</code></td>
@ -234,6 +244,7 @@ To use a custom metrics.properties for the application master and executors, upd
successive eager heartbeats if pending containers still exist, until
<code>spark.yarn.scheduler.heartbeat.interval-ms</code> is reached.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td><code>spark.yarn.max.executor.failures</code></td>
@ -241,6 +252,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
The maximum number of executor failures before failing the application.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.historyServer.address</code></td>
@ -249,6 +261,7 @@ To use a custom metrics.properties for the application master and executors, upd
The address of the Spark history server, e.g. <code>host.com:18080</code>. The address should not contain a scheme (<code>http://</code>). Defaults to not being set since the history server is an optional service. This address is given to the YARN ResourceManager when the Spark application finishes to link the application from the ResourceManager UI to the Spark history server UI.
For this property, YARN properties can be used as variables, and these are substituted by Spark at runtime. For example, if the Spark history server runs on the same node as the YARN ResourceManager, it can be set to <code>${hadoopconf-yarn.resourcemanager.hostname}:18080</code>.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.dist.archives</code></td>
@ -256,6 +269,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Comma separated list of archives to be extracted into the working directory of each executor.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.dist.files</code></td>
@ -263,6 +277,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Comma-separated list of files to be placed in the working directory of each executor.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.dist.jars</code></td>
@ -270,6 +285,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Comma-separated list of jars to be placed in the working directory of each executor.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.dist.forceDownloadSchemes</code></td>
@ -280,6 +296,7 @@ To use a custom metrics.properties for the application master and executors, upd
support schemes that are supported by Spark, like http, https and ftp, or jars required to be in the
local YARN client's classpath. Wildcard '*' is denoted to download resources for all the schemes.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.executor.instances</code></td>
@ -287,6 +304,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
The number of executors for static allocation. With <code>spark.dynamicAllocation.enabled</code>, the initial set of executors will be at least this large.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.memoryOverhead</code></td>
@ -294,6 +312,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Same as <code>spark.driver.memoryOverhead</code>, but for the YARN Application Master in client mode.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.queue</code></td>
@ -301,6 +320,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
The name of the YARN queue to which the application is submitted.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.jars</code></td>
@ -312,6 +332,7 @@ To use a custom metrics.properties for the application master and executors, upd
need to be distributed each time an application runs. To point to jars on HDFS, for example,
set this configuration to <code>hdfs:///some/path</code>. Globs are allowed.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.archive</code></td>
@ -323,6 +344,7 @@ To use a custom metrics.properties for the application master and executors, upd
Like with the previous option, the archive can also be hosted on HDFS to speed up file
distribution.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.appMasterEnv.[EnvironmentVariableName]</code></td>
@ -334,6 +356,7 @@ To use a custom metrics.properties for the application master and executors, upd
the environment of the Spark driver and in <code>client</code> mode it only controls
the environment of the executor launcher.
</td>
<td>1.1.0</td>
</tr>
<tr>
<td><code>spark.yarn.containerLauncherMaxThreads</code></td>
@ -341,6 +364,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
The maximum number of threads to use in the YARN Application Master for launching executor containers.
</td>
<td>1.2.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.extraJavaOptions</code></td>
@ -351,6 +375,7 @@ To use a custom metrics.properties for the application master and executors, upd
to set maximum heap size (-Xmx) settings with this option. Maximum heap size settings can be set
with <code>spark.yarn.am.memory</code>
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.extraLibraryPath</code></td>
@ -358,6 +383,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Set a special library path to use when launching the YARN Application Master in client mode.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td><code>spark.yarn.maxAppAttempts</code></td>
@ -366,6 +392,7 @@ To use a custom metrics.properties for the application master and executors, upd
The maximum number of attempts that will be made to submit the application.
It should be no larger than the global number of max attempts in the YARN configuration.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.attemptFailuresValidityInterval</code></td>
@ -375,6 +402,7 @@ To use a custom metrics.properties for the application master and executors, upd
If the AM has been running for at least the defined interval, the AM failure count will be reset.
This feature is not enabled if not configured.
</td>
<td>1.6.0</td>
</tr>
<tr>
<td><code>spark.yarn.executor.failuresValidityInterval</code></td>
@ -383,6 +411,7 @@ To use a custom metrics.properties for the application master and executors, upd
Defines the validity interval for executor failure tracking.
Executor failures which are older than the validity interval will be ignored.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.submit.waitAppCompletion</code></td>
@ -392,6 +421,7 @@ To use a custom metrics.properties for the application master and executors, upd
If set to <code>true</code>, the client process will stay alive reporting the application's status.
Otherwise, the client process will exit after submission.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td><code>spark.yarn.am.nodeLabelExpression</code></td>
@ -401,6 +431,7 @@ To use a custom metrics.properties for the application master and executors, upd
Only versions of YARN greater than or equal to 2.6 support node label expressions, so when
running against earlier versions, this property will be ignored.
</td>
<td>1.6.0</td>
</tr>
<tr>
<td><code>spark.yarn.executor.nodeLabelExpression</code></td>
@ -410,6 +441,7 @@ To use a custom metrics.properties for the application master and executors, upd
Only versions of YARN greater than or equal to 2.6 support node label expressions, so when
running against earlier versions, this property will be ignored.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td><code>spark.yarn.tags</code></td>
@ -418,6 +450,7 @@ To use a custom metrics.properties for the application master and executors, upd
Comma-separated list of strings to pass through as YARN application tags appearing
in YARN ApplicationReports, which can be used for filtering when querying YARN apps.
</td>
<td>1.5.0</td>
</tr>
<tr>
<td><code>spark.yarn.priority</code></td>
@ -427,6 +460,7 @@ To use a custom metrics.properties for the application master and executors, upd
integer value have a better opportunity to be activated. Currently, YARN only supports application
priority when using FIFO ordering policy.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.config.gatewayPath</code></td>
@ -446,6 +480,7 @@ To use a custom metrics.properties for the application master and executors, upd
<code>$HADOOP_HOME</code> will make sure that paths used to launch remote processes properly
reference the local YARN configuration.
</td>
<td>1.5.0</td>
</tr>
<tr>
<td><code>spark.yarn.config.replacementPath</code></td>
@ -453,6 +488,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
See <code>spark.yarn.config.gatewayPath</code>.
</td>
<td>1.5.0</td>
</tr>
<tr>
<td><code>spark.yarn.rolledLog.includePattern</code></td>
@ -467,6 +503,7 @@ To use a custom metrics.properties for the application master and executors, upd
on the file name configured in the log4j configuration (like spark.log), the user should set the
regex (spark*) to include all the log files that need to be aggregated.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.rolledLog.excludePattern</code></td>
@ -476,6 +513,7 @@ To use a custom metrics.properties for the application master and executors, upd
and those log files will not be aggregated in a rolling fashion. If the log file
name matches both the include and the exclude pattern, this file will be excluded eventually.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.blacklist.executor.launch.blacklisting.enabled</code></td>
@ -485,6 +523,7 @@ To use a custom metrics.properties for the application master and executors, upd
The error limit for blacklisting can be configured by
<code>spark.blacklist.application.maxFailedExecutorsPerNode</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.yarn.exclude.nodes</code></td>
@ -492,6 +531,7 @@ To use a custom metrics.properties for the application master and executors, upd
<td>
Comma-separated list of YARN node names which are excluded from resource allocation.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.metrics.namespace</code></td>
@ -500,6 +540,7 @@ To use a custom metrics.properties for the application master and executors, upd
The root namespace for AM metrics reporting.
If it is not set then the YARN application ID is used.
</td>
<td>2.4.0</td>
</tr>
</table>
@ -583,7 +624,7 @@ staging directory of the Spark application.
## YARN-specific Kerberos Configuration
<table class="table">
<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
<tr>
<td><code>spark.kerberos.keytab</code></td>
<td>(none)</td>
@ -595,6 +636,7 @@ staging directory of the Spark application.
<br /> (Works also with the "local" master.)
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kerberos.principal</code></td>
@ -605,6 +647,7 @@ staging directory of the Spark application.
<br /> (Works also with the "local" master.)
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.yarn.kerberos.relogin.period</code></td>
@ -614,6 +657,7 @@ staging directory of the Spark application.
that is shorter than the TGT renewal period (or the TGT lifetime if TGT renewal is not enabled).
The default value should be enough for most deployments.
</td>
<td>2.3.0</td>
</tr>
</table>

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala

@ -29,6 +29,7 @@ package object config {
private[spark] val APPLICATION_TAGS = ConfigBuilder("spark.yarn.tags")
.doc("Comma-separated list of strings to pass through as YARN application tags appearing " +
"in YARN Application Reports, which can be used for filtering when querying YARN.")
.version("1.5.0")
.stringConf
.toSequence
.createOptional
@ -37,6 +38,7 @@ package object config {
.doc("Application priority for YARN to define pending applications ordering policy, those" +
" with higher value have a better opportunity to be activated. Currently, YARN only" +
" supports application priority when using FIFO ordering policy.")
.version("3.0.0")
.intConf
.createOptional
@ -44,6 +46,7 @@ package object config {
ConfigBuilder("spark.yarn.am.attemptFailuresValidityInterval")
.doc("Interval after which AM failures will be considered independent and " +
"not accumulate towards the attempt count.")
.version("1.6.0")
.timeConf(TimeUnit.MILLISECONDS)
.createOptional
@ -51,36 +54,43 @@ package object config {
ConfigBuilder("spark.yarn.executor.failuresValidityInterval")
.doc("Interval after which Executor failures will be considered independent and not " +
"accumulate towards the attempt count.")
.version("2.0.0")
.timeConf(TimeUnit.MILLISECONDS)
.createOptional
private[spark] val MAX_APP_ATTEMPTS = ConfigBuilder("spark.yarn.maxAppAttempts")
.doc("Maximum number of AM attempts before failing the app.")
.version("1.3.0")
.intConf
.createOptional
private[spark] val USER_CLASS_PATH_FIRST = ConfigBuilder("spark.yarn.user.classpath.first")
.doc("Whether to place user jars in front of Spark's classpath.")
.version("1.3.0")
.booleanConf
.createWithDefault(false)
private[spark] val GATEWAY_ROOT_PATH = ConfigBuilder("spark.yarn.config.gatewayPath")
.doc("Root of configuration paths that is present on gateway nodes, and will be replaced " +
"with the corresponding path in cluster machines.")
.version("1.5.0")
.stringConf
.createWithDefault(null)
private[spark] val REPLACEMENT_ROOT_PATH = ConfigBuilder("spark.yarn.config.replacementPath")
.doc(s"Path to use as a replacement for ${GATEWAY_ROOT_PATH.key} when launching processes " +
"in the YARN cluster.")
.version("1.5.0")
.stringConf
.createWithDefault(null)
private[spark] val QUEUE_NAME = ConfigBuilder("spark.yarn.queue")
.version("1.0.0")
.stringConf
.createWithDefault("default")
private[spark] val HISTORY_SERVER_ADDRESS = ConfigBuilder("spark.yarn.historyServer.address")
.version("1.0.0")
.stringConf
.createOptional
@ -88,6 +98,7 @@ package object config {
ConfigBuilder("spark.yarn.historyServer.allowTracking")
.doc("Allow using the History Server URL for the application as the tracking URL for the " +
"application when the Web UI is not enabled.")
.version("2.2.0")
.booleanConf
.createWithDefault(false)
@ -95,37 +106,44 @@ package object config {
private[spark] val SPARK_ARCHIVE = ConfigBuilder("spark.yarn.archive")
.doc("Location of archive containing jars files with Spark classes.")
.version("2.0.0")
.stringConf
.createOptional
private[spark] val SPARK_JARS = ConfigBuilder("spark.yarn.jars")
.doc("Location of jars containing Spark classes.")
.version("2.0.0")
.stringConf
.toSequence
.createOptional
private[spark] val ARCHIVES_TO_DISTRIBUTE = ConfigBuilder("spark.yarn.dist.archives")
.version("1.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
private[spark] val FILES_TO_DISTRIBUTE = ConfigBuilder("spark.yarn.dist.files")
.version("1.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
private[spark] val JARS_TO_DISTRIBUTE = ConfigBuilder("spark.yarn.dist.jars")
.version("2.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
private[spark] val PRESERVE_STAGING_FILES = ConfigBuilder("spark.yarn.preserve.staging.files")
.doc("Whether to preserve temporary files created by the job in HDFS.")
.version("1.1.0")
.booleanConf
.createWithDefault(false)
private[spark] val STAGING_FILE_REPLICATION = ConfigBuilder("spark.yarn.submit.file.replication")
.doc("Replication factor for files uploaded by Spark to HDFS.")
.version("0.8.1")
.intConf
.createOptional
@ -134,93 +152,111 @@ package object config {
private[spark] val WAIT_FOR_APP_COMPLETION = ConfigBuilder("spark.yarn.submit.waitAppCompletion")
.doc("In cluster mode, whether to wait for the application to finish before exiting the " +
"launcher process.")
.version("1.4.0")
.booleanConf
.createWithDefault(true)
private[spark] val REPORT_INTERVAL = ConfigBuilder("spark.yarn.report.interval")
.doc("Interval between reports of the current app status.")
.version("0.9.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("1s")
private[spark] val CLIENT_LAUNCH_MONITOR_INTERVAL =
ConfigBuilder("spark.yarn.clientLaunchMonitorInterval")
.doc("Interval between requests for status the client mode AM when starting the app.")
.version("2.3.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("1s")
/* Shared Client-mode AM / Driver configuration. */
private[spark] val AM_MAX_WAIT_TIME = ConfigBuilder("spark.yarn.am.waitTime")
.version("1.3.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("100s")
private[spark] val YARN_METRICS_NAMESPACE = ConfigBuilder("spark.yarn.metrics.namespace")
.doc("The root namespace for AM metrics reporting.")
.version("2.4.0")
.stringConf
.createOptional
private[spark] val AM_NODE_LABEL_EXPRESSION = ConfigBuilder("spark.yarn.am.nodeLabelExpression")
.doc("Node label expression for the AM.")
.version("1.6.0")
.stringConf
.createOptional
private[spark] val CONTAINER_LAUNCH_MAX_THREADS =
ConfigBuilder("spark.yarn.containerLauncherMaxThreads")
.version("1.2.0")
.intConf
.createWithDefault(25)
private[spark] val MAX_EXECUTOR_FAILURES = ConfigBuilder("spark.yarn.max.executor.failures")
.version("1.0.0")
.intConf
.createOptional
private[spark] val MAX_REPORTER_THREAD_FAILURES =
ConfigBuilder("spark.yarn.scheduler.reporterThread.maxFailures")
.version("1.2.0")
.intConf
.createWithDefault(5)
private[spark] val RM_HEARTBEAT_INTERVAL =
ConfigBuilder("spark.yarn.scheduler.heartbeat.interval-ms")
.version("0.8.1")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("3s")
private[spark] val INITIAL_HEARTBEAT_INTERVAL =
ConfigBuilder("spark.yarn.scheduler.initial-allocation.interval")
.version("1.4.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("200ms")
private[spark] val AM_FINAL_MSG_LIMIT = ConfigBuilder("spark.yarn.am.finalMessageLimit")
.doc("The limit size of final diagnostic message for our ApplicationMaster to unregister from" +
" the ResourceManager.")
.version("2.4.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("1m")
/* Client-mode AM configuration. */
private[spark] val AM_CORES = ConfigBuilder("spark.yarn.am.cores")
.version("1.3.0")
.intConf
.createWithDefault(1)
private[spark] val AM_JAVA_OPTIONS = ConfigBuilder("spark.yarn.am.extraJavaOptions")
.doc("Extra Java options for the client-mode AM.")
.version("1.3.0")
.stringConf
.createOptional
private[spark] val AM_LIBRARY_PATH = ConfigBuilder("spark.yarn.am.extraLibraryPath")
.doc("Extra native library path for the client-mode AM.")
.version("1.4.0")
.stringConf
.createOptional
private[spark] val AM_MEMORY_OVERHEAD = ConfigBuilder("spark.yarn.am.memoryOverhead")
.version("1.3.0")
.bytesConf(ByteUnit.MiB)
.createOptional
private[spark] val AM_MEMORY = ConfigBuilder("spark.yarn.am.memory")
.version("1.3.0")
.bytesConf(ByteUnit.MiB)
.createWithDefaultString("512m")
/* Driver configuration. */
private[spark] val DRIVER_APP_UI_ADDRESS = ConfigBuilder("spark.driver.appUIAddress")
.version("1.1.0")
.stringConf
.createOptional
@ -229,6 +265,7 @@ package object config {
private[spark] val EXECUTOR_NODE_LABEL_EXPRESSION =
ConfigBuilder("spark.yarn.executor.nodeLabelExpression")
.doc("Node label expression for executors.")
.version("1.4.0")
.stringConf
.createOptional
@ -237,6 +274,7 @@ package object config {
private[spark] val YARN_UNMANAGED_AM = ConfigBuilder("spark.yarn.unmanagedAM.enabled")
.doc("In client mode, whether to launch the Application Master service as part of the client " +
"using unmanaged am.")
.version("3.0.0")
.booleanConf
.createWithDefault(false)
@ -246,6 +284,7 @@ package object config {
ConfigBuilder("spark.yarn.rolledLog.includePattern")
.doc("Java Regex to filter the log files which match the defined include pattern and those " +
"log files will be aggregated in a rolling fashion.")
.version("2.0.0")
.stringConf
.createOptional
@ -253,6 +292,7 @@ package object config {
ConfigBuilder("spark.yarn.rolledLog.excludePattern")
.doc("Java Regex to filter the log files which match the defined exclude pattern and those " +
"log files will not be aggregated in a rolling fashion.")
.version("2.0.0")
.stringConf
.createOptional
@ -261,6 +301,7 @@ package object config {
// Internal config to propagate the location of the user's jar to the driver/executors
private[spark] val APP_JAR = ConfigBuilder("spark.yarn.user.jar")
.internal()
.version("1.1.0")
.stringConf
.createOptional
@ -268,6 +309,7 @@ package object config {
// of the executors
private[spark] val SECONDARY_JARS = ConfigBuilder("spark.yarn.secondary.jars")
.internal()
.version("0.9.2")
.stringConf
.toSequence
.createOptional
@ -276,24 +318,28 @@ package object config {
private[spark] val CACHED_FILES = ConfigBuilder("spark.yarn.cache.filenames")
.internal()
.version("2.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
private[spark] val CACHED_FILES_SIZES = ConfigBuilder("spark.yarn.cache.sizes")
.internal()
.version("2.0.0")
.longConf
.toSequence
.createWithDefault(Nil)
private[spark] val CACHED_FILES_TIMESTAMPS = ConfigBuilder("spark.yarn.cache.timestamps")
.internal()
.version("2.0.0")
.longConf
.toSequence
.createWithDefault(Nil)
private[spark] val CACHED_FILES_VISIBILITIES = ConfigBuilder("spark.yarn.cache.visibilities")
.internal()
.version("2.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
@ -301,6 +347,7 @@ package object config {
// Either "file" or "archive", for each file.
private[spark] val CACHED_FILES_TYPES = ConfigBuilder("spark.yarn.cache.types")
.internal()
.version("2.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
@ -308,20 +355,23 @@ package object config {
// The location of the conf archive in HDFS.
private[spark] val CACHED_CONF_ARCHIVE = ConfigBuilder("spark.yarn.cache.confArchive")
.internal()
.version("2.0.0")
.stringConf
.createOptional
/* YARN allocator-level blacklisting related config entries. */
private[spark] val YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED =
ConfigBuilder("spark.yarn.blacklist.executor.launch.blacklisting.enabled")
.version("2.4.0")
.booleanConf
.createWithDefault(false)
/* Initially blacklisted YARN nodes. */
private[spark] val YARN_EXCLUDE_NODES = ConfigBuilder("spark.yarn.exclude.nodes")
.version("3.0.0")
.stringConf
.toSequence
.createWithDefault(Nil)
private[yarn] val YARN_EXECUTOR_RESOURCE_TYPES_PREFIX = "spark.yarn.executor.resource."
private[yarn] val YARN_DRIVER_RESOURCE_TYPES_PREFIX = "spark.yarn.driver.resource."