From c63366a693a67c8e96b949ce7a9761f35d78fbe5 Mon Sep 17 00:00:00 2001
From: beliefer
Date: Mon, 2 Mar 2020 15:15:49 +0900
Subject: [PATCH] [SPARK-30891][CORE][DOC] Add version information to the configuration of History
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
1. Add version information to the configuration of `History`.
2. Update the docs of `History`.

I sorted out the information shown below.

Item name | Since version | JIRA ID | Commit ID | Note
-- | -- | -- | -- | --
spark.history.fs.logDirectory | 1.1.0 | SPARK-1768 | 21ddd7d1e9f8e2a726427f32422c31706a20ba3f#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.safemodeCheck.interval | 1.6.0 | SPARK-11020 | cf04fdfe71abc395163a625cc1f99ec5e54cc07e#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.update.interval | 1.4.0 | SPARK-6046 | 4527761bcd6501c362baf2780905a0018b9a74ba#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.cleaner.enabled | 1.3.0 | SPARK-3562 | 8942b522d8a3269a2a357e3a274ed4b3e66ebdde#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | Does not exist in branch-1.3; exists in branch-1.4, but it is 1.3.0-SNAPSHOT in pom.xml
spark.history.fs.cleaner.interval | 1.4.0 | SPARK-5933 | 1991337336596f94698e79c2366f065c374128ab#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.cleaner.maxAge | 1.4.0 | SPARK-5933 | 1991337336596f94698e79c2366f065c374128ab#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.cleaner.maxNum | 3.0.0 | SPARK-28294 | bbc2be4f425c4c26450e1bf21db407e81046ce21#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.store.path | 2.3.0 | SPARK-20642 | 74daf622de4e534d5a5929b424a6e836850eefad#diff-19f35f981fdc5b0a46f070b879a9a9fc |
spark.history.store.maxDiskUsage | 2.3.0 | SPARK-20654 | 8b497046c647a21bbed1bdfbdcb176745a1d5cd5#diff-19f35f981fdc5b0a46f070b879a9a9fc |
spark.history.ui.port | 1.0.0 | SPARK-1276 | 9ae80bf9bd3e4da7443af97b41fe26aa5d35d70b#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.fs.inProgressOptimization.enabled | 2.4.0 | SPARK-6951 | 653fe02415a537299e15f92b56045569864b6183#diff-19f35f981fdc5b0a46f070b879a9a9fc |
spark.history.fs.endEventReparseChunkSize | 2.4.0 | SPARK-6951 | 653fe02415a537299e15f92b56045569864b6183#diff-19f35f981fdc5b0a46f070b879a9a9fc |
spark.history.fs.eventLog.rolling.maxFilesToRetain | 3.0.0 | SPARK-30481 | a2fe73b83c0e7c61d1c83b236565a71e3d005a71#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.fs.eventLog.rolling.compaction.score.threshold | 3.0.0 | SPARK-30481 | a2fe73b83c0e7c61d1c83b236565a71e3d005a71#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.fs.driverlog.cleaner.enabled | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.fs.driverlog.cleaner.interval | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.fs.driverlog.cleaner.maxAge | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.ui.acls.enable | 1.0.1 | SPARK-1489 | c8dd13221215275948b1a6913192d40e0c8cbadd#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.ui.admin.acls | 2.1.1 | SPARK-19033 | 4ca1788805e4a0131ba8f0ccb7499ee0e0242837#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.ui.admin.acls.groups | 2.1.1 | SPARK-19033 | 4ca1788805e4a0131ba8f0ccb7499ee0e0242837#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.fs.numReplayThreads | 2.0.0 | SPARK-13988 | 6fdd0e32a6c3fdce1f3f7e1f8d252af05c419f7b#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.retainedApplications | 1.0.0 | SPARK-1276 | 9ae80bf9bd3e4da7443af97b41fe26aa5d35d70b#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.provider | 1.1.0 | SPARK-1768 | 21ddd7d1e9f8e2a726427f32422c31706a20ba3f#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e |
spark.history.kerberos.enabled | 1.0.1 | SPARK-1490 | 866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.kerberos.principal | 1.0.1 | SPARK-1490 | 866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.kerberos.keytab | 1.0.1 | SPARK-1490 | 866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 |
spark.history.custom.executor.log.url | 3.0.0 | SPARK-26311 | ae5b2a6a92be4986ef5b8062d7fb59318cff6430#diff-6bddeb5e25239974fc13db66266b167b |
spark.history.custom.executor.log.url.applyIncompleteApplication | 3.0.0 | SPARK-26311 | ae5b2a6a92be4986ef5b8062d7fb59318cff6430#diff-6bddeb5e25239974fc13db66266b167b |
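For illustration only (not part of the patch itself), the change applied to every entry above is a single `.version(...)` call in the `ConfigBuilder` chain. A minimal sketch, using a hypothetical key so it does not collide with real entries; `ConfigBuilder` is `private[spark]`, so this only compiles inside Spark's own source tree:

```scala
package org.apache.spark.internal.config

import java.util.concurrent.TimeUnit

// Sketch of the pattern this PR applies to every spark.history.* entry: a single
// .version(...) call in the builder chain records the release that introduced the config.
// The key below is hypothetical and used only for illustration.
private[spark] object HistoryVersionPatternSketch {
  val EXAMPLE_INTERVAL = ConfigBuilder("spark.history.example.interval")
    .doc("Hypothetical entry used only to illustrate the .version(...) pattern.")
    .version("1.4.0")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("10s")
}
```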
### Why are the changes needed?
Supplemental configuration version information.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
Existing UT.

Closes #27751 from beliefer/add-version-to-history-config.

Authored-by: beliefer
Signed-off-by: HyukjinKwon
---
 .../spark/internal/config/History.scala | 28 +++++++++++++++++++
 docs/monitoring.md                      | 26 ++++++++++++++++-
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/History.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala
index 8f99908507..581777de36 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/History.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala
@@ -26,46 +26,56 @@ private[spark] object History {
   val DEFAULT_LOG_DIR = "file:/tmp/spark-events"

   val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory")
+    .version("1.1.0")
     .stringConf
     .createWithDefault(DEFAULT_LOG_DIR)

   val SAFEMODE_CHECK_INTERVAL_S = ConfigBuilder("spark.history.fs.safemodeCheck.interval")
+    .version("1.6.0")
     .timeConf(TimeUnit.SECONDS)
     .createWithDefaultString("5s")

   val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval")
+    .version("1.4.0")
     .timeConf(TimeUnit.SECONDS)
     .createWithDefaultString("10s")

   val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled")
+    .version("1.4.0")
     .booleanConf
     .createWithDefault(false)

   val CLEANER_INTERVAL_S = ConfigBuilder("spark.history.fs.cleaner.interval")
+    .version("1.4.0")
     .timeConf(TimeUnit.SECONDS)
     .createWithDefaultString("1d")

   val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge")
+    .version("1.4.0")
     .timeConf(TimeUnit.SECONDS)
     .createWithDefaultString("7d")

   val MAX_LOG_NUM = ConfigBuilder("spark.history.fs.cleaner.maxNum")
     .doc("The maximum number of log files in the event log directory.")
+    .version("3.0.0")
     .intConf
     .createWithDefault(Int.MaxValue)

   val LOCAL_STORE_DIR = ConfigBuilder("spark.history.store.path")
     .doc("Local directory where to cache application history information. By default this is " +
       "not set, meaning all history information will be kept in memory.")
+    .version("2.3.0")
     .stringConf
     .createOptional

   val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage")
+    .version("2.3.0")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefaultString("10g")

   val HISTORY_SERVER_UI_PORT = ConfigBuilder("spark.history.ui.port")
     .doc("Web UI port to bind Spark History Server")
+    .version("1.0.0")
     .intConf
     .createWithDefault(18080)

@@ -73,6 +83,7 @@ private[spark] object History {
     ConfigBuilder("spark.history.fs.inProgressOptimization.enabled")
       .doc("Enable optimized handling of in-progress logs. This option may leave finished " +
         "applications that fail to rename their event logs listed as in-progress.")
+      .version("2.4.0")
       .booleanConf
       .createWithDefault(true)

@@ -81,6 +92,7 @@
       .doc("How many bytes to parse at the end of log files looking for the end event. " +
         "This is used to speed up generation of application listings by skipping unnecessary " +
         "parts of event log files. It can be disabled by setting this config to 0.")
+      .version("2.4.0")
       .bytesConf(ByteUnit.BYTE)
       .createWithDefaultString("1m")

@@ -90,6 +102,7 @@
         "By default, all event log files will be retained. Please set the configuration " +
         s"and ${EVENT_LOG_ROLLING_MAX_FILE_SIZE.key} accordingly if you want to control " +
         "the overall size of event log files.")
+      .version("3.0.0")
       .intConf
       .checkValue(_ > 0, "Max event log files to retain should be higher than 0.")
       .createWithDefault(Integer.MAX_VALUE)

@@ -99,54 +112,67 @@
       .doc("The threshold score to determine whether it's good to do the compaction or not. " +
         "The compaction score is calculated in analyzing, and being compared to this value. " +
" + "Compaction will proceed only when the score is higher than the threshold value.") + .version("3.0.0") .internal() .doubleConf .createWithDefault(0.7d) val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled") + .version("3.0.0") .fallbackConf(CLEANER_ENABLED) val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval") + .version("3.0.0") .fallbackConf(CLEANER_INTERVAL_S) val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge") + .version("3.0.0") .fallbackConf(MAX_LOG_AGE_S) val HISTORY_SERVER_UI_ACLS_ENABLE = ConfigBuilder("spark.history.ui.acls.enable") + .version("1.0.1") .booleanConf .createWithDefault(false) val HISTORY_SERVER_UI_ADMIN_ACLS = ConfigBuilder("spark.history.ui.admin.acls") + .version("2.1.1") .stringConf .toSequence .createWithDefault(Nil) val HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS = ConfigBuilder("spark.history.ui.admin.acls.groups") + .version("2.1.1") .stringConf .toSequence .createWithDefault(Nil) val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads") + .version("2.0.0") .intConf .createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt) val RETAINED_APPLICATIONS = ConfigBuilder("spark.history.retainedApplications") + .version("1.0.0") .intConf .createWithDefault(50) val PROVIDER = ConfigBuilder("spark.history.provider") + .version("1.1.0") .stringConf .createOptional val KERBEROS_ENABLED = ConfigBuilder("spark.history.kerberos.enabled") + .version("1.0.1") .booleanConf .createWithDefault(false) val KERBEROS_PRINCIPAL = ConfigBuilder("spark.history.kerberos.principal") + .version("1.0.1") .stringConf .createOptional val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab") + .version("1.0.1") .stringConf .createOptional @@ -156,6 +182,7 @@ private[spark] object History { "some path variables via patterns which can vary on cluster manager. Please check the " + "documentation for your cluster manager to see which patterns are supported, if any. " + "This configuration has no effect on a live application, it only affects the history server.") + .version("3.0.0") .stringConf .createOptional @@ -165,6 +192,7 @@ private[spark] object History { s"${CUSTOM_EXECUTOR_LOG_URL.key}, to incomplete application as well. " + "Even if this is true, this still only affects the behavior of the history server, " + "not running spark applications.") + .version("3.0.0") .booleanConf .createWithDefault(true) } diff --git a/docs/monitoring.md b/docs/monitoring.md index 4cba15b35e..016d357222 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -143,13 +143,14 @@ Security options for the Spark History Server are covered more detail in the [Security](security.html#web-ui) page. - + + @@ -160,6 +161,7 @@ Security options for the Spark History Server are covered more detail in the an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs. + @@ -171,6 +173,7 @@ Security options for the Spark History Server are covered more detail in the As soon as an update has completed, listings of the completed and incomplete applications will reflect the changes. + @@ -180,6 +183,7 @@ Security options for the Spark History Server are covered more detail in the the oldest applications will be removed from the cache. If an application is not in the cache, it will have to be loaded from disk if it is accessed from the UI. 
@@ -188,6 +192,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.ui.maxApplications</td>
     The number of applications to display on the history summary page. Application UIs are still
     available by accessing their URLs directly even if they are not displayed on the history summary page.
     </td>
+    <td>2.0.1</td>
   </tr>
@@ -195,6 +200,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.ui.port</td>
     The port to which the web interface of the history server binds.
     </td>
+    <td>1.0.0</td>
   </tr>
@@ -203,6 +209,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.kerberos.enabled</td>
     Indicates whether the history server should use kerberos to login. This is required
     if the history server is accessing HDFS files on a secure Hadoop cluster.
     </td>
+    <td>1.0.1</td>
   </tr>
@@ -210,6 +217,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.kerberos.principal</td>
     When spark.history.kerberos.enabled=true, specifies kerberos principal name for the History Server.
     </td>
+    <td>1.0.1</td>
   </tr>
@@ -217,6 +225,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.kerberos.keytab</td>
     When spark.history.kerberos.enabled=true, specifies location of the kerberos keytab file for the History Server.
     </td>
+    <td>1.0.1</td>
   </tr>
@@ -224,6 +233,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.cleaner.enabled</td>
     Specifies whether the History Server should periodically clean up event logs from storage.
     </td>
+    <td>1.4.0</td>
   </tr>
@@ -236,6 +246,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.cleaner.interval</td>
     spark.history.fs.cleaner.maxNum, Spark tries to clean up the completed attempts
     from the applications based on the order of their oldest attempt time.
     </td>
+    <td>1.4.0</td>
   </tr>
@@ -243,6 +254,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.cleaner.maxAge</td>
     When spark.history.fs.cleaner.enabled=true, job history files older than this will be deleted
     when the filesystem history cleaner runs.
     </td>
+    <td>1.4.0</td>
   </tr>
@@ -253,6 +265,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.cleaner.maxNum</td>
     This should be smaller than the underlying file system limit like
     `dfs.namenode.fs-limits.max-directory-items` in HDFS.
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -262,6 +275,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.endEventReparseChunkSize</td>
     This is used to speed up generation of application listings by skipping unnecessary
     parts of event log files. It can be disabled by setting this config to 0.
     </td>
+    <td>2.4.0</td>
   </tr>
@@ -270,6 +284,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.inProgressOptimization.enabled</td>
     Enable optimized handling of in-progress logs. This option may leave finished
     applications that fail to rename their event logs listed as in-progress.
     </td>
+    <td>2.4.0</td>
   </tr>
@@ -277,6 +292,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.driverlog.cleaner.enabled</td>
     Specifies whether the History Server should periodically clean up driver logs from storage.
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -285,6 +301,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.driverlog.cleaner.interval</td>
     When spark.history.fs.driverlog.cleaner.enabled=true, specifies how often the filesystem
     driver log cleaner checks for files to delete. Files are only deleted if they are older than
     spark.history.fs.driverlog.cleaner.maxAge
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -292,6 +309,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.driverlog.cleaner.maxAge</td>
     When spark.history.fs.driverlog.cleaner.enabled=true, driver log files older than this will be
     deleted when the driver log cleaner runs.
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -299,6 +317,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.numReplayThreads</td>
     Number of threads that will be used by history server to process event logs.
     </td>
+    <td>2.0.0</td>
   </tr>
@@ -307,6 +326,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.store.maxDiskUsage</td>
     Maximum disk usage for the local directory where the cache application history information are stored.
     </td>
+    <td>2.3.0</td>
   </tr>
@@ -316,6 +336,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.store.path</td>
     server will store application data on disk instead of keeping it in memory. The data written
     to disk will be re-used in the event of a history server restart.
     </td>
+    <td>2.3.0</td>
   </tr>
@@ -329,6 +350,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.custom.executor.log.url</td>
     For now, only YARN mode supports this configuration
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -339,6 +361,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.custom.executor.log.url.applyIncompleteApplication</td>
     Please note that incomplete applications may include applications which didn't shutdown gracefully.
     Even this is set to `true`, this configuration has no effect on a live application, it only affects the history server.
     </td>
+    <td>3.0.0</td>
   </tr>
@@ -348,6 +371,7 @@ Security options for the Spark History Server are covered more detail in the
     <td>spark.history.fs.eventLog.rolling.maxFilesToRetain</td>
     all event log files will be retained. The lowest value is 1 for technical reason.
     Please read the section of "Applying compaction of old event log files" for more details.
     </td>
+    <td>3.0.0</td>
   </tr>
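As a side note (not part of the patch), the version strings recorded above end up on each config entry, so they can be spot-checked against the table in the description. A rough sketch, assuming the entry exposes the recorded string as `version` and running from Spark's own sources, since `History` is `private[spark]`:

```scala
package org.apache.spark.internal.config

// Rough spot-check sketch (not from this patch): confirm a few of the recorded
// versions match the table in the PR description. Assumes ConfigEntry exposes
// the recorded release string as `version`.
object HistoryVersionSpotCheck {
  def main(args: Array[String]): Unit = {
    assert(History.HISTORY_LOG_DIR.version == "1.1.0")
    assert(History.MAX_LOG_NUM.version == "3.0.0")
    assert(History.KERBEROS_ENABLED.version == "1.0.1")
    println("spark.history.* version metadata matches the PR description.")
  }
}
```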