[SPARK-35229][WEBUI] Limit the maximum number of items on the timeline view
### What changes were proposed in this pull request? This PR proposes to introduce three new configurations to limit the maximum number of jobs/stages/executors on the timeline view. ### Why are the changes needed? If the number of items on the timeline view grows beyond 1000, rendering can be significantly slow. https://issues.apache.org/jira/browse/SPARK-35229 The maximum number of tasks on the timeline is already limited by `spark.ui.timeline.tasks.maximum`, so I propose to mitigate this issue in the same manner. ### Does this PR introduce _any_ user-facing change? Yes. The maximum number of items shown on the timeline view is limited. I propose default values of 500 for jobs and stages, and 250 for executors. Since an executor has at most 2 items (added and removed), 250 is chosen. ### How was this patch tested? I manually confirmed this change works with the following procedure. ``` # launch a cluster $ bin/spark-shell --conf spark.ui.retainedDeadExecutors=300 --master "local-cluster[4, 1, 1024]" // Confirm the maximum number of jobs (1 to 1000).foreach { _ => sc.parallelize(List(1)).collect } // Confirm the maximum number of stages var df = sc.parallelize(1 to 2) (1 to 1000).foreach { i => df = df.repartition(i % 5 + 1) } df.collect // Confirm the maximum number of executors (1 to 300).foreach { _ => try sc.parallelize(List(1)).foreach { _ => System.exit(0) } catch { case e => }} ``` Screenshots here. ![jobs_limited](https://user-images.githubusercontent.com/4736016/116386937-3e8c4a00-a855-11eb-8f4c-151cf7ddd3b8.png) ![stages_limited](https://user-images.githubusercontent.com/4736016/116386990-49df7580-a855-11eb-9f71-8e129e3336ab.png) ![executors_limited](https://user-images.githubusercontent.com/4736016/116387009-4f3cc000-a855-11eb-8697-a2eb4c9c99e6.png) Closes #32381 from sarutak/mitigate-timeline-issue. Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com> Signed-off-by: Gengliang Wang <ltnwgl@gmail.com>
This commit is contained in:
parent
7c9a9ec04f
commit
2b6640a169
|
@ -129,6 +129,21 @@ private[spark] object UI {
|
|||
.intConf
|
||||
.createWithDefault(1000)
|
||||
|
||||
// Maximum number of jobs shown in the event timeline (integer, default 500, since 3.2.0).
val UI_TIMELINE_JOBS_MAXIMUM = ConfigBuilder("spark.ui.timeline.jobs.maximum")
|
||||
.version("3.2.0")
|
||||
.intConf
|
||||
.createWithDefault(500)
|
||||
|
||||
// Maximum number of stages shown in the event timeline (integer, default 500, since 3.2.0).
val UI_TIMELINE_STAGES_MAXIMUM = ConfigBuilder("spark.ui.timeline.stages.maximum")
|
||||
.version("3.2.0")
|
||||
.intConf
|
||||
.createWithDefault(500)
|
||||
|
||||
// Maximum number of executors shown in the event timeline (integer, default 250, since 3.2.0).
val UI_TIMELINE_EXECUTORS_MAXIMUM = ConfigBuilder("spark.ui.timeline.executors.maximum")
|
||||
.version("3.2.0")
|
||||
.intConf
|
||||
.createWithDefault(250)
|
||||
|
||||
val ACLS_ENABLE = ConfigBuilder("spark.acls.enable")
|
||||
.version("1.1.0")
|
||||
.booleanConf
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.commons.text.StringEscapeUtils
|
|||
|
||||
import org.apache.spark.JobExecutionStatus
|
||||
import org.apache.spark.internal.config.SCHEDULER_MODE
|
||||
import org.apache.spark.internal.config.UI._
|
||||
import org.apache.spark.scheduler._
|
||||
import org.apache.spark.status.AppStatusStore
|
||||
import org.apache.spark.status.api.v1
|
||||
|
@ -40,6 +41,9 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
|
|||
|
||||
import ApiHelper._
|
||||
|
||||
private val MAX_TIMELINE_JOBS = parent.conf.get(UI_TIMELINE_JOBS_MAXIMUM)
|
||||
private val MAX_TIMELINE_EXECUTORS = parent.conf.get(UI_TIMELINE_EXECUTORS_MAXIMUM)
|
||||
|
||||
private val JOBS_LEGEND =
|
||||
<div class="legend-area"><svg width="150px" height="85px">
|
||||
<rect class="succeeded-job-legend"
|
||||
|
@ -64,9 +68,12 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
|
|||
</svg></div>.toString.filter(_ != '\n')
|
||||
|
||||
private def makeJobEvent(jobs: Seq[v1.JobData]): Seq[String] = {
|
||||
val now = System.currentTimeMillis()
|
||||
jobs.filter { job =>
|
||||
job.status != JobExecutionStatus.UNKNOWN && job.submissionTime.isDefined
|
||||
}.map { job =>
|
||||
}.sortBy { j =>
|
||||
(j.completionTime.map(_.getTime).getOrElse(now), j.submissionTime.get.getTime)
|
||||
}.takeRight(MAX_TIMELINE_JOBS).map { job =>
|
||||
val jobId = job.jobId
|
||||
val status = job.status
|
||||
val (_, lastStageDescription) = lastStageNameAndDescription(store, job)
|
||||
|
@ -76,7 +83,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
|
|||
plainText = true).text
|
||||
|
||||
val submissionTime = job.submissionTime.get.getTime()
|
||||
val completionTime = job.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis())
|
||||
val completionTime = job.completionTime.map(_.getTime()).getOrElse(now)
|
||||
val classNameByStatus = status match {
|
||||
case JobExecutionStatus.SUCCEEDED => "succeeded"
|
||||
case JobExecutionStatus.FAILED => "failed"
|
||||
|
@ -118,7 +125,9 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
|
|||
private def makeExecutorEvent(executors: Seq[v1.ExecutorSummary]):
|
||||
Seq[String] = {
|
||||
val events = ListBuffer[String]()
|
||||
executors.foreach { e =>
|
||||
executors.sortBy { e =>
|
||||
e.removeTime.map(_.getTime).getOrElse(e.addTime.getTime)
|
||||
}.takeRight(MAX_TIMELINE_EXECUTORS).foreach { e =>
|
||||
val addedEvent =
|
||||
s"""
|
||||
|{
|
||||
|
@ -192,6 +201,30 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
|
|||
</a>
|
||||
</span> ++
|
||||
<div id="application-timeline" class="collapsed">
|
||||
{
|
||||
if (MAX_TIMELINE_JOBS < jobs.size) {
|
||||
<div>
|
||||
<strong>
|
||||
Only the most recent {MAX_TIMELINE_JOBS} submitted/completed jobs
|
||||
(of {jobs.size} total) are shown.
|
||||
</strong>
|
||||
</div>
|
||||
} else {
|
||||
Seq.empty
|
||||
}
|
||||
}
|
||||
{
|
||||
if (MAX_TIMELINE_EXECUTORS < executors.size) {
|
||||
<div>
|
||||
<strong>
|
||||
Only the most recent {MAX_TIMELINE_EXECUTORS} added/removed executors
|
||||
(of {executors.size} total) are shown.
|
||||
</strong>
|
||||
</div>
|
||||
} else {
|
||||
Seq.empty
|
||||
}
|
||||
}
|
||||
<div class="control-panel">
|
||||
<div id="application-timeline-zoom-lock">
|
||||
<input type="checkbox"></input>
|
||||
|
|
|
@ -26,6 +26,7 @@ import scala.xml.{Node, NodeSeq, Unparsed, Utility}
|
|||
import org.apache.commons.text.StringEscapeUtils
|
||||
|
||||
import org.apache.spark.JobExecutionStatus
|
||||
import org.apache.spark.internal.config.UI._
|
||||
import org.apache.spark.resource.ResourceProfile
|
||||
import org.apache.spark.status.AppStatusStore
|
||||
import org.apache.spark.status.api.v1
|
||||
|
@ -34,6 +35,9 @@ import org.apache.spark.ui._
|
|||
/** Page showing statistics and stage list for a given job */
|
||||
private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIPage("job") {
|
||||
|
||||
private val MAX_TIMELINE_STAGES = parent.conf.get(UI_TIMELINE_STAGES_MAXIMUM)
|
||||
private val MAX_TIMELINE_EXECUTORS = parent.conf.get(UI_TIMELINE_EXECUTORS_MAXIMUM)
|
||||
|
||||
private val STAGES_LEGEND =
|
||||
<div class="legend-area"><svg width="150px" height="85px">
|
||||
<rect class="completed-stage-legend"
|
||||
|
@ -58,14 +62,17 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
|
|||
</svg></div>.toString.filter(_ != '\n')
|
||||
|
||||
private def makeStageEvent(stageInfos: Seq[v1.StageData]): Seq[String] = {
|
||||
stageInfos.map { stage =>
|
||||
val now = System.currentTimeMillis()
|
||||
stageInfos.sortBy { s =>
|
||||
(s.completionTime.map(_.getTime).getOrElse(now), s.submissionTime.get.getTime)
|
||||
}.takeRight(MAX_TIMELINE_STAGES).map { stage =>
|
||||
val stageId = stage.stageId
|
||||
val attemptId = stage.attemptId
|
||||
val name = stage.name
|
||||
val status = stage.status.toString.toLowerCase(Locale.ROOT)
|
||||
val submissionTime = stage.submissionTime.get.getTime()
|
||||
val completionTime = stage.completionTime.map(_.getTime())
|
||||
.getOrElse(System.currentTimeMillis())
|
||||
.getOrElse(now)
|
||||
|
||||
// The timeline library treats contents as HTML, so we have to escape them. We need to add
|
||||
// extra layers of escaping in order to embed this in a JavaScript string literal.
|
||||
|
@ -98,7 +105,9 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
|
|||
|
||||
def makeExecutorEvent(executors: Seq[v1.ExecutorSummary]): Seq[String] = {
|
||||
val events = ListBuffer[String]()
|
||||
executors.foreach { e =>
|
||||
executors.sortBy { e =>
|
||||
e.removeTime.map(_.getTime).getOrElse(e.addTime.getTime)
|
||||
}.takeRight(MAX_TIMELINE_EXECUTORS).foreach { e =>
|
||||
val addedEvent =
|
||||
s"""
|
||||
|{
|
||||
|
@ -172,6 +181,30 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
|
|||
</a>
|
||||
</span> ++
|
||||
<div id="job-timeline" class="collapsed">
|
||||
{
|
||||
if (MAX_TIMELINE_STAGES < stages.size) {
|
||||
<div>
|
||||
<strong>
|
||||
Only the most recent {MAX_TIMELINE_STAGES} submitted/completed stages
|
||||
(of {stages.size} total) are shown.
|
||||
</strong>
|
||||
</div>
|
||||
} else {
|
||||
Seq.empty
|
||||
}
|
||||
}
|
||||
{
|
||||
if (MAX_TIMELINE_EXECUTORS < executors.size) {
|
||||
<div>
|
||||
<strong>
|
||||
Only the most recent {MAX_TIMELINE_EXECUTORS} added/removed executors
|
||||
(of {executors.size} total) are shown.
|
||||
</strong>
|
||||
</div>
|
||||
} else {
|
||||
Seq.empty
|
||||
}
|
||||
}
|
||||
<div class="control-panel">
|
||||
<div id="job-timeline-zoom-lock">
|
||||
<input type="checkbox"></input>
|
||||
|
|
|
@ -30,6 +30,7 @@ private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore)
|
|||
extends SparkUITab(parent, "jobs") {
|
||||
|
||||
val sc = parent.sc
|
||||
val conf = parent.conf
|
||||
val killEnabled = parent.killEnabled
|
||||
|
||||
// Show pool information for only live UI.
|
||||
|
|
|
@ -398,8 +398,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
|
|||
{
|
||||
if (MAX_TIMELINE_TASKS < tasks.size) {
|
||||
<strong>
|
||||
This page has more than the maximum number of tasks that can be shown in the
|
||||
visualization! Only the most recent {MAX_TIMELINE_TASKS} tasks
|
||||
Only the most recent {MAX_TIMELINE_TASKS} tasks
|
||||
(of {tasks.size} total) are shown.
|
||||
</strong>
|
||||
} else {
|
||||
|
|
|
@ -1365,6 +1365,38 @@ Apart from these, the following properties are also available, and may be useful
|
|||
</td>
|
||||
<td>2.2.3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>spark.ui.timeline.executors.maximum</code></td>
|
||||
<td>250</td>
|
||||
<td>
|
||||
The maximum number of executors shown in the event timeline.
|
||||
</td>
|
||||
<td>3.2.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>spark.ui.timeline.jobs.maximum</code></td>
|
||||
<td>500</td>
|
||||
<td>
|
||||
The maximum number of jobs shown in the event timeline.
|
||||
</td>
|
||||
<td>3.2.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>spark.ui.timeline.stages.maximum</code></td>
|
||||
<td>500</td>
|
||||
<td>
|
||||
The maximum number of stages shown in the event timeline.
|
||||
</td>
|
||||
<td>3.2.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>spark.ui.timeline.tasks.maximum</code></td>
|
||||
<td>1000</td>
|
||||
<td>
|
||||
The maximum number of tasks shown in the event timeline.
|
||||
</td>
|
||||
<td>1.4.0</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
### Compression and Serialization
|
||||
|
|
Loading…
Reference in a new issue