[SPARK-35229][WEBUI] Limit the maximum number of items on the timeline view

### What changes were proposed in this pull request?

This PR proposes to introduce three new configurations to limit the maximum number of jobs/stages/executors on the timeline view.

### Why are the changes needed?

If the number of items on the timeline view grows beyond 1000, rendering can be significantly slow.
https://issues.apache.org/jira/browse/SPARK-35229

The maximum number of tasks on the timeline is already limited by `spark.ui.timeline.tasks.maximum`, so I propose to mitigate this issue in the same manner.

### Does this PR introduce _any_ user-facing change?

Yes. The maximum number of items shown on the timeline view is limited.
I propose a default value of 500 for jobs and stages, and 250 for executors.
An executor has at most 2 items (added and removed), so 250 is chosen to keep the number of executor items at 500 or fewer.

### How was this patch tested?

I manually confirmed that this change works with the following procedure.
```
# launch a cluster
$ bin/spark-shell --conf spark.ui.retainedDeadExecutors=300 --master "local-cluster[4, 1, 1024]"

// Confirm the maximum number of jobs
(1 to 1000).foreach { _ => sc.parallelize(List(1)).collect }

// Confirm the maximum number of stages
var df = sc.parallelize(1 to 2)
(1 to 1000).foreach { i =>  df = df.repartition(i % 5 + 1) }
df.collect

// Confirm the maximum number of executors
(1 to 300).foreach { _ => try sc.parallelize(List(1)).foreach { _ => System.exit(0) } catch { case e => }}
```

Screenshots here.
![jobs_limited](https://user-images.githubusercontent.com/4736016/116386937-3e8c4a00-a855-11eb-8f4c-151cf7ddd3b8.png)
![stages_limited](https://user-images.githubusercontent.com/4736016/116386990-49df7580-a855-11eb-9f71-8e129e3336ab.png)
![executors_limited](https://user-images.githubusercontent.com/4736016/116387009-4f3cc000-a855-11eb-8697-a2eb4c9c99e6.png)

Closes #32381 from sarutak/mitigate-timeline-issue.

Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com>
Signed-off-by: Gengliang Wang <ltnwgl@gmail.com>
This commit is contained in:
Kousuke Saruta 2021-05-11 20:53:11 +08:00 committed by Gengliang Wang
parent 7c9a9ec04f
commit 2b6640a169
6 changed files with 121 additions and 8 deletions

View file

@ -129,6 +129,21 @@ private[spark] object UI {
.intConf
.createWithDefault(1000)
val UI_TIMELINE_JOBS_MAXIMUM = ConfigBuilder("spark.ui.timeline.jobs.maximum")
.version("3.2.0")
.intConf
.createWithDefault(500)
val UI_TIMELINE_STAGES_MAXIMUM = ConfigBuilder("spark.ui.timeline.stages.maximum")
.version("3.2.0")
.intConf
.createWithDefault(500)
val UI_TIMELINE_EXECUTORS_MAXIMUM = ConfigBuilder("spark.ui.timeline.executors.maximum")
.version("3.2.0")
.intConf
.createWithDefault(250)
val ACLS_ENABLE = ConfigBuilder("spark.acls.enable")
.version("1.1.0")
.booleanConf

View file

@ -29,6 +29,7 @@ import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.JobExecutionStatus
import org.apache.spark.internal.config.SCHEDULER_MODE
import org.apache.spark.internal.config.UI._
import org.apache.spark.scheduler._
import org.apache.spark.status.AppStatusStore
import org.apache.spark.status.api.v1
@ -40,6 +41,9 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
import ApiHelper._
private val MAX_TIMELINE_JOBS = parent.conf.get(UI_TIMELINE_JOBS_MAXIMUM)
private val MAX_TIMELINE_EXECUTORS = parent.conf.get(UI_TIMELINE_EXECUTORS_MAXIMUM)
private val JOBS_LEGEND =
<div class="legend-area"><svg width="150px" height="85px">
<rect class="succeeded-job-legend"
@ -64,9 +68,12 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
</svg></div>.toString.filter(_ != '\n')
private def makeJobEvent(jobs: Seq[v1.JobData]): Seq[String] = {
val now = System.currentTimeMillis()
jobs.filter { job =>
job.status != JobExecutionStatus.UNKNOWN && job.submissionTime.isDefined
}.map { job =>
}.sortBy { j =>
(j.completionTime.map(_.getTime).getOrElse(now), j.submissionTime.get.getTime)
}.takeRight(MAX_TIMELINE_JOBS).map { job =>
val jobId = job.jobId
val status = job.status
val (_, lastStageDescription) = lastStageNameAndDescription(store, job)
@ -76,7 +83,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
plainText = true).text
val submissionTime = job.submissionTime.get.getTime()
val completionTime = job.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis())
val completionTime = job.completionTime.map(_.getTime()).getOrElse(now)
val classNameByStatus = status match {
case JobExecutionStatus.SUCCEEDED => "succeeded"
case JobExecutionStatus.FAILED => "failed"
@ -118,7 +125,9 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
private def makeExecutorEvent(executors: Seq[v1.ExecutorSummary]):
Seq[String] = {
val events = ListBuffer[String]()
executors.foreach { e =>
executors.sortBy { e =>
e.removeTime.map(_.getTime).getOrElse(e.addTime.getTime)
}.takeRight(MAX_TIMELINE_EXECUTORS).foreach { e =>
val addedEvent =
s"""
|{
@ -192,6 +201,30 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We
</a>
</span> ++
<div id="application-timeline" class="collapsed">
{
if (MAX_TIMELINE_JOBS < jobs.size) {
<div>
<strong>
Only the most recent {MAX_TIMELINE_JOBS} submitted/completed jobs
(of {jobs.size} total) are shown.
</strong>
</div>
} else {
Seq.empty
}
}
{
if (MAX_TIMELINE_EXECUTORS < executors.size) {
<div>
<strong>
Only the most recent {MAX_TIMELINE_EXECUTORS} added/removed executors
(of {executors.size} total) are shown.
</strong>
</div>
} else {
Seq.empty
}
}
<div class="control-panel">
<div id="application-timeline-zoom-lock">
<input type="checkbox"></input>

View file

@ -26,6 +26,7 @@ import scala.xml.{Node, NodeSeq, Unparsed, Utility}
import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.JobExecutionStatus
import org.apache.spark.internal.config.UI._
import org.apache.spark.resource.ResourceProfile
import org.apache.spark.status.AppStatusStore
import org.apache.spark.status.api.v1
@ -34,6 +35,9 @@ import org.apache.spark.ui._
/** Page showing statistics and stage list for a given job */
private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIPage("job") {
private val MAX_TIMELINE_STAGES = parent.conf.get(UI_TIMELINE_STAGES_MAXIMUM)
private val MAX_TIMELINE_EXECUTORS = parent.conf.get(UI_TIMELINE_EXECUTORS_MAXIMUM)
private val STAGES_LEGEND =
<div class="legend-area"><svg width="150px" height="85px">
<rect class="completed-stage-legend"
@ -58,14 +62,17 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
</svg></div>.toString.filter(_ != '\n')
private def makeStageEvent(stageInfos: Seq[v1.StageData]): Seq[String] = {
stageInfos.map { stage =>
val now = System.currentTimeMillis()
stageInfos.sortBy { s =>
(s.completionTime.map(_.getTime).getOrElse(now), s.submissionTime.get.getTime)
}.takeRight(MAX_TIMELINE_STAGES).map { stage =>
val stageId = stage.stageId
val attemptId = stage.attemptId
val name = stage.name
val status = stage.status.toString.toLowerCase(Locale.ROOT)
val submissionTime = stage.submissionTime.get.getTime()
val completionTime = stage.completionTime.map(_.getTime())
.getOrElse(System.currentTimeMillis())
.getOrElse(now)
// The timeline library treats contents as HTML, so we have to escape them. We need to add
// extra layers of escaping in order to embed this in a JavaScript string literal.
@ -98,7 +105,9 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
def makeExecutorEvent(executors: Seq[v1.ExecutorSummary]): Seq[String] = {
val events = ListBuffer[String]()
executors.foreach { e =>
executors.sortBy { e =>
e.removeTime.map(_.getTime).getOrElse(e.addTime.getTime)
}.takeRight(MAX_TIMELINE_EXECUTORS).foreach { e =>
val addedEvent =
s"""
|{
@ -172,6 +181,30 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP
</a>
</span> ++
<div id="job-timeline" class="collapsed">
{
if (MAX_TIMELINE_STAGES < stages.size) {
<div>
<strong>
Only the most recent {MAX_TIMELINE_STAGES} submitted/completed stages
(of {stages.size} total) are shown.
</strong>
</div>
} else {
Seq.empty
}
}
{
if (MAX_TIMELINE_EXECUTORS < executors.size) {
<div>
<strong>
Only the most recent {MAX_TIMELINE_EXECUTORS} added/removed executors
(of {executors.size} total) are shown.
</strong>
</div>
} else {
Seq.empty
}
}
<div class="control-panel">
<div id="job-timeline-zoom-lock">
<input type="checkbox"></input>

View file

@ -30,6 +30,7 @@ private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore)
extends SparkUITab(parent, "jobs") {
val sc = parent.sc
val conf = parent.conf
val killEnabled = parent.killEnabled
// Show pool information for only live UI.

View file

@ -398,8 +398,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
{
if (MAX_TIMELINE_TASKS < tasks.size) {
<strong>
This page has more than the maximum number of tasks that can be shown in the
visualization! Only the most recent {MAX_TIMELINE_TASKS} tasks
Only the most recent {MAX_TIMELINE_TASKS} tasks
(of {tasks.size} total) are shown.
</strong>
} else {

View file

@ -1365,6 +1365,38 @@ Apart from these, the following properties are also available, and may be useful
</td>
<td>2.2.3</td>
</tr>
<tr>
<td><code>spark.ui.timeline.executors.maximum</code></td>
<td>250</td>
<td>
The maximum number of executors shown in the event timeline.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.ui.timeline.jobs.maximum</code></td>
<td>500</td>
<td>
The maximum number of jobs shown in the event timeline.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.ui.timeline.stages.maximum</code></td>
<td>500</td>
<td>
The maximum number of stages shown in the event timeline.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.ui.timeline.tasks.maximum</code></td>
<td>1000</td>
<td>
The maximum number of tasks shown in the event timeline.
</td>
<td>1.4.0</td>
</tr>
</table>
### Compression and Serialization