[SPARK-4166][Core][WebUI] Display the executor ID in the Web UI when ExecutorLostFailure happens
Currently, when an ExecutorLostFailure happens, the Web UI only displays `ExecutorLostFailure (executor lost)`. Adding the executor ID will help locate the faulty executor.

Author: zsxwing <zsxwing@gmail.com>

Closes #3033 from zsxwing/SPARK-4166 and squashes the following commits:

ff4664c [zsxwing] Backward-compatible support
c5c4cf2 [zsxwing] Display the executor ID in the Web UI when ExecutorLostFailure happens
This commit is contained in:
parent
6181577e99
commit
4e6a7a0b3e
|
@ -117,8 +117,8 @@ case object TaskKilled extends TaskFailedReason {
|
|||
* the task crashed the JVM.
|
||||
*/
|
||||
@DeveloperApi
|
||||
case object ExecutorLostFailure extends TaskFailedReason {
|
||||
override def toErrorString: String = "ExecutorLostFailure (executor lost)"
|
||||
case class ExecutorLostFailure(execId: String) extends TaskFailedReason {
|
||||
override def toErrorString: String = s"ExecutorLostFailure (executor ${execId} lost)"
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -732,7 +732,7 @@ private[spark] class TaskSetManager(
|
|||
}
|
||||
// Also re-enqueue any tasks that were running on the node
|
||||
for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
|
||||
handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure)
|
||||
handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure(execId))
|
||||
}
|
||||
// recalculate valid locality levels and waits when executor is lost
|
||||
recomputeLocality()
|
||||
|
|
|
@ -272,7 +272,7 @@ private[spark] object JsonProtocol {
|
|||
|
||||
def taskEndReasonToJson(taskEndReason: TaskEndReason): JValue = {
|
||||
val reason = Utils.getFormattedClassName(taskEndReason)
|
||||
val json = taskEndReason match {
|
||||
val json: JObject = taskEndReason match {
|
||||
case fetchFailed: FetchFailed =>
|
||||
val blockManagerAddress = Option(fetchFailed.bmAddress).
|
||||
map(blockManagerIdToJson).getOrElse(JNothing)
|
||||
|
@ -287,6 +287,8 @@ private[spark] object JsonProtocol {
|
|||
("Description" -> exceptionFailure.description) ~
|
||||
("Stack Trace" -> stackTrace) ~
|
||||
("Metrics" -> metrics)
|
||||
case ExecutorLostFailure(executorId) =>
|
||||
("Executor ID" -> executorId)
|
||||
case _ => Utils.emptyJson
|
||||
}
|
||||
("Reason" -> reason) ~ json
|
||||
|
@ -636,7 +638,9 @@ private[spark] object JsonProtocol {
|
|||
new ExceptionFailure(className, description, stackTrace, metrics)
|
||||
case `taskResultLost` => TaskResultLost
|
||||
case `taskKilled` => TaskKilled
|
||||
case `executorLostFailure` => ExecutorLostFailure
|
||||
case `executorLostFailure` =>
|
||||
val executorId = Utils.jsonOption(json \ "Executor ID").map(_.extract[String])
|
||||
ExecutorLostFailure(executorId.getOrElse("Unknown"))
|
||||
case `unknownReason` => UnknownReason
|
||||
}
|
||||
}
|
||||
|
|
|
@ -119,7 +119,7 @@ class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matc
|
|||
new ExceptionFailure("Exception", "description", null, None),
|
||||
TaskResultLost,
|
||||
TaskKilled,
|
||||
ExecutorLostFailure,
|
||||
ExecutorLostFailure("0"),
|
||||
UnknownReason)
|
||||
var failCount = 0
|
||||
for (reason <- taskFailedReasons) {
|
||||
|
|
|
@ -115,7 +115,7 @@ class JsonProtocolSuite extends FunSuite {
|
|||
testTaskEndReason(exceptionFailure)
|
||||
testTaskEndReason(TaskResultLost)
|
||||
testTaskEndReason(TaskKilled)
|
||||
testTaskEndReason(ExecutorLostFailure)
|
||||
testTaskEndReason(ExecutorLostFailure("100"))
|
||||
testTaskEndReason(UnknownReason)
|
||||
|
||||
// BlockId
|
||||
|
@ -403,7 +403,8 @@ class JsonProtocolSuite extends FunSuite {
|
|||
assertOptionEquals(r1.metrics, r2.metrics, assertTaskMetricsEquals)
|
||||
case (TaskResultLost, TaskResultLost) =>
|
||||
case (TaskKilled, TaskKilled) =>
|
||||
case (ExecutorLostFailure, ExecutorLostFailure) =>
|
||||
case (ExecutorLostFailure(execId1), ExecutorLostFailure(execId2)) =>
|
||||
assert(execId1 === execId2)
|
||||
case (UnknownReason, UnknownReason) =>
|
||||
case _ => fail("Task end reasons don't match in types!")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue