[SPARK-4166][Core][WebUI] Display the executor ID in the Web UI when ExecutorLostFailure happens

Currently, when an ExecutorLostFailure happens, the Web UI only displays `ExecutorLostFailure (executor lost)`. Including the executor ID in this message makes it easier to locate the faulty executor.

Author: zsxwing <zsxwing@gmail.com>

Closes #3033 from zsxwing/SPARK-4166 and squashes the following commits:

ff4664c [zsxwing] Backward-compatible support
c5c4cf2 [zsxwing] Display the executor ID in the Web UI when ExecutorLostFailure happens
This commit is contained in:
zsxwing 2014-11-02 10:44:52 -08:00 committed by Josh Rosen
parent 6181577e99
commit 4e6a7a0b3e
5 changed files with 13 additions and 8 deletions

View file

@ -117,8 +117,8 @@ case object TaskKilled extends TaskFailedReason {
* the task crashed the JVM.
*/
@DeveloperApi
case object ExecutorLostFailure extends TaskFailedReason {
override def toErrorString: String = "ExecutorLostFailure (executor lost)"
case class ExecutorLostFailure(execId: String) extends TaskFailedReason {
override def toErrorString: String = s"ExecutorLostFailure (executor ${execId} lost)"
}
/**

View file

@ -732,7 +732,7 @@ private[spark] class TaskSetManager(
}
// Also re-enqueue any tasks that were running on the node
for ((tid, info) <- taskInfos if info.running && info.executorId == execId) {
handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure)
handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure(execId))
}
// recalculate valid locality levels and waits when executor is lost
recomputeLocality()

View file

@ -272,7 +272,7 @@ private[spark] object JsonProtocol {
def taskEndReasonToJson(taskEndReason: TaskEndReason): JValue = {
val reason = Utils.getFormattedClassName(taskEndReason)
val json = taskEndReason match {
val json: JObject = taskEndReason match {
case fetchFailed: FetchFailed =>
val blockManagerAddress = Option(fetchFailed.bmAddress).
map(blockManagerIdToJson).getOrElse(JNothing)
@ -287,6 +287,8 @@ private[spark] object JsonProtocol {
("Description" -> exceptionFailure.description) ~
("Stack Trace" -> stackTrace) ~
("Metrics" -> metrics)
case ExecutorLostFailure(executorId) =>
("Executor ID" -> executorId)
case _ => Utils.emptyJson
}
("Reason" -> reason) ~ json
@ -636,7 +638,9 @@ private[spark] object JsonProtocol {
new ExceptionFailure(className, description, stackTrace, metrics)
case `taskResultLost` => TaskResultLost
case `taskKilled` => TaskKilled
case `executorLostFailure` => ExecutorLostFailure
case `executorLostFailure` =>
val executorId = Utils.jsonOption(json \ "Executor ID").map(_.extract[String])
ExecutorLostFailure(executorId.getOrElse("Unknown"))
case `unknownReason` => UnknownReason
}
}

View file

@ -119,7 +119,7 @@ class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matc
new ExceptionFailure("Exception", "description", null, None),
TaskResultLost,
TaskKilled,
ExecutorLostFailure,
ExecutorLostFailure("0"),
UnknownReason)
var failCount = 0
for (reason <- taskFailedReasons) {

View file

@ -115,7 +115,7 @@ class JsonProtocolSuite extends FunSuite {
testTaskEndReason(exceptionFailure)
testTaskEndReason(TaskResultLost)
testTaskEndReason(TaskKilled)
testTaskEndReason(ExecutorLostFailure)
testTaskEndReason(ExecutorLostFailure("100"))
testTaskEndReason(UnknownReason)
// BlockId
@ -403,7 +403,8 @@ class JsonProtocolSuite extends FunSuite {
assertOptionEquals(r1.metrics, r2.metrics, assertTaskMetricsEquals)
case (TaskResultLost, TaskResultLost) =>
case (TaskKilled, TaskKilled) =>
case (ExecutorLostFailure, ExecutorLostFailure) =>
case (ExecutorLostFailure(execId1), ExecutorLostFailure(execId2)) =>
assert(execId1 === execId2)
case (UnknownReason, UnknownReason) =>
case _ => fail("Task end reasons don't match in types!")
}