Revert [SPARK-14485][CORE] ignore task finished for executor lost

This reverts commit 695dbc816a.

This change is being reverted because it hurts the performance of some jobs, and
only helps in a narrow set of cases. For more discussion, refer to the JIRA.

Author: Kay Ousterhout <kayousterhout@gmail.com>

Closes #13580 from kayousterhout/revert-SPARK-14485.
This commit is contained in:
Kay Ousterhout 2016-06-10 12:50:27 -07:00
parent 2c8f40cea1
commit 5c16ad0d52

View file

@@ -352,11 +352,9 @@ private[spark] class TaskSchedulerImpl(
}
taskIdToTaskSetManager.get(tid) match {
case Some(taskSet) =>
var executorId: String = null
if (TaskState.isFinished(state)) {
taskIdToTaskSetManager.remove(tid)
taskIdToExecutorId.remove(tid).foreach { execId =>
executorId = execId
if (executorIdToTaskCount.contains(execId)) {
executorIdToTaskCount(execId) -= 1
}
@@ -364,17 +362,7 @@
}
if (state == TaskState.FINISHED) {
taskSet.removeRunningTask(tid)
// In some case, executor has already been removed by driver for heartbeats timeout,
// but at sometime, before executor killed by cluster, the task of running on this
// executor is finished and return task success state to driver. However, this kinds
// of task should be ignored, because the task on this executor is already re-queued
// by driver. For more details, can check in SPARK-14485.
if (executorId != null && !executorIdToTaskCount.contains(executorId)) {
logInfo(s"Ignoring update with state $state for TID $tid because its executor " +
s"has already been removed by driver")
} else {
taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
}
taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
} else if (Set(TaskState.FAILED, TaskState.KILLED, TaskState.LOST).contains(state)) {
taskSet.removeRunningTask(tid)
taskResultGetter.enqueueFailedTask(taskSet, tid, state, serializedData)