Style fixes based on code review

2013-08-09 16:46:34 -07:00 · 2013-08-09 16:46:34 -07:00 · 29b79714f9
parent 81e1d4a7d1
commit 29b79714f9
3 changed files with 110 additions and 132 deletions
--- a/core/src/main/scala/spark/scheduler/JobLogger.scala
+++ b/core/src/main/scala/spark/scheduler/JobLogger.scala
@ -23,10 +23,11 @@ import java.io.FileNotFoundException
 import java.text.SimpleDateFormat
 import java.util.{Date, Properties}
 import java.util.concurrent.LinkedBlockingQueue
 import scala.collection.mutable.{Map, HashMap, ListBuffer}
 import scala.io.Source
 import spark._
 import spark.SparkContext
 import spark.executor.TaskMetrics
 import spark.scheduler.cluster.TaskInfo
--- a/core/src/main/scala/spark/scheduler/SparkListenerEventProcessor.scala
+++ b/core/src/main/scala/spark/scheduler/SparkListenerEventProcessor.scala
@ -17,16 +17,20 @@
 package spark.scheduler
 import scala.collection.mutable.ArrayBuffer
 import java.util.concurrent.LinkedBlockingQueue
 import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
 import spark.Logging
 /** Asynchronously passes SparkListenerEvents to registered SparkListeners. */
-class SparkListenerEventProcessor() {
+class SparkListenerEventProcessor() extends Logging {
-  /* sparkListeners is not thread safe, so this assumes that listeners are all added before any
+  private val sparkListeners = new ArrayBuffer[SparkListener]() with SynchronizedBuffer[SparkListener]
-   * SparkListenerEvents occur. */
+
-  private val sparkListeners = ArrayBuffer[SparkListener]()
+  /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than
-  private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents]
+   * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */
  private val EVENT_QUEUE_CAPACITY = 10000 
  private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents](EVENT_QUEUE_CAPACITY)
  new Thread("SparkListenerEventProcessor") {
    setDaemon(true)
@ -57,6 +61,12 @@ class SparkListenerEventProcessor() {
  }
  /**
   * Hands an event to the queue. With the `offer` call below, a full queue does not block the
   * caller: the event is dropped and an error is logged instead.
   */
  def addEvent(event: SparkListenerEvents) {
-    eventQueue.put(event)
+    val eventAdded = eventQueue.offer(event)
    // offer() returns false rather than waiting when the bounded queue has no room left.
    if (!eventAdded) {
      logError("Dropping SparkListenerEvent because no remaining room in event queue. " +
        "This likely means one of the SparkListeners is too slow and cannot keep up with the " +
        "rate at which tasks are being started by the scheduler.")
    }
  }
 }
--- a/core/src/main/scala/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/spark/ui/jobs/JobProgressListener.scala
@ -9,7 +9,8 @@ import spark.scheduler.cluster.TaskInfo
 import spark.executor.TaskMetrics
 import collection.mutable
-/** Tracks task-level information to be displayed in the UI.
+/**
 * Tracks task-level information to be displayed in the UI.
 *
 * All access to the data structures in this class must be synchronized on the
 * class, since the UI thread and the DAGScheduler event loop may otherwise
@ -44,19 +45,16 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
  // No-op: this listener does nothing when a job starts.
  override def onJobStart(jobStart: SparkListenerJobStart) {}
-  override def onStageCompleted(stageCompleted: StageCompleted) = {
+  override def onStageCompleted(stageCompleted: StageCompleted) : Unit = synchronized {
    this.synchronized {
    // Remove the stage from the active set of the pool it maps to and from the overall active
    // set, append it to the completed list, then hand that list to trimIfNecessary.
    val stage = stageCompleted.stageInfo.stage
    poolToActiveStages(stageToPool(stage)) -= stage
    activeStages -= stage
    completedStages += stage
    trimIfNecessary(completedStages)
  }
  }
  /** If stages is too large, remove and garbage collect old stages */
-  def trimIfNecessary(stages: ListBuffer[Stage]) {
+  def trimIfNecessary(stages: ListBuffer[Stage]): Unit = synchronized {
    this.synchronized {
    if (stages.size > RETAINED_STAGES) {
      val toRemove = RETAINED_STAGES / 10
      stages.takeRight(toRemove).foreach( s => {
@ -73,11 +71,9 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
      stages.trimEnd(toRemove)
    }
  }
  }
  /** For FIFO, all stages are contained by "default" pool but "default" pool here is meaningless */
-  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = {
+  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) : Unit = synchronized {
    this.synchronized {
    val stage = stageSubmitted.stage
    activeStages += stage
@ -94,10 +90,8 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
    val stages = poolToActiveStages.getOrElseUpdate(poolName, new HashSet[Stage]())
    stages += stage
  }
  }
-  override def onTaskStart(taskStart: SparkListenerTaskStart) {
+  override def onTaskStart(taskStart: SparkListenerTaskStart) : Unit = synchronized {
    this.synchronized {
    val sid = taskStart.task.stageId
    val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]())
    tasksActive += taskStart.taskInfo
@ -106,10 +100,8 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
    taskList += ((taskStart.taskInfo, None, None))
    stageToTaskInfos(sid) = taskList
  }
  }
-  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) : Unit = synchronized {
    this.synchronized {
    val sid = taskEnd.task.stageId
    val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]())
    tasksActive -= taskEnd.taskInfo
@ -146,10 +138,8 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
    taskList += ((taskEnd.taskInfo, metrics, failureInfo))
    stageToTaskInfos(sid) = taskList
  }
  }
-  override def onJobEnd(jobEnd: SparkListenerJobEnd) {
+  override def onJobEnd(jobEnd: SparkListenerJobEnd) : Unit = synchronized {
    this.synchronized {
    jobEnd match {
      case end: SparkListenerJobEnd =>
        end.jobResult match {
@ -163,27 +153,4 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList
      case _ =>
    }
  }
  }
  /**
   * Is this stage's input from a shuffle read.
   *
   * Inspects only the first recorded task of the stage whose metrics are present and reports
   * whether those metrics include shuffle-read metrics. Entries that carry no metrics are
   * skipped instead of being read as "no shuffle read".
   *
   * @param stageID id of the stage to look up in `stageToTaskInfos`
   * @return true if the first task with metrics has shuffle-read metrics; false if the stage is
   *         unknown or none of its recorded tasks has metrics
   */
  def hasShuffleRead(stageID: Int): Boolean = {
    this.synchronized {
      // Stop at the first entry that carries metrics so that not every task has to be scanned.
      // The null guard mirrors the original check; an Option-typed slot is otherwise never null,
      // which is why a plain `!= null` test let metric-less entries decide the answer.
      stageToTaskInfos.get(stageID)
        .flatMap(tasks => tasks.find(t => t._2 != null && t._2.isDefined))
        .exists(t => t._2.flatMap(m => m.shuffleReadMetrics).isDefined)
    }
  }
  /**
   * Is this stage's output to a shuffle write.
   *
   * Inspects only the first recorded task of the stage whose metrics are present and reports
   * whether those metrics include shuffle-write metrics. Entries that carry no metrics are
   * skipped instead of being read as "no shuffle write".
   *
   * @param stageID id of the stage to look up in `stageToTaskInfos`
   * @return true if the first task with metrics has shuffle-write metrics; false if the stage is
   *         unknown or none of its recorded tasks has metrics
   */
  def hasShuffleWrite(stageID: Int): Boolean = {
    this.synchronized {
      // Stop at the first entry that carries metrics so that not every task has to be scanned.
      // The null guard mirrors the original check; an Option-typed slot is otherwise never null,
      // which is why a plain `!= null` test let metric-less entries decide the answer.
      stageToTaskInfos.get(stageID)
        .flatMap(tasks => tasks.find(t => t._2 != null && t._2.isDefined))
        .exists(t => t._2.flatMap(m => m.shuffleWriteMetrics).isDefined)
    }
  }
 }