package spark

import java.util.concurrent.ConcurrentHashMap

import akka.actor._
import akka.dispatch._
import akka.pattern.ask
import akka.remote._
import akka.util.Duration
import akka.util.Timeout
import akka.util.duration._

import scala.collection.mutable.HashSet

import spark.storage.BlockManagerId

sealed trait MapOutputTrackerMessage
case class GetMapOutputLocations(shuffleId: Int) extends MapOutputTrackerMessage
case object StopMapOutputTracker extends MapOutputTrackerMessage
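
// Actor that runs on the master and serves map output locations to workers.
// It reads from the bmAddresses map maintained by the enclosing
// MapOutputTracker, and stops itself when asked to.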
class MapOutputTrackerActor(bmAddresses: ConcurrentHashMap[Int, Array[BlockManagerId]])
  extends Actor with Logging {
  def receive = {
    case GetMapOutputLocations(shuffleId: Int) =>
      logInfo("Asked to get map output locations for shuffle " + shuffleId)
      sender ! bmAddresses.get(shuffleId)

    case StopMapOutputTracker =>
      logInfo("MapOutputTrackerActor stopped!")
      sender ! true
      context.stop(self)
  }
}
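
// Tracks the locations (as BlockManagerIds) of the map outputs for each
// shuffle. The master holds the authoritative table and serves it through a
// MapOutputTrackerActor; workers look that actor up remotely and cache the
// locations they fetch.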

class MapOutputTracker(actorSystem: ActorSystem, isMaster: Boolean) extends Logging {
  val ip: String = System.getProperty("spark.master.host", "localhost")
  val port: Int = System.getProperty("spark.master.port", "7077").toInt
  val actorName: String = "MapOutputTracker"

  val timeout = 10.seconds

  private var bmAddresses = new ConcurrentHashMap[Int, Array[BlockManagerId]]

  // Incremented every time a fetch fails so that client nodes know to clear
  // their cache of map output locations if this happens.
  private var generation: Long = 0
  private val generationLock = new java.lang.Object

  var trackerActor: ActorRef = if (isMaster) {
    val actor = actorSystem.actorOf(Props(new MapOutputTrackerActor(bmAddresses)), name = actorName)
    logInfo("Registered MapOutputTrackerActor actor")
    actor
  } else {
    // Actors created through actorSystem.actorOf live under the /user guardian,
    // so the remote lookup path needs a /user segment.
    val url = "akka://spark@%s:%s/user/%s".format(ip, port, actorName)
    actorSystem.actorFor(url)
  }

  // Send a message to the trackerActor and get its result within a default timeout, or
  // throw a SparkException if this fails.
  def askTracker(message: Any): Any = {
    try {
      val future = trackerActor.ask(message)(timeout)
      return Await.result(future, timeout)
    } catch {
      case e: Exception =>
        throw new SparkException("Error communicating with MapOutputTracker", e)
    }
  }

  // Send a message to the trackerActor and expect it to reply with true,
  // throwing a SparkException if it replies with anything else.
  def communicate(message: Any) {
    if (askTracker(message) != true) {
      throw new SparkException("Error reply received from MapOutputTracker")
    }
  }
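
  // Register a new shuffle, allocating an empty slot for each of its numMaps
  // map outputs; registering the same shuffle ID twice is an error.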
  def registerShuffle(shuffleId: Int, numMaps: Int) {
    if (bmAddresses.get(shuffleId) != null) {
      throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice")
    }
    bmAddresses.put(shuffleId, new Array[BlockManagerId](numMaps))
  }
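
  // Record which block manager holds the output of a single map task.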
  def registerMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) {
    val array = bmAddresses.get(shuffleId)
    array.synchronized {
      array(mapId) = bmAddress
    }
  }
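
  // Register the locations of all of a shuffle's map outputs at once, copying
  // the given array; optionally bump the generation so that workers clear
  // their cached locations.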
  def registerMapOutputs(shuffleId: Int, locs: Array[BlockManagerId], changeGeneration: Boolean = false) {
    bmAddresses.put(shuffleId, Array[BlockManagerId]() ++ locs)
    if (changeGeneration) {
      incrementGeneration()
    }
  }
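
  // Remove a map output's location, but only if it is still recorded at the
  // given block manager (it may since have been re-registered elsewhere).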
  def unregisterMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) {
    val array = bmAddresses.get(shuffleId)
    if (array != null) {
      array.synchronized {
        if (array(mapId) == bmAddress) {
          array(mapId) = null
        }
      }
      incrementGeneration()
    } else {
      throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID")
    }
  }

  // Remembers which map output locations are currently being fetched on a worker
  val fetching = new HashSet[Int]

  // Called on possibly remote nodes to get the server URIs for a given shuffle
  def getServerAddresses(shuffleId: Int): Array[BlockManagerId] = {
    val locs = bmAddresses.get(shuffleId)
    if (locs == null) {
      logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
      fetching.synchronized {
        if (fetching.contains(shuffleId)) {
          // Someone else is fetching it; wait for them to be done
          while (fetching.contains(shuffleId)) {
            try {
              fetching.wait()
            } catch {
              case _: InterruptedException =>  // ignore and recheck the loop condition
            }
          }
          return bmAddresses.get(shuffleId)
        } else {
          fetching += shuffleId
        }
      }
      // We won the race to fetch the output locs; do so
      logInfo("Doing the fetch; tracker actor = " + trackerActor)
      val fetched = askTracker(GetMapOutputLocations(shuffleId)).asInstanceOf[Array[BlockManagerId]]
      logInfo("Got the output locations")
      bmAddresses.put(shuffleId, fetched)
      fetching.synchronized {
        fetching -= shuffleId
        fetching.notifyAll()
      }
      return fetched
    } else {
      return locs
    }
  }
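
  // Ask the tracker actor to stop and clear this node's cached state.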
  def stop() {
    communicate(StopMapOutputTracker)
    bmAddresses.clear()
    trackerActor = null
  }

  // Called on the master to increment the generation number
  def incrementGeneration() {
    generationLock.synchronized {
      generation += 1
    }
  }

  // Called on the master or workers to get the current generation number
  def getGeneration: Long = {
    generationLock.synchronized {
      return generation
    }
  }

  // Called on workers to update the generation number, potentially clearing old outputs
  // because of a fetch failure. (Each Mesos task calls this with the latest generation
  // number on the master at the time it was created.)
  def updateGeneration(newGen: Long) {
    generationLock.synchronized {
      if (newGen > generation) {
        logInfo("Updating generation to " + newGen + " and clearing cache")
        bmAddresses = new ConcurrentHashMap[Int, Array[BlockManagerId]]
        generation = newGen
      }
    }
  }
}
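
// A minimal usage sketch (illustrative only; `system` and `someBlockManagerId`
// are assumed to exist and are not defined in this file):
//
//   val masterTracker = new MapOutputTracker(system, isMaster = true)
//   masterTracker.registerShuffle(0, numMaps = 1)
//   masterTracker.registerMapOutput(0, 0, someBlockManagerId)
//
// A worker-side tracker (isMaster = false) resolves the same actor by URL, and
// its getServerAddresses(0) either returns a cached copy or asks the master.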