2011-02-27 17:27:12 -05:00
|
|
|
package spark
|
|
|
|
|
|
|
|
import java.util.concurrent.ConcurrentHashMap
|
|
|
|
|
2011-02-27 22:15:52 -05:00
|
|
|
import scala.actors._
|
|
|
|
import scala.actors.Actor._
|
|
|
|
import scala.actors.remote._
|
2011-03-06 19:16:38 -05:00
|
|
|
import scala.collection.mutable.HashSet
|
2011-02-27 22:15:52 -05:00
|
|
|
|
2011-03-06 19:16:38 -05:00
|
|
|
/** Messages understood by the MapOutputTrackerActor. Sealed so matches are exhaustive. */
sealed trait MapOutputTrackerMessage

/** Request the array of map output server URIs (indexed by map ID) for the given shuffle. */
case class GetMapOutputLocations(shuffleId: Int) extends MapOutputTrackerMessage

/** Ask the tracker actor to reply 'OK and shut itself down. */
case object StopMapOutputTracker extends MapOutputTrackerMessage
|
2011-03-06 19:16:38 -05:00
|
|
|
|
2011-05-17 15:41:13 -04:00
|
|
|
/**
 * Actor that runs on the master and serves shuffle map output locations to
 * workers over scala.actors remote messaging.
 *
 * @param serverUris map from shuffle ID to the array of server URIs (indexed
 *                   by map task ID) for that shuffle; shared with the
 *                   enclosing MapOutputTracker, which updates it.
 */
class MapOutputTrackerActor(serverUris: ConcurrentHashMap[Int, Array[String]])
extends DaemonActor with Logging {
  def act() {
    // Bind on the master port and register under a well-known symbol so
    // workers can look this actor up with RemoteActor.select.
    val port = System.getProperty("spark.master.port").toInt
    RemoteActor.alive(port)
    RemoteActor.register('MapOutputTracker, self)
    logInfo("Registered actor on port " + port)

    loop {
      react {
        case GetMapOutputLocations(shuffleId: Int) =>
          logInfo("Asked to get map output locations for shuffle " + shuffleId)
          // May reply null if the shuffle is unknown; callers handle that.
          reply(serverUris.get(shuffleId))

        case StopMapOutputTracker =>
          reply('OK)
          exit()
      }
    }
  }
}
|
|
|
|
|
2011-05-17 15:41:13 -04:00
|
|
|
/**
 * Tracks the output locations (server URIs) of each shuffle's map tasks.
 *
 * On the master (isMaster = true) this starts a MapOutputTrackerActor that
 * serves locations to workers; on workers it holds a remote reference to that
 * actor and caches fetched locations locally. A "generation" counter lets
 * workers know when to discard their cache after a fetch failure.
 *
 * @param isMaster whether this instance runs on the master node
 */
class MapOutputTracker(isMaster: Boolean) extends Logging {
  // Local actor on the master; remote proxy on workers. Set in the
  // constructor below, cleared by stop().
  var trackerActor: AbstractActor = null

  // Map from shuffle ID to the array of server URIs, one slot per map task.
  // Reassigned wholesale in updateGeneration(), hence a var.
  private var serverUris = new ConcurrentHashMap[Int, Array[String]]

  // Incremented every time a fetch fails so that client nodes know to clear
  // their cache of map output locations if this happens.
  private var generation: Long = 0
  private val generationLock = new java.lang.Object

  if (isMaster) {
    val tracker = new MapOutputTrackerActor(serverUris)
    tracker.start()
    trackerActor = tracker
  } else {
    val host = System.getProperty("spark.master.host")
    val port = System.getProperty("spark.master.port").toInt
    trackerActor = RemoteActor.select(Node(host, port), 'MapOutputTracker)
  }

  /**
   * Registers a new shuffle with the given number of map tasks.
   * @throws IllegalArgumentException if the shuffle ID is already registered
   */
  def registerShuffle(shuffleId: Int, numMaps: Int) {
    if (serverUris.get(shuffleId) != null) {
      throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice")
    }
    serverUris.put(shuffleId, new Array[String](numMaps))
  }

  /** Records the server URI serving the output of one finished map task. */
  def registerMapOutput(shuffleId: Int, mapId: Int, serverUri: String) {
    val array = serverUris.get(shuffleId)
    // Lock the per-shuffle array: multiple map tasks may finish concurrently.
    array.synchronized {
      array(mapId) = serverUri
    }
  }

  /** Registers the locations of all of a shuffle's map outputs at once (defensive copy). */
  def registerMapOutputs(shuffleId: Int, locs: Array[String]) {
    serverUris.put(shuffleId, Array[String]() ++ locs)
  }

  /**
   * Removes a map output location after a fetch failure, but only if the slot
   * still points at the failing server (it may have been re-registered since).
   * Bumps the generation so workers clear their stale caches.
   * @throws SparkException if the shuffle ID was never registered
   */
  def unregisterMapOutput(shuffleId: Int, mapId: Int, serverUri: String) {
    val array = serverUris.get(shuffleId)
    if (array != null) {
      array.synchronized {
        if (array(mapId) == serverUri) {
          array(mapId) = null
        }
      }
      incrementGeneration()
    } else {
      throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID")
    }
  }

  // Remembers which map output locations are currently being fetched on a worker
  val fetching = new HashSet[Int]

  /**
   * Called on possibly remote nodes to get the server URIs for a given shuffle.
   * On a cache miss, fetches from the master actor; only one thread fetches a
   * given shuffle at a time while the others wait on `fetching` for its result.
   */
  def getServerUris(shuffleId: Int): Array[String] = {
    val locs = serverUris.get(shuffleId)
    if (locs == null) {
      logInfo("Don't have map outputs for " + shuffleId + ", fetching them")
      fetching.synchronized {
        if (fetching.contains(shuffleId)) {
          // Someone else is fetching it; wait for them to be done
          while (fetching.contains(shuffleId)) {
            try {
              fetching.wait()
            } catch {
              // Spurious wakeups / interrupts just re-check the loop condition;
              // don't swallow anything broader than InterruptedException.
              case e: InterruptedException =>
            }
          }
          return serverUris.get(shuffleId)
        } else {
          fetching += shuffleId
        }
      }
      // We won the race to fetch the output locs; do so
      logInfo("Doing the fetch; tracker actor = " + trackerActor)
      val fetched = (trackerActor !? GetMapOutputLocations(shuffleId)).asInstanceOf[Array[String]]
      serverUris.put(shuffleId, fetched)
      fetching.synchronized {
        fetching -= shuffleId
        fetching.notifyAll()
      }
      return fetched
    } else {
      return locs
    }
  }

  /** Builds the HTTP URI for one map output file on the given server. */
  def getMapOutputUri(serverUri: String, shuffleId: Int, mapId: Int, reduceId: Int): String = {
    "%s/shuffle/%s/%s/%s".format(serverUri, shuffleId, mapId, reduceId)
  }

  /** Stops the tracker actor (blocking until it acknowledges) and clears all state. */
  def stop() {
    trackerActor !? StopMapOutputTracker
    serverUris.clear()
    trackerActor = null
  }

  // Called on master to increment the generation number
  def incrementGeneration() {
    generationLock.synchronized {
      generation += 1
    }
  }

  // Called on master or workers to get current generation number
  def getGeneration: Long = {
    generationLock.synchronized {
      return generation
    }
  }

  // Called on workers to update the generation number, potentially clearing old outputs
  // because of a fetch failure. (Each Mesos task calls this with the latest generation
  // number on the master at the time it was created.)
  def updateGeneration(newGen: Long) {
    generationLock.synchronized {
      if (newGen > generation) {
        logInfo("Updating generation to " + newGen + " and clearing cache")
        serverUris = new ConcurrentHashMap[Int, Array[String]]
        generation = newGen
      }
    }
  }
}
|