Merge pull request #526 from tgravescs/yarn_client_stop_am_fix. Closes #526.

spark on yarn - yarn-client mode doesn't always exit immediately

https://spark-project.atlassian.net/browse/SPARK-1049

If you run in yarn-client mode but don't get all the workers you requested right away, and you then exit your application, the application master stays around until it gets the number of workers you initially requested. This is a waste of resources. The AM should exit immediately when the client goes away.

This fix simply checks whether the driver has closed while the application master is still waiting for the initial number of workers.

Author: Thomas Graves <tgraves@apache.org>

== Merge branch commits ==

commit 03f40a62584b6bdd094ba91670cd4aa6afe7cd81
Author: Thomas Graves <tgraves@apache.org>
Date:   Fri Jan 31 11:23:10 2014 -0600

    spark on yarn - yarn-client mode doesn't always exit immediately
This commit is contained in:
Thomas Graves 2014-02-05 23:37:07 -08:00 committed by Reynold Xin
parent 18c4ee71e2
commit 38020961d1
2 changed files with 2 additions and 2 deletions

View file

@ -210,7 +210,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration, spar
// Wait until all containers have finished
// TODO: This is a bit ugly. Can we make it nicer?
// TODO: Handle container failure
while(yarnAllocator.getNumWorkersRunning < args.numWorkers) {
while ((yarnAllocator.getNumWorkersRunning < args.numWorkers) && (!driverClosed)) {
yarnAllocator.allocateContainers(math.max(args.numWorkers - yarnAllocator.getNumWorkersRunning, 0))
Thread.sleep(100)
}

View file

@ -193,7 +193,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration, spar
// TODO: Handle container failure
yarnAllocator.addResourceRequests(args.numWorkers)
while (yarnAllocator.getNumWorkersRunning < args.numWorkers) {
while ((yarnAllocator.getNumWorkersRunning < args.numWorkers) && (!driverClosed)) {
yarnAllocator.allocateResources()
Thread.sleep(100)
}