[SPARK-17742][CORE] Fail launcher app handle if child process exits with error.

This is a follow up to cba826d0; that commit set the app handle state
to "LOST" when the child process exited, but that can be ambiguous. This
change sets the state to "FAILED" if the exit code was non-zero and
the handle state wasn't a failure state, or "LOST" if the exit status
was zero.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #19012 from vanzin/SPARK-17742.
This commit is contained in:
Marcelo Vanzin 2017-08-25 10:04:21 -07:00
parent 1813c4a8dd
commit 628bdeabda
2 changed files with 41 additions and 7 deletions

View file

@ -156,9 +156,15 @@ class ChildProcAppHandle implements SparkAppHandle {
* the exit code.
*/
void monitorChild() {
while (childProc.isAlive()) {
Process proc = childProc;
if (proc == null) {
// Process may have already been disposed of, e.g. by calling kill().
return;
}
while (proc.isAlive()) {
try {
childProc.waitFor();
proc.waitFor();
} catch (Exception e) {
LOG.log(Level.WARNING, "Exception waiting for child process to exit.", e);
}
@ -173,15 +179,24 @@ class ChildProcAppHandle implements SparkAppHandle {
int ec;
try {
ec = childProc.exitValue();
ec = proc.exitValue();
} catch (Exception e) {
LOG.log(Level.WARNING, "Exception getting child process exit code, assuming failure.", e);
ec = 1;
}
// Only override the success state; leave other fail states alone.
if (!state.isFinal() || (ec != 0 && state == State.FINISHED)) {
state = State.LOST;
State newState = null;
if (ec != 0) {
// Override state with failure if the current state is not final, or is success.
if (!state.isFinal() || state == State.FINISHED) {
newState = State.FAILED;
}
} else if (!state.isFinal()) {
newState = State.LOST;
}
if (newState != null) {
state = newState;
fireEvent(false);
}
}

View file

@ -46,7 +46,9 @@ public class ChildProcAppHandleSuite extends BaseSuite {
private static final List<String> TEST_SCRIPT = Arrays.asList(
"#!/bin/sh",
"echo \"output\"",
"echo \"error\" 1>&2");
"echo \"error\" 1>&2",
"while [ -n \"$1\" ]; do EC=$1; shift; done",
"exit $EC");
private static File TEST_SCRIPT_PATH;
@ -176,6 +178,7 @@ public class ChildProcAppHandleSuite extends BaseSuite {
@Test
public void testProcMonitorWithOutputRedirection() throws Exception {
assumeFalse(isWindows());
File err = Files.createTempFile("out", "txt").toFile();
SparkAppHandle handle = new TestSparkLauncher()
.redirectError()
@ -187,6 +190,7 @@ public class ChildProcAppHandleSuite extends BaseSuite {
@Test
public void testProcMonitorWithLogRedirection() throws Exception {
assumeFalse(isWindows());
SparkAppHandle handle = new TestSparkLauncher()
.redirectToLog(getClass().getName())
.startApplication();
@ -194,6 +198,16 @@ public class ChildProcAppHandleSuite extends BaseSuite {
assertEquals(SparkAppHandle.State.LOST, handle.getState());
}
@Test
public void testFailedChildProc() throws Exception {
assumeFalse(isWindows());
SparkAppHandle handle = new TestSparkLauncher(1)
.redirectToLog(getClass().getName())
.startApplication();
waitFor(handle);
assertEquals(SparkAppHandle.State.FAILED, handle.getState());
}
private void waitFor(SparkAppHandle handle) throws Exception {
long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10);
try {
@ -212,7 +226,12 @@ public class ChildProcAppHandleSuite extends BaseSuite {
private static class TestSparkLauncher extends SparkLauncher {
TestSparkLauncher() {
this(0);
}
TestSparkLauncher(int ec) {
setAppResource("outputredirtest");
addAppArgs(String.valueOf(ec));
}
@Override