[SPARK-27219][CORE] Treat timeouts as fatal in SASL fallback path.

When a timeout happens, we don't know the state of the remote end,
so there is no point in doing anything else, since it will most probably
fail anyway.

The change also demotes the log message printed when falling back to
SASL, since a warning is too noisy for cases where the fallback is really
needed (e.g. old shuffle service, or shuffle service with new auth
disabled).

Closes #24160 from vanzin/SPARK-27219.

Authored-by: Marcelo Vanzin <vanzin@cloudera.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Marcelo Vanzin 2019-03-24 21:49:54 -07:00 committed by Dongjoon Hyun
parent 84ec06d95e
commit db801cf3f2

View file

@ -20,6 +20,7 @@ package org.apache.spark.network.crypto;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.GeneralSecurityException;
import java.util.concurrent.TimeoutException;
import com.google.common.base.Throwables;
import io.netty.buffer.ByteBuf;
@ -82,13 +83,19 @@ public class AuthClientBootstrap implements TransportClientBootstrap {
} catch (RuntimeException e) {
// There isn't a good exception that can be caught here to know whether it's really
// OK to switch back to SASL (because the server doesn't speak the new protocol). So
// try it anyway, and in the worst case things will fail again.
if (conf.saslFallback()) {
LOG.warn("New auth protocol failed, trying SASL.", e);
doSaslAuth(client, channel);
} else {
// try it anyway, unless it's a timeout, which is locally fatal. In the worst case
// things will fail again.
if (!conf.saslFallback() || e.getCause() instanceof TimeoutException) {
throw e;
}
if (LOG.isDebugEnabled()) {
Throwable cause = e.getCause() != null ? e.getCause() : e;
LOG.debug("New auth protocol failed, trying SASL.", cause);
} else {
LOG.info("New auth protocol failed, trying SASL.");
}
doSaslAuth(client, channel);
}
}