From 79a650494fe7030450bd5add81e34cb95fe9e883 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Fri, 22 Feb 2019 11:15:20 -0800 Subject: [PATCH] [SPARK-26895][CORE] prepareSubmitEnvironment should be called within doAs for proxy users ## What changes were proposed in this pull request? `prepareSubmitEnvironment` performs globbing that will fail in the case where a proxy user (`--proxy-user`) doesn't have permission to access the file. This bug also exists in 2.3, so we should backport the fix: currently you can't launch an application that, for instance, passes a file under `--archives` when that file is owned by the target user. The solution is to call `prepareSubmitEnvironment` within a doAs context if proxying. ## How was this patch tested? Manual tests running with `--proxy-user` and `--archives`, before and after, showing that the globbing is successful when the resource is owned by the target user. I've looked at writing unit tests, but I am not sure I can do that cleanly (perhaps with a custom FileSystem). Open to ideas. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23806 from abellina/SPARK-26895_prepareSubmitEnvironment_from_doAs. Lead-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Signed-off-by: Marcelo Vanzin --- .../org/apache/spark/deploy/SparkSubmit.scala | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index d5e17ffb55..f4d9fe0663 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -131,17 +131,11 @@ private[spark] class SparkSubmit extends Logging { } /** - * Submit the application using the provided parameters. - * - * This runs in two steps. 
First, we prepare the launch environment by setting up - * the appropriate classpath, system properties, and application arguments for - * running the child main class based on the cluster manager and the deploy mode. - * Second, we use this launch environment to invoke the main method of the child - * main class. + * Submit the application using the provided parameters, ensuring to first wrap + * in a doAs when --proxy-user is specified. */ @tailrec private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = { - val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args) def doRunMain(): Unit = { if (args.proxyUser != null) { @@ -150,7 +144,7 @@ private[spark] class SparkSubmit extends Logging { try { proxyUser.doAs(new PrivilegedExceptionAction[Unit]() { override def run(): Unit = { - runMain(childArgs, childClasspath, sparkConf, childMainClass, args.verbose) + runMain(args) } }) } catch { @@ -165,7 +159,7 @@ private[spark] class SparkSubmit extends Logging { } } } else { - runMain(childArgs, childClasspath, sparkConf, childMainClass, args.verbose) + runMain(args) } } @@ -774,18 +768,20 @@ private[spark] class SparkSubmit extends Logging { } /** - * Run the main method of the child class using the provided launch environment. + * Run the main method of the child class using the submit arguments. + * + * This runs in two steps. First, we prepare the launch environment by setting up + * the appropriate classpath, system properties, and application arguments for + * running the child main class based on the cluster manager and the deploy mode. + * Second, we use this launch environment to invoke the main method of the child + * main class. * * Note that this main class will not be the one provided by the user if we're * running cluster deploy mode or python applications. 
*/ - private def runMain( - childArgs: Seq[String], - childClasspath: Seq[String], - sparkConf: SparkConf, - childMainClass: String, - verbose: Boolean): Unit = { - if (verbose) { + private def runMain(args: SparkSubmitArguments): Unit = { + val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args) + if (args.verbose) { logInfo(s"Main class:\n$childMainClass") logInfo(s"Arguments:\n${childArgs.mkString("\n")}") // sysProps may contain sensitive information, so redact before printing