[SPARK-29080][CORE][SPARKR] Support R file extension case-insensitively

### What changes were proposed in this pull request?

Make the R file extension check case-insensitive for spark-submit.

### Why are the changes needed?

spark-submit does not accept `.r` files as R scripts. Some codebases contain R files with lowercase extensions, and it is inconvenient to run them with spark-submit because the resulting error is not very clear (https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala#L232):

```
$ ./bin/spark-submit examples/src/main/r/dataframe.r
Exception in thread "main" org.apache.spark.SparkException: Cannot load main class from JAR file:/Users/dongjoon/APACHE/spark-release/spark-2.4.4-bin-hadoop2.7/examples/src/main/r/dataframe.r
```

### Does this PR introduce any user-facing change?

Yes. spark-submit now accepts R scripts with a lowercase `.r` file extension.

### How was this patch tested?

Manual.

```
$ mv examples/src/main/r/dataframe.R examples/src/main/r/dataframe.r
$ ./bin/spark-submit examples/src/main/r/dataframe.r
```
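
For reference, an automated check of the new predicate could look like the sketch below. This suite is not part of the patch as shown here; the suite name, its placement under `org.apache.spark.deploy` (so the `private[deploy]` method is visible), and the use of ScalaTest's `AnyFunSuite` are assumptions.

```scala
// Sketch only: exercises SparkSubmit.isR with both extension cases.
// Suite name and ScalaTest style are illustrative, not part of this patch.
package org.apache.spark.deploy

import org.scalatest.funsuite.AnyFunSuite

class IsRSuite extends AnyFunSuite {
  test("isR accepts .R and .r primary resources") {
    assert(SparkSubmit.isR("examples/src/main/r/dataframe.R"))
    assert(SparkSubmit.isR("examples/src/main/r/dataframe.r"))
    assert(!SparkSubmit.isR("examples/src/main/python/pi.py"))
    assert(!SparkSubmit.isR(null))
  }
}
```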

Closes #25778 from Loquats/r-case.

Authored-by: Andy Zhang <yue.zhang@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>

```diff
@@ -1028,7 +1028,7 @@ object SparkSubmit extends CommandLineUtils with Logging {
    * Return whether the given primary resource requires running R.
    */
   private[deploy] def isR(res: String): Boolean = {
-    res != null && res.endsWith(".R") || res == SPARKR_SHELL
+    res != null && (res.endsWith(".R") || res.endsWith(".r")) || res == SPARKR_SHELL
   }

   private[deploy] def isInternal(res: String): Boolean = {
```
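
Because the extension is a single character, checking `.R` and `.r` explicitly already covers every case variant. A lowercasing formulation would be equivalent; the sketch below is illustrative only and is not the change this patch applies.

```scala
// Illustrative alternative, not the change made by this patch:
// normalize the resource name once, then test a single suffix.
private[deploy] def isR(res: String): Boolean = {
  res != null && res.toLowerCase(java.util.Locale.ROOT).endsWith(".r") || res == SPARKR_SHELL
}
```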

```diff
@@ -348,7 +348,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
   }

   private List<String> buildSparkRCommand(Map<String, String> env) throws IOException {
-    if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".R")) {
+    if (!appArgs.isEmpty() && (appArgs.get(0).endsWith(".R") || appArgs.get(0).endsWith(".r"))) {
       System.err.println(
         "Running R applications through 'sparkR' is not supported as of Spark 2.0.\n" +
         "Use ./bin/spark-submit <R file>");
```

```diff
@@ -703,7 +703,8 @@ private[spark] class ApplicationMaster(
         // of files to add to PYTHONPATH, which Client.scala already handles, so it's empty.
         userArgs = Seq(args.primaryPyFile, "") ++ userArgs
       }
-      if (args.primaryRFile != null && args.primaryRFile.endsWith(".R")) {
+      if (args.primaryRFile != null &&
+          (args.primaryRFile.endsWith(".R") || args.primaryRFile.endsWith(".r"))) {
         // TODO(davies): add R dependencies here
       }
```

```diff
@@ -973,7 +973,8 @@ private[spark] class Client(
       } else {
         Utils.classForName("org.apache.spark.deploy.yarn.ExecutorLauncher").getName
       }
-    if (args.primaryRFile != null && args.primaryRFile.endsWith(".R")) {
+    if (args.primaryRFile != null &&
+        (args.primaryRFile.endsWith(".R") || args.primaryRFile.endsWith(".r"))) {
       args.userArgs = ArrayBuffer(args.primaryRFile) ++ args.userArgs
     }
     val userArgs = args.userArgs.flatMap { arg =>
```