[SPARK-29080][CORE][SPARKR] Support R file extension case-insensitively

### What changes were proposed in this pull request?

Make the R file extension check case-insensitive for spark-submit.

### Why are the changes needed?

spark-submit does not accept `.r` files as R scripts. Some codebases contain R files with lowercase extensions, and it is inconvenient to run them with spark-submit because the resulting error is not very clear (https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala#L232):

```
$ ./bin/spark-submit examples/src/main/r/dataframe.r
Exception in thread "main" org.apache.spark.SparkException: Cannot load main class from JAR file:/Users/dongjoon/APACHE/spark-release/spark-2.4.4-bin-hadoop2.7/examples/src/main/r/dataframe.r
```

### Does this PR introduce any user-facing change?

Yes. spark-submit now accepts R scripts with a lowercase `.r` file extension.

### How was this patch tested?

Manual.

```
$ mv examples/src/main/r/dataframe.R examples/src/main/r/dataframe.r
$ ./bin/spark-submit examples/src/main/r/dataframe.r
```
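
For reference, an automated check of the new predicate could look like the sketch below. This suite is not part of the patch as shown here; the suite name, its placement under `org.apache.spark.deploy` (so the `private[deploy]` method is visible), and the use of ScalaTest's `AnyFunSuite` are assumptions.

```scala
// Sketch only: exercises SparkSubmit.isR with both extension cases.
// Suite name and ScalaTest style are illustrative, not part of this patch.
package org.apache.spark.deploy

import org.scalatest.funsuite.AnyFunSuite

class IsRSuite extends AnyFunSuite {
  test("isR accepts .R and .r primary resources") {
    assert(SparkSubmit.isR("examples/src/main/r/dataframe.R"))
    assert(SparkSubmit.isR("examples/src/main/r/dataframe.r"))
    assert(!SparkSubmit.isR("examples/src/main/python/pi.py"))
    assert(!SparkSubmit.isR(null))
  }
}
```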

Closes #25778 from Loquats/r-case.

Authored-by: Andy Zhang <yue.zhang@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>

```diff
@@ -1028,7 +1028,7 @@ object SparkSubmit extends CommandLineUtils with Logging {
    * Return whether the given primary resource requires running R.
    */
   private[deploy] def isR(res: String): Boolean = {
-    res != null && res.endsWith(".R") || res == SPARKR_SHELL
+    res != null && (res.endsWith(".R") || res.endsWith(".r")) || res == SPARKR_SHELL
   }

   private[deploy] def isInternal(res: String): Boolean = {
```
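
Because the extension is a single character, checking `.R` and `.r` explicitly already covers every case variant. A lowercasing formulation would be equivalent; the sketch below is illustrative only and is not the change this patch applies.

```scala
// Illustrative alternative, not the change made by this patch:
// normalize the resource name once, then test a single suffix.
private[deploy] def isR(res: String): Boolean = {
  res != null && res.toLowerCase(java.util.Locale.ROOT).endsWith(".r") || res == SPARKR_SHELL
}
```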

```diff
@@ -348,7 +348,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
   }

   private List<String> buildSparkRCommand(Map<String, String> env) throws IOException {
-    if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".R")) {
+    if (!appArgs.isEmpty() && (appArgs.get(0).endsWith(".R") || appArgs.get(0).endsWith(".r"))) {
       System.err.println(
         "Running R applications through 'sparkR' is not supported as of Spark 2.0.\n" +
         "Use ./bin/spark-submit <R file>");
```

```diff
@@ -703,7 +703,8 @@ private[spark] class ApplicationMaster(
         // of files to add to PYTHONPATH, which Client.scala already handles, so it's empty.
         userArgs = Seq(args.primaryPyFile, "") ++ userArgs
       }
-      if (args.primaryRFile != null && args.primaryRFile.endsWith(".R")) {
+      if (args.primaryRFile != null &&
+          (args.primaryRFile.endsWith(".R") || args.primaryRFile.endsWith(".r"))) {
         // TODO(davies): add R dependencies here
       }
```

```diff
@@ -973,7 +973,8 @@ private[spark] class Client(
       } else {
         Utils.classForName("org.apache.spark.deploy.yarn.ExecutorLauncher").getName
       }
-    if (args.primaryRFile != null && args.primaryRFile.endsWith(".R")) {
+    if (args.primaryRFile != null &&
+        (args.primaryRFile.endsWith(".R") || args.primaryRFile.endsWith(".r"))) {
       args.userArgs = ArrayBuffer(args.primaryRFile) ++ args.userArgs
     }
     val userArgs = args.userArgs.flatMap { arg =>
```