[SPARK-35074][CORE] hardcoded configs move to config package

### What changes were proposed in this pull request?
Currently, the spark.jars.* property keys (e.g. spark.jars.ivySettings and spark.jars.packages) are hardcoded in multiple places across several Spark modules. This PR defines them once in config/package.scala and updates all other call sites to reference those entries.
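
For illustration, the define-once-and-reference pattern looks roughly like the sketch below. It mirrors the entries added in this PR but is not a verbatim excerpt; `ConfigBuilder` is Spark's internal (`private[spark]`) builder, so this only compiles inside the `org.apache.spark.internal.config` package object.

```scala
// Sketch of the pattern (inside core's org.apache.spark.internal.config package
// object, where ConfigBuilder is in scope). The key string appears exactly once here.
private[spark] val JAR_IVY_REPO_PATH =
  ConfigBuilder("spark.jars.ivy")
    .doc("Path to specify the Ivy user directory, used for the local Ivy cache.")
    .version("1.3.0")
    .stringConf
    .createOptional

// Call sites reference the shared entry instead of repeating the literal string, e.g.
// in SparkSubmitArguments:
//   ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull
```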

### Why are the changes needed?
Code maintainability improvement: the spark.jars.* keys are defined in one place instead of being duplicated as string literals across modules.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
No new tests were added; the change only replaces hardcoded key strings with references to the equivalent config entries, so existing tests cover it.

Closes #32746 from dgd-contributor/SPARK-35074_configs_should_be_moved_to_config_package.scala.

Authored-by: dgd-contributor <dgd_contributor@viettel.com.vn>
Signed-off-by: Thomas Graves <tgraves@apache.org>
Commit: 6c3b7f92cf (parent: 33f26275f4)
Date: 2021-06-07 09:55:03 -05:00
4 changed files with 77 additions and 19 deletions

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

@@ -588,7 +588,8 @@ private[spark] class SparkSubmit extends Logging {
       OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
         confKey = SUBMIT_DEPLOY_MODE.key),
       OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.app.name"),
-      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT,
+        confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT,
         confKey = DRIVER_MEMORY.key),
       OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
@@ -605,13 +606,13 @@ private[spark] class SparkSubmit extends Logging {

       // Propagate attributes for dependency resolution at the driver side
       OptionAssigner(args.packages, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.packages"),
+        CLUSTER, confKey = JAR_PACKAGES.key),
       OptionAssigner(args.repositories, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.repositories"),
+        CLUSTER, confKey = JAR_REPOSITORIES.key),
       OptionAssigner(args.ivyRepoPath, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.ivy"),
+        CLUSTER, confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.packagesExclusions, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.excludes"),
+        CLUSTER, confKey = JAR_PACKAGES_EXCLUSIONS.key),

       // Yarn only
       OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.queue"),
@@ -646,7 +647,7 @@ private[spark] class SparkSubmit extends Logging {
         confKey = DRIVER_CORES.key),
       OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER,
         confKey = DRIVER_SUPERVISE.key),
-      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = JAR_IVY_REPO_PATH.key),

       // An internal option used only for spark-shell to add user jars to repl's classloader,
       // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to
@@ -1299,7 +1300,7 @@ private[spark] object SparkSubmitUtils extends Logging {
     val file = Option(uri.getScheme).getOrElse("file") match {
       case "file" => new File(uri.getPath)
       case scheme => throw new IllegalArgumentException(s"Scheme $scheme not supported in " +
-        "spark.jars.ivySettings")
+        JAR_IVY_SETTING_PATH.key)
     }
     require(file.exists(), s"Ivy settings file $file does not exist")
     require(file.isFile(), s"Ivy settings file $file is not a normal file")

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

@@ -185,13 +185,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull
     archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull
     pyFiles = Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull
-    ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull
-    ivySettingsPath = sparkProperties.get("spark.jars.ivySettings")
-    packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
+    ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull
+    ivySettingsPath = sparkProperties.get(config.JAR_IVY_SETTING_PATH.key)
+    packages = Option(packages).orElse(sparkProperties.get(config.JAR_PACKAGES.key)).orNull
     packagesExclusions = Option(packagesExclusions)
-      .orElse(sparkProperties.get("spark.jars.excludes")).orNull
+      .orElse(sparkProperties.get(config.JAR_PACKAGES_EXCLUSIONS.key)).orNull
     repositories = Option(repositories)
-      .orElse(sparkProperties.get("spark.jars.repositories")).orNull
+      .orElse(sparkProperties.get(config.JAR_REPOSITORIES.key)).orNull
     deployMode = Option(deployMode)
       .orElse(sparkProperties.get(config.SUBMIT_DEPLOY_MODE.key))
       .orElse(env.get("DEPLOY_MODE"))
@@ -200,11 +200,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       .getOrElse(sparkProperties.get(config.EXECUTOR_INSTANCES.key).orNull)
     queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
     keytab = Option(keytab)
-      .orElse(sparkProperties.get("spark.kerberos.keytab"))
+      .orElse(sparkProperties.get(config.KEYTAB.key))
       .orElse(sparkProperties.get("spark.yarn.keytab"))
       .orNull
     principal = Option(principal)
-      .orElse(sparkProperties.get("spark.kerberos.principal"))
+      .orElse(sparkProperties.get(config.PRINCIPAL.key))
      .orElse(sparkProperties.get("spark.yarn.principal"))
       .orNull
     dynamicAllocationEnabled =

core/src/main/scala/org/apache/spark/internal/config/package.scala

@@ -2148,4 +2148,60 @@ package object config {
       // batch of block will be loaded in memory with memory mapping, which has higher overhead
       // with small MB sized chunk of data.
       .createWithDefaultString("3m")
+
+  private[spark] val JAR_IVY_REPO_PATH =
+    ConfigBuilder("spark.jars.ivy")
+      .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " +
+        "package files from spark.jars.packages. " +
+        "This will override the Ivy property ivy.default.ivy.user.dir " +
+        "which defaults to ~/.ivy2.")
+      .version("1.3.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_IVY_SETTING_PATH =
+    ConfigBuilder("spark.jars.ivySettings")
+      .doc("Path to an Ivy settings file to customize resolution of jars specified " +
+        "using spark.jars.packages instead of the built-in defaults, such as maven central. " +
+        "Additional repositories given by the command-line option --repositories " +
+        "or spark.jars.repositories will also be included. " +
+        "Useful for allowing Spark to resolve artifacts from behind a firewall " +
+        "e.g. via an in-house artifact server like Artifactory. " +
+        "Details on the settings file format can be found at Settings Files")
+      .version("2.2.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_PACKAGES =
+    ConfigBuilder("spark.jars.packages")
+      .doc("Comma-separated list of Maven coordinates of jars to include " +
+        "on the driver and executor classpaths. The coordinates should be " +
+        "groupId:artifactId:version. If spark.jars.ivySettings is given artifacts " +
+        "will be resolved according to the configuration in the file, otherwise artifacts " +
+        "will be searched for in the local maven repo, then maven central and finally " +
+        "any additional remote repositories given by the command-line option --repositories. " +
+        "For more details, see Advanced Dependency Management.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_PACKAGES_EXCLUSIONS =
+    ConfigBuilder("spark.jars.excludes")
+      .doc("Comma-separated list of groupId:artifactId, " +
+        "to exclude while resolving the dependencies provided in spark.jars.packages " +
+        "to avoid dependency conflicts.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_REPOSITORIES =
+    ConfigBuilder("spark.jars.repositories")
+      .doc("Comma-separated list of additional remote repositories to search " +
+        "for the maven coordinates given with --packages or spark.jars.packages.")
+      .version("2.3.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
 }
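
The public property names above are unchanged, so the way users set them stays the same; only Spark-internal references moved to the new ConfigEntry constants. A small usage sketch (the coordinates and repository URL below are hypothetical, not from this PR):

```scala
import org.apache.spark.SparkConf

// Users still set the same spark.jars.* keys on SparkConf or via spark-submit --conf.
val conf = new SparkConf()
  .set("spark.jars.packages", "org.apache.commons:commons-lang3:3.12.0")
  .set("spark.jars.repositories", "https://repo.example.com/maven")
  .set("spark.jars.ivy", "/tmp/.ivy2")

// Inside private[spark] code, the same values can now be read through the typed
// entries, e.g. conf.get(JAR_PACKAGES) yields a Seq[String] because that entry is
// declared with .stringConf.toSequence.
```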

DependencyUtils.scala

@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.deploy.SparkSubmitUtils
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._

 case class IvyProperties(
     packagesExclusions: String,
@@ -39,11 +40,11 @@ private[spark] object DependencyUtils extends Logging {

   def getIvyProperties(): IvyProperties = {
     val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq(
-      "spark.jars.excludes",
-      "spark.jars.packages",
-      "spark.jars.repositories",
-      "spark.jars.ivy",
-      "spark.jars.ivySettings"
+      JAR_PACKAGES_EXCLUSIONS.key,
+      JAR_PACKAGES.key,
+      JAR_REPOSITORIES.key,
+      JAR_IVY_REPO_PATH.key,
+      JAR_IVY_SETTING_PATH.key
     ).map(sys.props.get(_).orNull)
     IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath)
   }
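
As the last hunk shows, getIvyProperties resolves these values from JVM system properties rather than from a SparkConf. A minimal standalone sketch of that lookup pattern (the coordinate below is hypothetical):

```scala
// Each spark.jars.* key is read from JVM system properties, falling back to null when unset.
sys.props("spark.jars.packages") = "org.apache.commons:commons-lang3:3.12.0"

val Seq(excludes, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq(
  "spark.jars.excludes",
  "spark.jars.packages",
  "spark.jars.repositories",
  "spark.jars.ivy",
  "spark.jars.ivySettings"
).map(sys.props.get(_).orNull)

assert(packages == "org.apache.commons:commons-lang3:3.12.0")
// excludes, repositories, ivyRepoPath and ivySettingsPath are null here because
// those properties were never set in this JVM.
```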