[SPARK-35074][CORE] hardcoded configs move to config package

### What changes were proposed in this pull request?
Currently, the spark.jars.* property keys (e.g. spark.jars.ivySettings and spark.jars.packages) are hardcoded in multiple places across several Spark modules. This PR defines them once in config/package.scala and updates all other call sites to reference those entries.
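
For illustration, the define-once-and-reference pattern looks roughly like the sketch below. It mirrors the entries added in this PR but is not a verbatim excerpt; `ConfigBuilder` is Spark's internal (`private[spark]`) builder, so this only compiles inside the `org.apache.spark.internal.config` package object.

```scala
// Sketch of the pattern (inside core's org.apache.spark.internal.config package
// object, where ConfigBuilder is in scope). The key string appears exactly once here.
private[spark] val JAR_IVY_REPO_PATH =
  ConfigBuilder("spark.jars.ivy")
    .doc("Path to specify the Ivy user directory, used for the local Ivy cache.")
    .version("1.3.0")
    .stringConf
    .createOptional

// Call sites reference the shared entry instead of repeating the literal string, e.g.
// in SparkSubmitArguments:
//   ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull
```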

### Why are the changes needed?
Code maintainability improvement: the spark.jars.* keys are defined in one place instead of being duplicated as string literals across modules.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
No new tests were added; the change only replaces hardcoded key strings with references to the equivalent config entries, so existing tests cover it.

Closes #32746 from dgd-contributor/SPARK-35074_configs_should_be_moved_to_config_package.scala.

Authored-by: dgd-contributor <dgd_contributor@viettel.com.vn>
Signed-off-by: Thomas Graves <tgraves@apache.org>
Commit: 6c3b7f92cf (parent: 33f26275f4)
Date: 2021-06-07 09:55:03 -05:00
4 changed files with 77 additions and 19 deletions

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

@@ -588,7 +588,8 @@ private[spark] class SparkSubmit extends Logging {
       OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
         confKey = SUBMIT_DEPLOY_MODE.key),
       OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.app.name"),
-      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT,
+        confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT,
         confKey = DRIVER_MEMORY.key),
       OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
@@ -605,13 +606,13 @@ private[spark] class SparkSubmit extends Logging {

       // Propagate attributes for dependency resolution at the driver side
       OptionAssigner(args.packages, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.packages"),
+        CLUSTER, confKey = JAR_PACKAGES.key),
       OptionAssigner(args.repositories, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.repositories"),
+        CLUSTER, confKey = JAR_REPOSITORIES.key),
       OptionAssigner(args.ivyRepoPath, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.ivy"),
+        CLUSTER, confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.packagesExclusions, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.excludes"),
+        CLUSTER, confKey = JAR_PACKAGES_EXCLUSIONS.key),

       // Yarn only
       OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.queue"),
@@ -646,7 +647,7 @@ private[spark] class SparkSubmit extends Logging {
         confKey = DRIVER_CORES.key),
       OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER,
         confKey = DRIVER_SUPERVISE.key),
-      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = JAR_IVY_REPO_PATH.key),

       // An internal option used only for spark-shell to add user jars to repl's classloader,
       // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to
@@ -1299,7 +1300,7 @@ private[spark] object SparkSubmitUtils extends Logging {
     val file = Option(uri.getScheme).getOrElse("file") match {
       case "file" => new File(uri.getPath)
       case scheme => throw new IllegalArgumentException(s"Scheme $scheme not supported in " +
-        "spark.jars.ivySettings")
+        JAR_IVY_SETTING_PATH.key)
     }
     require(file.exists(), s"Ivy settings file $file does not exist")
     require(file.isFile(), s"Ivy settings file $file is not a normal file")

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

@@ -185,13 +185,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull
     archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull
     pyFiles = Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull
-    ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull
-    ivySettingsPath = sparkProperties.get("spark.jars.ivySettings")
-    packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
+    ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull
+    ivySettingsPath = sparkProperties.get(config.JAR_IVY_SETTING_PATH.key)
+    packages = Option(packages).orElse(sparkProperties.get(config.JAR_PACKAGES.key)).orNull
     packagesExclusions = Option(packagesExclusions)
-      .orElse(sparkProperties.get("spark.jars.excludes")).orNull
+      .orElse(sparkProperties.get(config.JAR_PACKAGES_EXCLUSIONS.key)).orNull
     repositories = Option(repositories)
-      .orElse(sparkProperties.get("spark.jars.repositories")).orNull
+      .orElse(sparkProperties.get(config.JAR_REPOSITORIES.key)).orNull
     deployMode = Option(deployMode)
       .orElse(sparkProperties.get(config.SUBMIT_DEPLOY_MODE.key))
       .orElse(env.get("DEPLOY_MODE"))
@@ -200,11 +200,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       .getOrElse(sparkProperties.get(config.EXECUTOR_INSTANCES.key).orNull)
     queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
     keytab = Option(keytab)
-      .orElse(sparkProperties.get("spark.kerberos.keytab"))
+      .orElse(sparkProperties.get(config.KEYTAB.key))
       .orElse(sparkProperties.get("spark.yarn.keytab"))
       .orNull
     principal = Option(principal)
-      .orElse(sparkProperties.get("spark.kerberos.principal"))
+      .orElse(sparkProperties.get(config.PRINCIPAL.key))
      .orElse(sparkProperties.get("spark.yarn.principal"))
       .orNull
     dynamicAllocationEnabled =

core/src/main/scala/org/apache/spark/internal/config/package.scala

@@ -2148,4 +2148,60 @@ package object config {
       // batch of block will be loaded in memory with memory mapping, which has higher overhead
       // with small MB sized chunk of data.
       .createWithDefaultString("3m")
+
+  private[spark] val JAR_IVY_REPO_PATH =
+    ConfigBuilder("spark.jars.ivy")
+      .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " +
+        "package files from spark.jars.packages. " +
+        "This will override the Ivy property ivy.default.ivy.user.dir " +
+        "which defaults to ~/.ivy2.")
+      .version("1.3.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_IVY_SETTING_PATH =
+    ConfigBuilder("spark.jars.ivySettings")
+      .doc("Path to an Ivy settings file to customize resolution of jars specified " +
+        "using spark.jars.packages instead of the built-in defaults, such as maven central. " +
+        "Additional repositories given by the command-line option --repositories " +
+        "or spark.jars.repositories will also be included. " +
+        "Useful for allowing Spark to resolve artifacts from behind a firewall " +
+        "e.g. via an in-house artifact server like Artifactory. " +
+        "Details on the settings file format can be found at Settings Files")
+      .version("2.2.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_PACKAGES =
+    ConfigBuilder("spark.jars.packages")
+      .doc("Comma-separated list of Maven coordinates of jars to include " +
+        "on the driver and executor classpaths. The coordinates should be " +
+        "groupId:artifactId:version. If spark.jars.ivySettings is given artifacts " +
+        "will be resolved according to the configuration in the file, otherwise artifacts " +
+        "will be searched for in the local maven repo, then maven central and finally " +
+        "any additional remote repositories given by the command-line option --repositories. " +
+        "For more details, see Advanced Dependency Management.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_PACKAGES_EXCLUSIONS =
+    ConfigBuilder("spark.jars.excludes")
+      .doc("Comma-separated list of groupId:artifactId, " +
+        "to exclude while resolving the dependencies provided in spark.jars.packages " +
+        "to avoid dependency conflicts.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_REPOSITORIES =
+    ConfigBuilder("spark.jars.repositories")
+      .doc("Comma-separated list of additional remote repositories to search " +
+        "for the maven coordinates given with --packages or spark.jars.packages.")
+      .version("2.3.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
 }
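
The public property names above are unchanged, so the way users set them stays the same; only Spark-internal references moved to the new ConfigEntry constants. A small usage sketch (the coordinates and repository URL below are hypothetical, not from this PR):

```scala
import org.apache.spark.SparkConf

// Users still set the same spark.jars.* keys on SparkConf or via spark-submit --conf.
val conf = new SparkConf()
  .set("spark.jars.packages", "org.apache.commons:commons-lang3:3.12.0")
  .set("spark.jars.repositories", "https://repo.example.com/maven")
  .set("spark.jars.ivy", "/tmp/.ivy2")

// Inside private[spark] code, the same values can now be read through the typed
// entries, e.g. conf.get(JAR_PACKAGES) yields a Seq[String] because that entry is
// declared with .stringConf.toSequence.
```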

DependencyUtils.scala

@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.deploy.SparkSubmitUtils
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._

 case class IvyProperties(
     packagesExclusions: String,
@@ -39,11 +40,11 @@ private[spark] object DependencyUtils extends Logging {

   def getIvyProperties(): IvyProperties = {
     val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq(
-      "spark.jars.excludes",
-      "spark.jars.packages",
-      "spark.jars.repositories",
-      "spark.jars.ivy",
-      "spark.jars.ivySettings"
+      JAR_PACKAGES_EXCLUSIONS.key,
+      JAR_PACKAGES.key,
+      JAR_REPOSITORIES.key,
+      JAR_IVY_REPO_PATH.key,
+      JAR_IVY_SETTING_PATH.key
     ).map(sys.props.get(_).orNull)
     IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath)
   }
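
As the last hunk shows, getIvyProperties resolves these values from JVM system properties rather than from a SparkConf. A minimal standalone sketch of that lookup pattern (the coordinate below is hypothetical):

```scala
// Each spark.jars.* key is read from JVM system properties, falling back to null when unset.
sys.props("spark.jars.packages") = "org.apache.commons:commons-lang3:3.12.0"

val Seq(excludes, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq(
  "spark.jars.excludes",
  "spark.jars.packages",
  "spark.jars.repositories",
  "spark.jars.ivy",
  "spark.jars.ivySettings"
).map(sys.props.get(_).orNull)

assert(packages == "org.apache.commons:commons-lang3:3.12.0")
// excludes, repositories, ivyRepoPath and ivySettingsPath are null here because
// those properties were never set in this JVM.
```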