[SPARK-7841][BUILD] Stop using retrieveManaged to retrieve dependencies in SBT
This patch modifies Spark's SBT build so that it no longer uses `retrieveManaged` / `lib_managed` to store its dependencies. The motivations for this change are nicely described on the JIRA ticket ([SPARK-7841](https://issues.apache.org/jira/browse/SPARK-7841)); my personal interest in doing this stems from the fact that `lib_managed` has caused me some pain while debugging dependency issues in another PR of mine. Removing our use of `lib_managed` would be trivial except for one snag: the Datanucleus JARs, required by Spark SQL's Hive integration, cannot be included in assembly JARs due to problems with merging OSGI `plugin.xml` files. As a result, several places in the packaging and deployment pipeline assume that these Datanucleus JARs are copied to `lib_managed/jars`. In the interest of maintaining compatibility, I have chosen to retain the `lib_managed/jars` directory _only_ for these Datanucleus JARs and have added custom code to `SparkBuild.scala` to automatically copy those JARs to that folder as part of the `assembly` task. `dev/mima` also depended on `lib_managed` in a hacky way in order to set classpaths when generating MiMa excludes; I've updated this to obtain the classpaths directly from SBT instead. /cc dragos marmbrus pwendell srowen Author: Josh Rosen <joshrosen@databricks.com> Closes #9575 from JoshRosen/SPARK-7841.
This commit is contained in:
parent
a81f47ff74
commit
689386b1c6
2
dev/mima
2
dev/mima
|
@@ -38,7 +38,7 @@ generate_mima_ignore() {
 # it did not process the new classes (which are in assembly jar).
 generate_mima_ignore

-export SPARK_CLASSPATH="`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`"
+export SPARK_CLASSPATH="$(build/sbt "export oldDeps/fullClasspath" | tail -n1)"
 echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"

 generate_mima_ignore
|
@@ -16,6 +16,7 @@
  */

 import java.io._
+import java.nio.file.Files

 import scala.util.Properties
 import scala.collection.JavaConverters._
|
@@ -135,8 +136,6 @@ object SparkBuild extends PomBuild {
       .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() })
       .map(file),
     incOptions := incOptions.value.withNameHashing(true),
-    retrieveManaged := true,
-    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
     publishMavenStyle := true,
     unidocGenjavadocVersion := "0.9-spark0",
||||||
|
@@ -326,8 +325,6 @@ object OldDeps {
   def oldDepsSettings() = Defaults.coreDefaultSettings ++ Seq(
     name := "old-deps",
     scalaVersion := "2.10.5",
-    retrieveManaged := true,
-    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
     libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
       "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
       "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
||||||
|
@@ -404,6 +401,8 @@ object Assembly {

   val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.")

+  val deployDatanucleusJars = taskKey[Unit]("Deploy datanucleus jars to the spark/lib_managed/jars directory")
+
   lazy val settings = assemblySettings ++ Seq(
     test in assembly := {},
     hadoopVersion := {
|
@@ -429,7 +428,20 @@ object Assembly {
       case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
       case "reference.conf" => MergeStrategy.concat
       case _ => MergeStrategy.first
-    }
+    },
+    deployDatanucleusJars := {
+      // Put all of the datanucleus jars into the lib_managed/jars directory:
+      val jars: Seq[File] = (fullClasspath in assembly).value.map(_.data)
+        .filter(_.getPath.contains("org.datanucleus"))
+      var libManagedJars = new File(BuildCommons.sparkHome, "lib_managed/jars")
+      libManagedJars.mkdirs()
+      jars.foreach { jar =>
+        val dest = new File(libManagedJars, jar.getName)
+        if (!dest.exists()) {
+          Files.copy(jar.toPath, dest.toPath)
+        }
+      }
+    },
+    assembly <<= assembly.dependsOn(deployDatanucleusJars)
   )
 }
|
|
Loading…
Reference in a new issue