[SPARK-1776] Have Spark's SBT build read dependencies from Maven.

This patch introduces the new way of working while also retaining the existing ways of doing things.

For example, the Maven build instruction for YARN is
`mvn -Pyarn -Phadoop-2.2 clean package -DskipTests`
In sbt it can become
`MAVEN_PROFILES="yarn, hadoop-2.2" sbt/sbt clean assembly`
It also supports
`sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 clean assembly`
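The existing environment-variable style keeps working through a deprecation shim in `project/SparkBuild.scala` (it prints a note pointing at the new flags), so an invocation along these lines should still build; the versions here are only illustrative:
`SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt clean assembly`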

Author: Prashant Sharma <prashant.s@imaginea.com>
Author: Patrick Wendell <pwendell@gmail.com>

Closes #772 from ScrapCodes/sbt-maven and squashes the following commits:

a8ac951 [Prashant Sharma] Updated sbt version.
62b09bb [Prashant Sharma] Improvements.
fa6221d [Prashant Sharma] Excluding sql from mima
4b8875e [Prashant Sharma] Sbt assembly no longer builds tools by default.
72651ca [Prashant Sharma] Addresses code review comments.
acab73d [Prashant Sharma] Revert "Small fix to run-examples script."
ac4312c [Prashant Sharma] Revert "minor fix"
6af91ac [Prashant Sharma] Ported oldDeps back. + fixes issues with prev commit.
65cf06c [Prashant Sharma] Servlet API jars mess with the other servlet jars on the class path.
446768e [Prashant Sharma] minor fix
89b9777 [Prashant Sharma] Merge conflicts
d0a02f2 [Prashant Sharma] Bumped up pom versions; since the build now depends on the pom, it is better updated there. + general cleanups.
dccc8ac [Prashant Sharma] updated mima to check against 1.0
a49c61b [Prashant Sharma] Fix for tools jar
a2f5ae1 [Prashant Sharma] Fixes a bug in dependencies.
cf88758 [Prashant Sharma] cleanup
9439ea3 [Prashant Sharma] Small fix to run-examples script.
96cea1f [Prashant Sharma] SPARK-1776 Have Spark's SBT build read dependencies from Maven.
36efa62 [Patrick Wendell] Set project name in pom files and added eclipse/intellij plugins.
4973dbd [Patrick Wendell] Example build using pom reader.
Prashant Sharma authored on 2014-07-10 11:03:37 -07:00, committed by Patrick Wendell
parent c2babc089b
commit 628932b8d0
32 changed files with 333 additions and 614 deletions

View file

@@ -32,6 +32,7 @@
<packaging>pom</packaging>
<properties>
<sbt.project.name>assembly</sbt.project.name>
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-bagel_2.10</artifactId>
<properties>
<sbt.project.name>bagel</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Bagel</name>
<url>http://spark.apache.org/</url>

View file

@@ -110,9 +110,9 @@ export JAVA_OPTS
TOOLS_DIR="$FWDIR"/tools
SPARK_TOOLS_JAR=""
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the SBT build
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
fi
if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the Maven build

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<properties>
<sbt.project.name>core</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Core</name>
<url>http://spark.apache.org/</url>

View file

@@ -66,10 +66,10 @@ echo "========================================================================="
# (either resolution or compilation) prompts the user for input either q, r,
# etc to quit or retry. This echo is there to make it not block.
if [ -n "$_RUN_SQL_TESTS" ]; then
echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly test | \
echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean package assembly/assembly test | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
else
echo -e "q\n" | sbt/sbt clean assembly test | \
echo -e "q\n" | sbt/sbt clean package assembly/assembly test | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
fi

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-examples_2.10</artifactId>
<properties>
<sbt.project.name>examples</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Examples</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume_2.10</artifactId>
<properties>
<sbt.project.name>streaming-flume</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project External Flume</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<properties>
<sbt.project.name>streaming-kafka</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project External Kafka</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-mqtt_2.10</artifactId>
<properties>
<sbt.project.name>streaming-mqtt</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project External MQTT</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-twitter_2.10</artifactId>
<properties>
<sbt.project.name>streaming-twitter</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project External Twitter</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-zeromq_2.10</artifactId>
<properties>
<sbt.project.name>streaming-zeromq</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project External ZeroMQ</name>
<url>http://spark.apache.org/</url>

View file

@@ -29,6 +29,10 @@
<packaging>pom</packaging>
<name>Spark Project Java8 Tests POM</name>
<properties>
<sbt.project.name>java8-tests</sbt.project.name>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>

View file

@@ -30,6 +30,10 @@
<packaging>jar</packaging>
<name>Spark Ganglia Integration</name>
<properties>
<sbt.project.name>ganglia-lgpl</sbt.project.name>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.10</artifactId>
<properties>
<sbt.project.name>graphx</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project GraphX</name>
<url>http://spark.apache.org/</url>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.10</artifactId>
<properties>
<sbt.project.name>mllib</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project ML Library</name>
<url>http://spark.apache.org/</url>

pom.xml (10 changed lines)
View file

@@ -110,7 +110,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.6</java.version>
<sbt.project.name>spark</sbt.project.name>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<mesos.version>0.18.1</mesos.version>
@@ -535,6 +535,10 @@
<groupId>org.mortbay.jetty</groupId>
<artifactId>servlet-api-2.5</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
@@ -618,6 +622,10 @@
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>

View file

@@ -15,13 +15,16 @@
* limitations under the License.
*/
import sbt._
import sbt.Keys.version
import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.MissingClassProblem
import com.typesafe.tools.mima.core.MissingTypesProblem
import com.typesafe.tools.mima.core.ProblemFilters._
import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
import sbt._
object MimaBuild {
@@ -53,7 +56,7 @@ object MimaBuild {
excludePackage("org.apache.spark." + packageName)
}
def ignoredABIProblems(base: File) = {
def ignoredABIProblems(base: File, currentSparkVersion: String) = {
// Excludes placed here will be used for all Spark versions
val defaultExcludes = Seq()
@@ -77,11 +80,16 @@
}
defaultExcludes ++ ignoredClasses.flatMap(excludeClass) ++
ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes
ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion)
}
def mimaSettings(sparkHome: File, projectRef: ProjectRef) = {
val organization = "org.apache.spark"
val previousSparkVersion = "1.0.0"
val fullId = "spark-" + projectRef.project + "_2.10"
mimaDefaultSettings ++
Seq(previousArtifact := Some(organization % fullId % previousSparkVersion),
binaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value))
}
def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
previousArtifact := None,
binaryIssueFilters ++= ignoredABIProblems(sparkHome)
)
}

View file

@@ -31,8 +31,8 @@ import com.typesafe.tools.mima.core._
* MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap")
*/
object MimaExcludes {
val excludes =
SparkBuild.SPARK_VERSION match {
def excludes(version: String) =
version match {
case v if v.startsWith("1.1") =>
Seq(
MimaBuild.excludeSparkPackage("deploy"),

View file

@@ -15,523 +15,158 @@
* limitations under the License.
*/
import sbt._
import sbt.Classpaths.publishTask
import sbt.Keys._
import sbtassembly.Plugin._
import AssemblyKeys._
import scala.util.Properties
import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
import sbtunidoc.Plugin._
import UnidocKeys._
import scala.collection.JavaConversions._
// For Sonatype publishing
// import com.jsuereth.pgp.sbtplugin.PgpKeys._
import sbt._
import sbt.Keys._
import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys}
import net.virtualvoid.sbt.graph.Plugin.graphSettings
object SparkBuild extends Build {
val SPARK_VERSION = "1.1.0-SNAPSHOT"
val SPARK_VERSION_SHORT = SPARK_VERSION.replaceAll("-SNAPSHOT", "")
object BuildCommons {
// Hadoop version to build against. For example, "1.0.4" for Apache releases, or
// "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
// through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
val DEFAULT_HADOOP_VERSION = "1.0.4"
private val buildLocation = file(".").getAbsoluteFile.getParentFile
// Whether the Hadoop version to build against is 2.2.x, or a variant of it. This can be set
// through the SPARK_IS_NEW_HADOOP environment variable.
val DEFAULT_IS_NEW_HADOOP = false
val allProjects@Seq(bagel, catalyst, core, graphx, hive, mllib, repl, spark, sql, streaming,
streamingFlume, streamingKafka, streamingMqtt, streamingTwitter, streamingZeromq) =
Seq("bagel", "catalyst", "core", "graphx", "hive", "mllib", "repl", "spark", "sql",
"streaming", "streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter",
"streaming-zeromq").map(ProjectRef(buildLocation, _))
val DEFAULT_YARN = false
val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl) =
Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl")
.map(ProjectRef(buildLocation, _))
val DEFAULT_HIVE = false
val assemblyProjects@Seq(assembly, examples) = Seq("assembly", "examples")
.map(ProjectRef(buildLocation, _))
// HBase version; set as appropriate.
val HBASE_VERSION = "0.94.6"
val tools = "tools"
// Target JVM version
val SCALAC_JVM_VERSION = "jvm-1.6"
val JAVAC_JVM_VERSION = "1.6"
val sparkHome = buildLocation
}
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*)
object SparkBuild extends PomBuild {
lazy val core = Project("core", file("core"), settings = coreSettings)
import BuildCommons._
import scala.collection.mutable.Map
val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
// Provides compatibility for older versions of the Spark build
def backwardCompatibility = {
import scala.collection.mutable
var isAlphaYarn = false
var profiles: mutable.Seq[String] = mutable.Seq.empty
if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) {
println("NOTE: SPARK_GANGLIA_LGPL is deprecated, please use -Pganglia-lgpl flag.")
profiles ++= Seq("spark-ganglia-lgpl")
}
if (Properties.envOrNone("SPARK_HIVE").isDefined) {
println("NOTE: SPARK_HIVE is deprecated, please use -Phive flag.")
profiles ++= Seq("hive")
}
Properties.envOrNone("SPARK_HADOOP_VERSION") match {
case Some(v) =>
if (v.matches("0.23.*")) isAlphaYarn = true
println("NOTE: SPARK_HADOOP_VERSION is deprecated, please use -Dhadoop.version=" + v)
System.setProperty("hadoop.version", v)
case None =>
}
if (Properties.envOrNone("SPARK_YARN").isDefined) {
if(isAlphaYarn) {
println("NOTE: SPARK_YARN is deprecated, please use -Pyarn-alpha flag.")
profiles ++= Seq("yarn-alpha")
}
else {
println("NOTE: SPARK_YARN is deprecated, please use -Pyarn flag.")
profiles ++= Seq("yarn")
}
}
profiles
}
override val profiles = Properties.envOrNone("MAVEN_PROFILES") match {
case None => backwardCompatibility
// Rationale: If -P option exists no need to support backwardCompatibility.
case Some(v) =>
if (backwardCompatibility.nonEmpty)
println("Note: We ignore environment variables, when use of profile is detected in " +
"conjunction with environment variable.")
v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
}
override val userPropertiesMap = System.getProperties.toMap
lazy val sharedSettings = graphSettings ++ ScalaStyleSettings ++ Seq (
javaHome := Properties.envOrNone("JAVA_HOME").map(file),
incOptions := incOptions.value.withNameHashing(true),
retrieveManaged := true,
retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
publishMavenStyle := true
)
/** Following project only exists to pull previous artifacts of Spark for generating
Mima ignores. For more information see: SPARK 2071 */
lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings)
def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef
lazy val repl = Project("repl", file("repl"), settings = replSettings)
.dependsOn(replDependencies.map(a => a: sbt.ClasspathDep[sbt.ProjectReference]): _*)
lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core)
lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core)
lazy val catalyst = Project("catalyst", file("sql/catalyst"), settings = catalystSettings) dependsOn(core)
lazy val sql = Project("sql", file("sql/core"), settings = sqlCoreSettings) dependsOn(core) dependsOn(catalyst % "compile->compile;test->test")
lazy val hive = Project("hive", file("sql/hive"), settings = hiveSettings) dependsOn(sql)
lazy val maybeHive: Seq[ClasspathDependency] = if (isHiveEnabled) Seq(hive) else Seq()
lazy val maybeHiveRef: Seq[ProjectReference] = if (isHiveEnabled) Seq(hive) else Seq()
lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core)
lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
.dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*)
lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps")
lazy val assembleDeps = assembleDepsTask := {
println()
println("**** NOTE ****")
println("'sbt/sbt assemble-deps' is no longer supported.")
println("Instead create a normal assembly and:")
println(" export SPARK_PREPEND_CLASSES=1 (toggle on)")
println(" unset SPARK_PREPEND_CLASSES (toggle off)")
println()
def versionArtifact(id: String): Option[sbt.ModuleID] = {
val fullId = id + "_2.10"
Some("org.apache.spark" % fullId % "1.0.0")
}
// A configuration to set an alternative publishLocalConfiguration
lazy val MavenCompile = config("m2r") extend(Compile)
lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
val sparkHome = System.getProperty("user.dir")
// Allows build configuration to be set through environment variables
lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
case None => {
val isNewHadoopVersion = "^2\\.[2-9]+".r.findFirstIn(hadoopVersion).isDefined
(isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP)
}
case Some(v) => v.toBoolean
}
lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match {
case None => DEFAULT_YARN
case Some(v) => v.toBoolean
}
lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
val maybeAvro = if (hadoopVersion.startsWith("0.23.")) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
lazy val isHiveEnabled = Properties.envOrNone("SPARK_HIVE") match {
case None => DEFAULT_HIVE
case Some(v) => v.toBoolean
}
// Include Ganglia integration if the user has enabled Ganglia
// This is isolated from the normal build due to LGPL-licensed code in the library
lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
// Include the Java 8 project if the JVM version is 8+
lazy val javaVersion = System.getProperty("java.specification.version")
lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
// Include the YARN project if the user has enabled YARN
lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
.dependsOn(streaming % "compile->compile;test->test")
lazy val externalKafka = Project("external-kafka", file("external/kafka"), settings = kafkaSettings)
.dependsOn(streaming % "compile->compile;test->test")
lazy val externalFlume = Project("external-flume", file("external/flume"), settings = flumeSettings)
.dependsOn(streaming % "compile->compile;test->test")
lazy val externalZeromq = Project("external-zeromq", file("external/zeromq"), settings = zeromqSettings)
.dependsOn(streaming % "compile->compile;test->test")
lazy val externalMqtt = Project("external-mqtt", file("external/mqtt"), settings = mqttSettings)
.dependsOn(streaming % "compile->compile;test->test")
lazy val allExternal = Seq[ClasspathDependency](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
lazy val examples = Project("examples", file("examples"), settings = examplesSettings)
.dependsOn(core, mllib, graphx, bagel, streaming, hive) dependsOn(allExternal: _*)
// Everything except assembly, hive, tools, java8Tests and examples belong to packageProjects
lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeHiveRef ++ maybeGangliaRef
lazy val allProjects = packageProjects ++ allExternalRefs ++
Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
organization := "org.apache.spark",
version := SPARK_VERSION,
def oldDepsSettings() = Defaults.defaultSettings ++ Seq(
name := "old-deps",
scalaVersion := "2.10.4",
scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation", "-feature",
"-target:" + SCALAC_JVM_VERSION),
javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION),
unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
retrieveManaged := true,
javaHome := Properties.envOrNone("JAVA_HOME").map(file),
// This is to add convenience of enabling sbt -Dsbt.offline=true for making the build offline.
offline := "true".equalsIgnoreCase(sys.props("sbt.offline")),
retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
transitiveClassifiers in Scope.GlobalScope := Seq("sources"),
testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),
incOptions := incOptions.value.withNameHashing(true),
// Fork new JVMs for tests and set Java options for those
fork := true,
javaOptions in Test += "-Dspark.home=" + sparkHome,
javaOptions in Test += "-Dspark.testing=1",
javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true",
javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark").map { case (k,v) => s"-D$k=$v" }.toSeq,
javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g".split(" ").toSeq,
javaOptions += "-Xmx3g",
// Show full stack trace and duration in test cases.
testOptions in Test += Tests.Argument("-oDF"),
// Remove certain packages from Scaladoc
scalacOptions in (Compile, doc) := Seq(
"-groups",
"-skip-packages", Seq(
"akka",
"org.apache.spark.api.python",
"org.apache.spark.network",
"org.apache.spark.deploy",
"org.apache.spark.util.collection"
).mkString(":"),
"-doc-title", "Spark " + SPARK_VERSION_SHORT + " ScalaDoc"
),
libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
"spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
"spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
"spark-core").map(versionArtifact(_).get intransitive())
)
// Only allow one test at a time, even across projects, since they run in the same JVM
concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
resolvers ++= Seq(
// HTTPS is unavailable for Maven Central
"Maven Repository" at "http://repo.maven.apache.org/maven2",
"Apache Repository" at "https://repository.apache.org/content/repositories/releases",
"JBoss Repository" at "https://repository.jboss.org/nexus/content/repositories/releases/",
"MQTT Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/",
"Cloudera Repository" at "http://repository.cloudera.com/artifactory/cloudera-repos/",
"Pivotal Repository" at "http://repo.spring.io/libs-release/",
// For Sonatype publishing
// "sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",
// "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/",
// also check the local Maven repository ~/.m2
Resolver.mavenLocal
),
publishMavenStyle := true,
// useGpg in Global := true,
pomExtra := (
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
<version>14</version>
</parent>
<url>http://spark.apache.org/</url>
<licenses>
<license>
<name>Apache 2.0 License</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:git@github.com:apache/spark.git</connection>
<url>scm:git:git@github.com:apache/spark.git</url>
</scm>
<developers>
<developer>
<id>matei</id>
<name>Matei Zaharia</name>
<email>matei.zaharia@gmail.com</email>
<url>http://www.cs.berkeley.edu/~matei</url>
<organization>Apache Software Foundation</organization>
<organizationUrl>http://spark.apache.org</organizationUrl>
</developer>
</developers>
<issueManagement>
<system>JIRA</system>
<url>https://issues.apache.org/jira/browse/SPARK</url>
</issueManagement>
),
/*
publishTo <<= version { (v: String) =>
val nexus = "https://oss.sonatype.org/"
if (v.trim.endsWith("SNAPSHOT"))
Some("sonatype-snapshots" at nexus + "content/repositories/snapshots")
else
Some("sonatype-staging" at nexus + "service/local/staging/deploy/maven2")
},
*/
libraryDependencies ++= Seq(
"io.netty" % "netty-all" % "4.0.17.Final",
"org.eclipse.jetty" % "jetty-server" % jettyVersion,
"org.eclipse.jetty" % "jetty-util" % jettyVersion,
"org.eclipse.jetty" % "jetty-plus" % jettyVersion,
"org.eclipse.jetty" % "jetty-security" % jettyVersion,
"org.scalatest" %% "scalatest" % "2.1.5" % "test",
"org.scalacheck" %% "scalacheck" % "1.11.3" % "test",
"com.novocode" % "junit-interface" % "0.10" % "test",
"org.easymock" % "easymockclassextension" % "3.1" % "test",
"org.mockito" % "mockito-all" % "1.9.0" % "test",
"junit" % "junit" % "4.10" % "test",
// Needed by cglib which is needed by easymock.
"asm" % "asm" % "3.3.1" % "test"
),
testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
parallelExecution := true,
/* Workaround for issue #206 (fixed after SBT 0.11.0) */
watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task,
const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) },
otherResolvers := Seq(Resolver.file("dotM2", file(Path.userHome + "/.m2/repository"))),
publishLocalConfiguration in MavenCompile <<= (packagedArtifacts, deliverLocal, ivyLoggingLevel) map {
(arts, _, level) => new PublishConfiguration(None, "dotM2", arts, Seq(), level)
},
publishMavenStyle in MavenCompile := true,
publishLocal in MavenCompile <<= publishTask(publishLocalConfiguration in MavenCompile, deliverLocal),
publishLocalBoth <<= Seq(publishLocal in MavenCompile, publishLocal).dependOn
) ++ net.virtualvoid.sbt.graph.Plugin.graphSettings ++ ScalaStyleSettings ++ genjavadocSettings
val akkaVersion = "2.2.3-shaded-protobuf"
val chillVersion = "0.3.6"
val codahaleMetricsVersion = "3.0.0"
val jblasVersion = "1.2.3"
val jets3tVersion = if ("^2\\.[3-9]+".r.findFirstIn(hadoopVersion).isDefined) "0.9.0" else "0.7.1"
val jettyVersion = "8.1.14.v20131031"
val hiveVersion = "0.12.0"
val parquetVersion = "1.4.3"
val slf4jVersion = "1.7.5"
val excludeJBossNetty = ExclusionRule(organization = "org.jboss.netty")
val excludeIONetty = ExclusionRule(organization = "io.netty")
val excludeEclipseJetty = ExclusionRule(organization = "org.eclipse.jetty")
val excludeAsm = ExclusionRule(organization = "org.ow2.asm")
val excludeOldAsm = ExclusionRule(organization = "asm")
val excludeCommonsLogging = ExclusionRule(organization = "commons-logging")
val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")
val excludeHadoop = ExclusionRule(organization = "org.apache.hadoop")
val excludeCurator = ExclusionRule(organization = "org.apache.curator")
val excludePowermock = ExclusionRule(organization = "org.powermock")
val excludeFastutil = ExclusionRule(organization = "it.unimi.dsi")
val excludeJruby = ExclusionRule(organization = "org.jruby")
val excludeThrift = ExclusionRule(organization = "org.apache.thrift")
val excludeServletApi = ExclusionRule(organization = "javax.servlet", artifact = "servlet-api")
val excludeJUnit = ExclusionRule(organization = "junit")
def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
version: String = "1.0.0", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
Some(organization % fullId % version) // the artifact to compare binary compatibility with
def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = {
val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]())
projectsMap += (projectRef.project -> (existingSettings ++ settings))
}
def coreSettings = sharedSettings ++ Seq(
name := "spark-core",
libraryDependencies ++= Seq(
"com.google.guava" % "guava" % "14.0.1",
"org.apache.commons" % "commons-lang3" % "3.3.2",
"org.apache.commons" % "commons-math3" % "3.3" % "test",
"com.google.code.findbugs" % "jsr305" % "1.3.9",
"log4j" % "log4j" % "1.2.17",
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-log4j12" % slf4jVersion,
"org.slf4j" % "jul-to-slf4j" % slf4jVersion,
"org.slf4j" % "jcl-over-slf4j" % slf4jVersion,
"commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407
"com.ning" % "compress-lzf" % "1.0.0",
"org.xerial.snappy" % "snappy-java" % "1.0.5",
"org.spark-project.akka" %% "akka-remote" % akkaVersion,
"org.spark-project.akka" %% "akka-slf4j" % akkaVersion,
"org.spark-project.akka" %% "akka-testkit" % akkaVersion % "test",
"org.json4s" %% "json4s-jackson" % "3.2.6" excludeAll(excludeScalap),
"colt" % "colt" % "1.2.0",
"org.apache.mesos" % "mesos" % "0.18.1" classifier("shaded-protobuf") exclude("com.google.protobuf", "protobuf-java"),
"commons-net" % "commons-net" % "2.2",
"net.java.dev.jets3t" % "jets3t" % jets3tVersion excludeAll(excludeCommonsLogging),
"commons-codec" % "commons-codec" % "1.5", // Prevent jets3t from including the older version of commons-codec
"org.apache.derby" % "derby" % "10.4.2.0" % "test",
"org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeCommonsLogging, excludeSLF4J, excludeOldAsm, excludeServletApi),
"org.apache.curator" % "curator-recipes" % "2.4.0" excludeAll(excludeJBossNetty),
"com.codahale.metrics" % "metrics-core" % codahaleMetricsVersion,
"com.codahale.metrics" % "metrics-jvm" % codahaleMetricsVersion,
"com.codahale.metrics" % "metrics-json" % codahaleMetricsVersion,
"com.codahale.metrics" % "metrics-graphite" % codahaleMetricsVersion,
"com.twitter" %% "chill" % chillVersion excludeAll(excludeAsm),
"com.twitter" % "chill-java" % chillVersion excludeAll(excludeAsm),
"org.tachyonproject" % "tachyon" % "0.4.1-thrift" excludeAll(excludeHadoop, excludeCurator, excludeEclipseJetty, excludePowermock),
"com.clearspring.analytics" % "stream" % "2.7.0" excludeAll(excludeFastutil), // Only HyperLogLogPlus is used, which does not depend on fastutil.
"org.spark-project" % "pyrolite" % "2.0.1",
"net.sf.py4j" % "py4j" % "0.8.1"
),
libraryDependencies ++= maybeAvro,
assembleDeps,
previousArtifact := sparkPreviousArtifact("spark-core")
)
// Note ordering of these settings matter.
/* Enable shared settings on all projects */
(allProjects ++ optionallyEnabledProjects ++ assemblyProjects).foreach(enable(sharedSettings))
// Create a colon-separate package list adding "org.apache.spark" in front of all of them,
// for easier specification of JavaDoc package groups
def packageList(names: String*): String = {
names.map(s => "org.apache.spark." + s).mkString(":")
/* Enable tests settings for all projects except examples, assembly and tools */
(allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings))
/* Enable Mima for all projects except spark, hive, catalyst, sql and repl */
// TODO: Add Sql to mima checks
allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)).
foreach (x => enable(MimaBuild.mimaSettings(sparkHome, x))(x))
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
/* Enable unidoc only for the root spark project */
enable(Unidoc.settings)(spark)
/* Hive console settings */
enable(Hive.settings)(hive)
// TODO: move this to its upstream project.
override def projectDefinitions(baseDirectory: File): Seq[Project] = {
super.projectDefinitions(baseDirectory).map { x =>
if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*)
else x.settings(Seq[Setting[_]](): _*)
} ++ Seq[Project](oldDeps)
}
def rootSettings = sharedSettings ++ scalaJavaUnidocSettings ++ Seq(
publish := {},
}
unidocProjectFilter in (ScalaUnidoc, unidoc) :=
inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha),
unidocProjectFilter in (JavaUnidoc, unidoc) :=
inAnyProject -- inProjects(repl, examples, bagel, graphx, catalyst, tools, yarn, yarnAlpha),
object Hive {
// Skip class names containing $ and some internal packages in Javadocs
unidocAllSources in (JavaUnidoc, unidoc) := {
(unidocAllSources in (JavaUnidoc, unidoc)).value
.map(_.filterNot(_.getName.contains("$")))
.map(_.filterNot(_.getCanonicalPath.contains("akka")))
.map(_.filterNot(_.getCanonicalPath.contains("deploy")))
.map(_.filterNot(_.getCanonicalPath.contains("network")))
.map(_.filterNot(_.getCanonicalPath.contains("executor")))
.map(_.filterNot(_.getCanonicalPath.contains("python")))
.map(_.filterNot(_.getCanonicalPath.contains("collection")))
},
lazy val settings = Seq(
// Javadoc options: create a window title, and group key packages on index page
javacOptions in doc := Seq(
"-windowtitle", "Spark " + SPARK_VERSION_SHORT + " JavaDoc",
"-public",
"-group", "Core Java API", packageList("api.java", "api.java.function"),
"-group", "Spark Streaming", packageList(
"streaming.api.java", "streaming.flume", "streaming.kafka",
"streaming.mqtt", "streaming.twitter", "streaming.zeromq"
),
"-group", "MLlib", packageList(
"mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg",
"mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation",
"mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration",
"mllib.tree.impurity", "mllib.tree.model", "mllib.util"
),
"-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"),
"-noqualifier", "java.lang"
)
)
def replSettings = sharedSettings ++ Seq(
name := "spark-repl",
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v),
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v),
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v)
)
def examplesSettings = sharedSettings ++ Seq(
name := "spark-examples",
jarName in assembly <<= version map {
v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" },
libraryDependencies ++= Seq(
"com.twitter" %% "algebird-core" % "0.1.11",
"org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeIONetty, excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeJruby),
"org.apache.cassandra" % "cassandra-all" % "1.2.6"
exclude("com.google.guava", "guava")
exclude("com.googlecode.concurrentlinkedhashmap", "concurrentlinkedhashmap-lru")
exclude("com.ning","compress-lzf")
exclude("io.netty", "netty")
exclude("jline","jline")
exclude("org.apache.cassandra.deps", "avro")
excludeAll(excludeSLF4J, excludeIONetty),
"com.github.scopt" %% "scopt" % "3.2.0"
)
) ++ assemblySettings ++ extraAssemblySettings
def toolsSettings = sharedSettings ++ Seq(
name := "spark-tools",
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v),
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v )
) ++ assemblySettings ++ extraAssemblySettings
def graphxSettings = sharedSettings ++ Seq(
name := "spark-graphx",
previousArtifact := sparkPreviousArtifact("spark-graphx"),
libraryDependencies ++= Seq(
"org.jblas" % "jblas" % jblasVersion
)
)
def bagelSettings = sharedSettings ++ Seq(
name := "spark-bagel",
previousArtifact := sparkPreviousArtifact("spark-bagel")
)
def mllibSettings = sharedSettings ++ Seq(
name := "spark-mllib",
previousArtifact := sparkPreviousArtifact("spark-mllib"),
libraryDependencies ++= Seq(
"org.jblas" % "jblas" % jblasVersion,
"org.scalanlp" %% "breeze" % "0.7" excludeAll(excludeJUnit)
)
)
def catalystSettings = sharedSettings ++ Seq(
name := "catalyst",
// The mechanics of rewriting expression ids to compare trees in some test cases makes
// assumptions about the the expression ids being contiguous. Running tests in parallel breaks
// this non-deterministically. TODO: FIX THIS.
parallelExecution in Test := false,
libraryDependencies ++= Seq(
"com.typesafe" %% "scalalogging-slf4j" % "1.0.1"
)
)
def sqlCoreSettings = sharedSettings ++ Seq(
name := "spark-sql",
libraryDependencies ++= Seq(
"com.twitter" % "parquet-column" % parquetVersion,
"com.twitter" % "parquet-hadoop" % parquetVersion,
"com.fasterxml.jackson.core" % "jackson-databind" % "2.3.0" // json4s-jackson 3.2.6 requires jackson-databind 2.3.0.
),
initialCommands in console :=
"""
|import org.apache.spark.sql.catalyst.analysis._
|import org.apache.spark.sql.catalyst.dsl._
|import org.apache.spark.sql.catalyst.errors._
|import org.apache.spark.sql.catalyst.expressions._
|import org.apache.spark.sql.catalyst.plans.logical._
|import org.apache.spark.sql.catalyst.rules._
|import org.apache.spark.sql.catalyst.types._
|import org.apache.spark.sql.catalyst.util._
|import org.apache.spark.sql.execution
|import org.apache.spark.sql.test.TestSQLContext._
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin
)
// Since we don't include hive in the main assembly this project also acts as an alternative
// assembly jar.
def hiveSettings = sharedSettings ++ Seq(
name := "spark-hive",
javaOptions += "-XX:MaxPermSize=1g",
libraryDependencies ++= Seq(
"org.spark-project.hive" % "hive-metastore" % hiveVersion,
"org.spark-project.hive" % "hive-exec" % hiveVersion excludeAll(excludeCommonsLogging),
"org.spark-project.hive" % "hive-serde" % hiveVersion
),
// Multiple queries rely on the TestHive singleton. See comments there for more details.
parallelExecution in Test := false,
// Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings
@@ -555,67 +190,16 @@ object SparkBuild extends Build {
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin
)
def streamingSettings = sharedSettings ++ Seq(
name := "spark-streaming",
previousArtifact := sparkPreviousArtifact("spark-streaming")
)
}
def yarnCommonSettings = sharedSettings ++ Seq(
unmanagedSourceDirectories in Compile <++= baseDirectory { base =>
Seq(
base / "../common/src/main/scala"
)
},
object Assembly {
import sbtassembly.Plugin._
import AssemblyKeys._
unmanagedSourceDirectories in Test <++= baseDirectory { base =>
Seq(
base / "../common/src/test/scala"
)
}
) ++ extraYarnSettings
def yarnAlphaSettings = yarnCommonSettings ++ Seq(
name := "spark-yarn-alpha"
)
def yarnSettings = yarnCommonSettings ++ Seq(
name := "spark-yarn"
)
def gangliaSettings = sharedSettings ++ Seq(
name := "spark-ganglia-lgpl",
libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
)
def java8TestsSettings = sharedSettings ++ Seq(
name := "java8-tests",
javacOptions := Seq("-target", "1.8", "-source", "1.8"),
testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a")
)
// Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain
// if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN).
def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq()
def yarnEnabledSettings = Seq(
libraryDependencies ++= Seq(
// Exclude rule required for all ?
"org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm),
"org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
"org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
"org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
"org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeServletApi)
)
)
def assemblyProjSettings = sharedSettings ++ Seq(
name := "spark-assembly",
jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }
) ++ assemblySettings ++ extraAssemblySettings
def extraAssemblySettings() = Seq(
lazy val settings = assemblySettings ++ Seq(
test in assembly := {},
jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" +
Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" },
mergeStrategy in assembly := {
case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
@@ -627,57 +211,95 @@ object SparkBuild extends Build {
}
)
def oldDepsSettings() = Defaults.defaultSettings ++ Seq(
name := "old-deps",
scalaVersion := "2.10.4",
retrieveManaged := true,
retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
"spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
"spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
"spark-core").map(sparkPreviousArtifact(_).get intransitive())
)
}
def twitterSettings() = sharedSettings ++ Seq(
name := "spark-streaming-twitter",
previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
libraryDependencies ++= Seq(
"org.twitter4j" % "twitter4j-stream" % "3.0.3"
)
)
object Unidoc {
def kafkaSettings() = sharedSettings ++ Seq(
name := "spark-streaming-kafka",
previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
libraryDependencies ++= Seq(
"com.github.sgroschupf" % "zkclient" % "0.1",
"org.apache.kafka" %% "kafka" % "0.8.0"
exclude("com.sun.jdmk", "jmxtools")
exclude("com.sun.jmx", "jmxri")
exclude("net.sf.jopt-simple", "jopt-simple")
excludeAll(excludeSLF4J)
)
)
import BuildCommons._
import sbtunidoc.Plugin._
import UnidocKeys._
def flumeSettings() = sharedSettings ++ Seq(
name := "spark-streaming-flume",
previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
libraryDependencies ++= Seq(
"org.apache.flume" % "flume-ng-sdk" % "1.4.0" % "compile" excludeAll(excludeIONetty, excludeThrift)
)
)
// for easier specification of JavaDoc package groups
private def packageList(names: String*): String = {
names.map(s => "org.apache.spark." + s).mkString(":")
}
def zeromqSettings() = sharedSettings ++ Seq(
name := "spark-streaming-zeromq",
previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
libraryDependencies ++= Seq(
"org.spark-project.akka" %% "akka-zeromq" % akkaVersion
)
)
lazy val settings = scalaJavaUnidocSettings ++ Seq (
publish := {},
def mqttSettings() = streamingSettings ++ Seq(
name := "spark-streaming-mqtt",
previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
unidocProjectFilter in(ScalaUnidoc, unidoc) :=
inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha),
unidocProjectFilter in(JavaUnidoc, unidoc) :=
inAnyProject -- inProjects(repl, bagel, graphx, examples, tools, catalyst, yarn, yarnAlpha),
// Skip class names containing $ and some internal packages in Javadocs
unidocAllSources in (JavaUnidoc, unidoc) := {
(unidocAllSources in (JavaUnidoc, unidoc)).value
.map(_.filterNot(_.getName.contains("$")))
.map(_.filterNot(_.getCanonicalPath.contains("akka")))
.map(_.filterNot(_.getCanonicalPath.contains("deploy")))
.map(_.filterNot(_.getCanonicalPath.contains("network")))
.map(_.filterNot(_.getCanonicalPath.contains("executor")))
.map(_.filterNot(_.getCanonicalPath.contains("python")))
.map(_.filterNot(_.getCanonicalPath.contains("collection")))
},
// Javadoc options: create a window title, and group key packages on index page
javacOptions in doc := Seq(
"-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc",
"-public",
"-group", "Core Java API", packageList("api.java", "api.java.function"),
"-group", "Spark Streaming", packageList(
"streaming.api.java", "streaming.flume", "streaming.kafka",
"streaming.mqtt", "streaming.twitter", "streaming.zeromq"
),
"-group", "MLlib", packageList(
"mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg",
"mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation",
"mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration",
"mllib.tree.impurity", "mllib.tree.model", "mllib.util"
),
"-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"),
"-noqualifier", "java.lang"
)
)
}
object TestSettings {
import BuildCommons._
lazy val settings = Seq (
// Fork new JVMs for tests and set Java options for those
fork := true,
javaOptions in Test += "-Dspark.home=" + sparkHome,
javaOptions in Test += "-Dspark.testing=1",
javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true",
javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark")
.map { case (k,v) => s"-D$k=$v" }.toSeq,
javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g"
.split(" ").toSeq,
javaOptions += "-Xmx3g",
// Show full stack trace and duration in test cases.
testOptions in Test += Tests.Argument("-oDF"),
testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
// Enable Junit testing.
libraryDependencies += "com.novocode" % "junit-interface" % "0.9" % "test",
// Only allow one test at a time, even across projects, since they run in the same JVM
parallelExecution in Test := false,
concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
// Remove certain packages from Scaladoc
scalacOptions in (Compile, doc) := Seq(
"-groups",
"-skip-packages", Seq(
"akka",
"org.apache.spark.api.python",
"org.apache.spark.network",
"org.apache.spark.deploy",
"org.apache.spark.util.collection"
).mkString(":"),
"-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc"
)
)
}

View file

@@ -14,4 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
sbt.version=0.13.2
sbt.version=0.13.5

View file

@@ -24,8 +24,10 @@ import sbt.Keys._
* becomes available for scalastyle sbt plugin.
*/
object SparkPluginDef extends Build {
lazy val root = Project("plugins", file(".")) dependsOn(sparkStyle)
lazy val root = Project("plugins", file(".")) dependsOn(sparkStyle, sbtPomReader)
lazy val sparkStyle = Project("spark-style", file("spark-style"), settings = styleSettings)
lazy val sbtPomReader = uri("https://github.com/ScrapCodes/sbt-pom-reader.git")
// There is actually no need to publish this artifact.
def styleSettings = Defaults.defaultSettings ++ Seq (
name := "spark-style",

View file

@@ -32,6 +32,7 @@
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>repl</sbt.project.name>
<deb.install.path>/usr/share/spark</deb.install.path>
<deb.user>root</deb.user>
</properties>

View file

@@ -72,6 +72,7 @@ Usage: $script_name [options]
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-J is stripped)
-PmavenProfiles Enable a maven profile for the build.
In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.

View file

@@ -16,6 +16,7 @@ declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
declare -a maven_profiles
if test -x "$JAVA_HOME/bin/java"; then
echo -e "Using $JAVA_HOME as default JAVA_HOME."
@@ -87,6 +88,13 @@ addJava () {
dlog "[addJava] arg = '$1'"
java_args=( "${java_args[@]}" "$1" )
}
enableProfile () {
dlog "[enableProfile] arg = '$1'"
maven_profiles=( "${maven_profiles[@]}" "$1" )
export MAVEN_PROFILES="${maven_profiles[@]}"
}
addSbt () {
dlog "[addSbt] arg = '$1'"
sbt_commands=( "${sbt_commands[@]}" "$1" )
@@ -142,6 +150,7 @@ process_args () {
-D*) addJava "$1" && shift ;;
-J*) addJava "${1:2}" && shift ;;
-P*) enableProfile "$1" && shift ;;
*) addResidual "$1" && shift ;;
esac
done
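A minimal sketch of how the new flag reaches the build, based on the launcher-script hunks above: each `-P*` argument is collected by `enableProfile` and exported through `MAVEN_PROFILES`, so for example
`sbt/sbt -Pyarn -Phive clean assembly`
exports `MAVEN_PROFILES="-Pyarn -Phive"` to the sbt JVM, where `SparkBuild` strips the `-P` prefixes before enabling the matching Maven profiles.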

View file

@@ -31,6 +31,9 @@
<packaging>jar</packaging>
<name>Spark Project Catalyst</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>catalyst</sbt.project.name>
</properties>
<dependencies>
<dependency>

View file

@@ -31,6 +31,9 @@
<packaging>jar</packaging>
<name>Spark Project SQL</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>sql</sbt.project.name>
</properties>
<dependencies>
<dependency>

View file

@@ -31,6 +31,9 @@
<packaging>jar</packaging>
<name>Spark Project Hive</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>hive</sbt.project.name>
</properties>
<dependencies>
<dependency>
@@ -48,6 +51,11 @@
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.spark-project.hive</groupId>
<artifactId>hive-exec</artifactId>

View file

@@ -27,6 +27,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<properties>
<sbt.project.name>streaming</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Streaming</name>
<url>http://spark.apache.org/</url>

View file

@@ -26,6 +26,9 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-tools_2.10</artifactId>
<properties>
<sbt.project.name>tools</sbt.project.name>
</properties>
<packaging>jar</packaging>
<name>Spark Project Tools</name>
<url>http://spark.apache.org/</url>

View file

@@ -23,6 +23,9 @@
<version>1.1.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<properties>
<sbt.project.name>yarn-alpha</sbt.project.name>
</properties>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn-alpha_2.10</artifactId>

View file

@@ -28,6 +28,9 @@
<artifactId>yarn-parent_2.10</artifactId>
<packaging>pom</packaging>
<name>Spark Project YARN Parent POM</name>
<properties>
<sbt.project.name>yarn</sbt.project.name>
</properties>
<dependencies>
<dependency>

View file

@@ -23,6 +23,9 @@
<version>1.1.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<properties>
<sbt.project.name>yarn-stable</sbt.project.name>
</properties>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.10</artifactId>