SPARK-1094 Support MiMa for reporting binary compatibility accross versions.
This adds some changes on top of the initial work by @scrapcodes in #20: The goal here is to do automated checking of Spark commits to determine whether they break binary compatibility. 1. Special case for inner classes of package-private objects. 2. Made tools classes accessible when running `spark-class`. 3. Made some declared types in MLLib more general. 4. Various other improvements to exclude-generation script. 5. In-code documentation. Author: Patrick Wendell <pwendell@gmail.com> Author: Prashant Sharma <prashant.s@imaginea.com> Author: Prashant Sharma <scrapcodes@gmail.com> Closes #207 from pwendell/mima and squashes the following commits: 22ae267 [Patrick Wendell] New binary changes after upmerge 6c2030d [Patrick Wendell] Merge remote-tracking branch 'apache/master' into mima 3666cf1 [Patrick Wendell] Minor style change 0e0f570 [Patrick Wendell] Small fix and removing directory listings 647c547 [Patrick Wendell] Reveiw feedback. c39f3b5 [Patrick Wendell] Some enhancements to binary checking. 4c771e0 [Prashant Sharma] Added a tool to generate mima excludes and also adapted build to pick automatically. b551519 [Prashant Sharma] adding a new exclude after rebasing with master 651844c [Prashant Sharma] Support MiMa for reporting binary compatibility accross versions.
This commit is contained in:
parent
8043b7bc74
commit
dc126f2121
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -7,6 +7,7 @@
|
|||
sbt/*.jar
|
||||
.settings
|
||||
.cache
|
||||
.mima-excludes
|
||||
/build/
|
||||
work/
|
||||
out/
|
||||
|
|
|
@ -58,6 +58,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
|
|||
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
|
||||
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
|
||||
|
|
|
@ -137,8 +137,7 @@ fi
|
|||
|
||||
# Compute classpath using external script
|
||||
CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
|
||||
|
||||
if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
|
||||
if [[ "$1" =~ org.apache.spark.tools.* ]]; then
|
||||
CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
|
||||
fi
|
||||
|
||||
|
|
|
@ -59,3 +59,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
|
|||
export PYSPARK_PYTHON=/usr/local/bin/python2.7
|
||||
fi
|
||||
./python/run-tests
|
||||
|
||||
echo "========================================================================="
|
||||
echo "Detecting binary incompatibilites with MiMa"
|
||||
echo "========================================================================="
|
||||
./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
|
||||
sbt/sbt mima-report-binary-issues
|
||||
|
||||
|
|
83
project/MimaBuild.scala
Normal file
83
project/MimaBuild.scala
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
|
||||
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
|
||||
import sbt._
|
||||
|
||||
object MimaBuild {
|
||||
|
||||
def ignoredABIProblems(base: File) = {
|
||||
import com.typesafe.tools.mima.core._
|
||||
import com.typesafe.tools.mima.core.ProblemFilters._
|
||||
|
||||
// Excludes placed here will be used for all Spark versions
|
||||
val defaultExcludes = Seq()
|
||||
|
||||
// Read package-private excludes from file
|
||||
val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
|
||||
val excludeFile = file(excludeFilePath)
|
||||
val packagePrivateList: Seq[String] =
|
||||
if (!excludeFile.exists()) {
|
||||
Seq()
|
||||
} else {
|
||||
IO.read(excludeFile).split("\n")
|
||||
}
|
||||
|
||||
def excludeClass(className: String) = {
|
||||
Seq(
|
||||
excludePackage(className),
|
||||
ProblemFilters.exclude[MissingClassProblem](className),
|
||||
ProblemFilters.exclude[MissingTypesProblem](className),
|
||||
excludePackage(className + "$"),
|
||||
ProblemFilters.exclude[MissingClassProblem](className + "$"),
|
||||
ProblemFilters.exclude[MissingTypesProblem](className + "$")
|
||||
)
|
||||
}
|
||||
def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)
|
||||
|
||||
val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)
|
||||
|
||||
/* Excludes specific to a given version of Spark. When comparing the given version against
|
||||
its immediate predecessor, the excludes listed here will be applied. */
|
||||
val versionExcludes =
|
||||
SparkBuild.SPARK_VERSION match {
|
||||
case v if v.startsWith("1.0") =>
|
||||
Seq(
|
||||
excludePackage("org.apache.spark.api.java"),
|
||||
excludePackage("org.apache.spark.streaming.api.java"),
|
||||
excludePackage("org.apache.spark.mllib")
|
||||
) ++
|
||||
excludeSparkClass("rdd.ClassTags") ++
|
||||
excludeSparkClass("util.XORShiftRandom") ++
|
||||
excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
|
||||
excludeSparkClass("mllib.optimization.SquaredGradient") ++
|
||||
excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
|
||||
excludeSparkClass("mllib.regression.LassoWithSGD") ++
|
||||
excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
|
||||
case _ => Seq()
|
||||
}
|
||||
|
||||
defaultExcludes ++ packagePrivateExcludes ++ versionExcludes
|
||||
}
|
||||
|
||||
def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
|
||||
previousArtifact := None,
|
||||
binaryIssueFilters ++= ignoredABIProblems(sparkHome)
|
||||
)
|
||||
|
||||
}
|
|
@ -22,6 +22,7 @@ import sbtassembly.Plugin._
|
|||
import AssemblyKeys._
|
||||
import scala.util.Properties
|
||||
import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
|
||||
import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
|
@ -29,6 +30,8 @@ import scala.collection.JavaConversions._
|
|||
//import com.jsuereth.pgp.sbtplugin.PgpKeys._
|
||||
|
||||
object SparkBuild extends Build {
|
||||
val SPARK_VERSION = "1.0.0-SNAPSHOT"
|
||||
|
||||
// Hadoop version to build against. For example, "1.0.4" for Apache releases, or
|
||||
// "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
|
||||
// through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
|
||||
|
@ -146,9 +149,9 @@ object SparkBuild extends Build {
|
|||
lazy val allProjects = packageProjects ++ allExternalRefs ++
|
||||
Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests
|
||||
|
||||
def sharedSettings = Defaults.defaultSettings ++ Seq(
|
||||
def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
|
||||
organization := "org.apache.spark",
|
||||
version := "1.0.0-SNAPSHOT",
|
||||
version := SPARK_VERSION,
|
||||
scalaVersion := "2.10.3",
|
||||
scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
|
||||
"-target:" + SCALAC_JVM_VERSION),
|
||||
|
@ -284,9 +287,14 @@ object SparkBuild extends Build {
|
|||
val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
|
||||
val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")
|
||||
|
||||
def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
|
||||
version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
|
||||
val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
|
||||
Some(organization % fullId % version) // the artifact to compare binary compatibility with
|
||||
}
|
||||
|
||||
def coreSettings = sharedSettings ++ Seq(
|
||||
name := "spark-core",
|
||||
|
||||
libraryDependencies ++= Seq(
|
||||
"com.google.guava" % "guava" % "14.0.1",
|
||||
"com.google.code.findbugs" % "jsr305" % "1.3.9",
|
||||
|
@ -354,17 +362,20 @@ object SparkBuild extends Build {
|
|||
|
||||
def graphxSettings = sharedSettings ++ Seq(
|
||||
name := "spark-graphx",
|
||||
previousArtifact := sparkPreviousArtifact("spark-graphx"),
|
||||
libraryDependencies ++= Seq(
|
||||
"org.jblas" % "jblas" % "1.2.3"
|
||||
)
|
||||
)
|
||||
|
||||
def bagelSettings = sharedSettings ++ Seq(
|
||||
name := "spark-bagel"
|
||||
name := "spark-bagel",
|
||||
previousArtifact := sparkPreviousArtifact("spark-bagel")
|
||||
)
|
||||
|
||||
def mllibSettings = sharedSettings ++ Seq(
|
||||
name := "spark-mllib",
|
||||
previousArtifact := sparkPreviousArtifact("spark-mllib"),
|
||||
libraryDependencies ++= Seq(
|
||||
"org.jblas" % "jblas" % "1.2.3",
|
||||
"org.scalanlp" %% "breeze" % "0.7"
|
||||
|
@ -428,6 +439,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def streamingSettings = sharedSettings ++ Seq(
|
||||
name := "spark-streaming",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming"),
|
||||
libraryDependencies ++= Seq(
|
||||
"commons-io" % "commons-io" % "2.4"
|
||||
)
|
||||
|
@ -503,6 +515,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def twitterSettings() = sharedSettings ++ Seq(
|
||||
name := "spark-streaming-twitter",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
|
||||
libraryDependencies ++= Seq(
|
||||
"org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty)
|
||||
)
|
||||
|
@ -510,6 +523,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def kafkaSettings() = sharedSettings ++ Seq(
|
||||
name := "spark-streaming-kafka",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
|
||||
libraryDependencies ++= Seq(
|
||||
"com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty),
|
||||
"org.apache.kafka" %% "kafka" % "0.8.0"
|
||||
|
@ -522,6 +536,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def flumeSettings() = sharedSettings ++ Seq(
|
||||
name := "spark-streaming-flume",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
|
||||
libraryDependencies ++= Seq(
|
||||
"org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty)
|
||||
)
|
||||
|
@ -529,6 +544,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def zeromqSettings() = sharedSettings ++ Seq(
|
||||
name := "spark-streaming-zeromq",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
|
||||
libraryDependencies ++= Seq(
|
||||
"org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty)
|
||||
)
|
||||
|
@ -536,6 +552,7 @@ object SparkBuild extends Build {
|
|||
|
||||
def mqttSettings() = streamingSettings ++ Seq(
|
||||
name := "spark-streaming-mqtt",
|
||||
previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
|
||||
libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
|
||||
)
|
||||
}
|
||||
|
|
|
@ -19,4 +19,6 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")
|
|||
|
||||
addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0")
|
||||
|
||||
addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6")
|
||||
|
||||
addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0")
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.tools
|
||||
|
||||
import java.io.File
|
||||
import java.util.jar.JarFile
|
||||
|
||||
import scala.collection.mutable
|
||||
import scala.collection.JavaConversions._
|
||||
import scala.reflect.runtime.universe.runtimeMirror
|
||||
|
||||
/**
|
||||
* A tool for generating classes to be excluded during binary checking with MIMA. It is expected
|
||||
* that this tool is run with ./spark-class.
|
||||
*
|
||||
* MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
|
||||
* This tool looks at all currently package-private classes and generates exclusions for them. Note
|
||||
* that this approach is not sound. It can lead to false positives if we move or rename a previously
|
||||
* package-private class. It can lead to false negatives if someone explicitly makes a class
|
||||
* package-private that wasn't before. This exists only to help catch certain classes of changes
|
||||
* which might be difficult to catch during review.
|
||||
*/
|
||||
object GenerateMIMAIgnore {
|
||||
private val classLoader = Thread.currentThread().getContextClassLoader
|
||||
private val mirror = runtimeMirror(classLoader)
|
||||
|
||||
private def classesPrivateWithin(packageName: String): Set[String] = {
|
||||
|
||||
val classes = getClasses(packageName, classLoader)
|
||||
val privateClasses = mutable.HashSet[String]()
|
||||
|
||||
def isPackagePrivate(className: String) = {
|
||||
try {
|
||||
/* Couldn't figure out if it's possible to determine a-priori whether a given symbol
|
||||
is a module or class. */
|
||||
|
||||
val privateAsClass = mirror
|
||||
.staticClass(className)
|
||||
.privateWithin
|
||||
.fullName
|
||||
.startsWith(packageName)
|
||||
|
||||
val privateAsModule = mirror
|
||||
.staticModule(className)
|
||||
.privateWithin
|
||||
.fullName
|
||||
.startsWith(packageName)
|
||||
|
||||
privateAsClass || privateAsModule
|
||||
} catch {
|
||||
case _: Throwable => {
|
||||
println("Error determining visibility: " + className)
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (className <- classes) {
|
||||
val directlyPrivateSpark = isPackagePrivate(className)
|
||||
|
||||
/* Inner classes defined within a private[spark] class or object are effectively
|
||||
invisible, so we account for them as package private. */
|
||||
val indirectlyPrivateSpark = {
|
||||
val maybeOuter = className.toString.takeWhile(_ != '$')
|
||||
if (maybeOuter != className) {
|
||||
isPackagePrivate(maybeOuter)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
|
||||
}
|
||||
privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
|
||||
}
|
||||
|
||||
def main(args: Array[String]) {
|
||||
scala.tools.nsc.io.File(".mima-excludes").
|
||||
writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
|
||||
println("Created : .mima-excludes in current directory.")
|
||||
}
|
||||
|
||||
|
||||
private def shouldExclude(name: String) = {
|
||||
// Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
|
||||
name.contains("anon") ||
|
||||
name.endsWith("$class") ||
|
||||
name.contains("$sp")
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans all classes accessible from the context class loader which belong to the given package
|
||||
* and subpackages both from directories and jars present on the classpath.
|
||||
*/
|
||||
private def getClasses(packageName: String,
|
||||
classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
|
||||
val path = packageName.replace('.', '/')
|
||||
val resources = classLoader.getResources(path)
|
||||
|
||||
val jars = resources.filter(x => x.getProtocol == "jar")
|
||||
.map(_.getFile.split(":")(1).split("!")(0)).toSeq
|
||||
|
||||
jars.flatMap(getClassesFromJar(_, path))
|
||||
.map(_.getName)
|
||||
.filterNot(shouldExclude).toSet
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all classes in a package from a jar file.
|
||||
*/
|
||||
private def getClassesFromJar(jarPath: String, packageName: String) = {
|
||||
val jar = new JarFile(new File(jarPath))
|
||||
val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
|
||||
val classes = for (entry <- enums if entry.endsWith(".class"))
|
||||
yield Class.forName(entry.replace('/', '.').stripSuffix(".class"))
|
||||
classes
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue