[SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests

This patch adds a new build check which enumerates Spark's resolved runtime classpath and saves it to a file, then diffs against that file to detect whether pull requests have introduced dependency changes. The aim of this check is to make it simpler to reason about whether pull requests that modify the build have introduced new dependencies or changed transitive dependencies in a way that affects the final classpath.

This supplants the checks added in SPARK-4123 / #5093, which are currently disabled due to bugs.

This patch is based on pwendell's work in #8531.

Closes #8531.

Author: Josh Rosen <joshrosen@databricks.com>
Author: Patrick Wendell <patrick@databricks.com>

Closes #10461 from JoshRosen/SPARK-10359.
This commit is contained in:
Josh Rosen 2015-12-30 12:47:42 -08:00
parent d1ca634db4
commit 27a42c7108
10 changed files with 512 additions and 120 deletions

View file

@ -85,3 +85,4 @@ org.apache.spark.sql.sources.DataSourceRegister
org.apache.spark.scheduler.SparkHistoryListenerFactory
.*parquet
LZ4BlockInputStream.java
spark-deps-.*

View file

@ -0,0 +1,184 @@
JavaEWAH-0.3.2.jar
RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
activation-1.1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
arpack_combined_all-0.1.jar
asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.10-0.11.2.jar
breeze_2.10-0.11.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.5.0.jar
chill_2.10-0.5.0.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
commons-compiler-2.7.6.jar
commons-compress-1.4.1.jar
commons-configuration-1.6.jar
commons-dbcp-1.4.jar
commons-digester-1.8.jar
commons-httpclient-3.1.jar
commons-io-2.4.jar
commons-lang-2.6.jar
commons-lang3-3.3.2.jar
commons-logging-1.1.3.jar
commons-math3-3.4.1.jar
commons-net-2.2.jar
commons-pool-1.5.4.jar
compress-lzf-1.0.3.jar
config-1.2.1.jar
core-1.1.2.jar
curator-client-2.4.0.jar
curator-framework-2.4.0.jar
curator-recipes-2.4.0.jar
datanucleus-api-jdo-3.2.6.jar
datanucleus-core-3.2.10.jar
datanucleus-rdbms-3.2.9.jar
derby-10.10.1.1.jar
eigenbase-properties-1.1.5.jar
geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
groovy-all-2.1.6.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.3.0.jar
hadoop-auth-2.3.0.jar
hadoop-client-2.3.0.jar
hadoop-common-2.3.0.jar
hadoop-hdfs-2.3.0.jar
hadoop-mapreduce-client-app-2.3.0.jar
hadoop-mapreduce-client-common-2.3.0.jar
hadoop-mapreduce-client-core-2.3.0.jar
hadoop-mapreduce-client-jobclient-2.3.0.jar
hadoop-mapreduce-client-shuffle-2.3.0.jar
hadoop-yarn-api-2.3.0.jar
hadoop-yarn-client-2.3.0.jar
hadoop-yarn-common-2.3.0.jar
hadoop-yarn-server-common-2.3.0.jar
hadoop-yarn-server-web-proxy-2.3.0.jar
httpclient-4.3.2.jar
httpcore-4.3.2.jar
ivy-2.4.0.jar
jackson-annotations-2.4.4.jar
jackson-core-2.4.4.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.4.4.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-scala_2.10-2.4.4.jar
jackson-xc-1.9.13.jar
janino-2.7.8.jar
jansi-1.4.jar
java-xmlbuilder-1.0.jar
javax.inject-1.jar
javax.servlet-3.0.0.v201112011016.jar
javolution-5.5.1.jar
jaxb-api-2.2.2.jar
jaxb-impl-2.2.3-1.jar
jcl-over-slf4j-1.7.10.jar
jdo-api-3.0.1.jar
jersey-core-1.9.jar
jersey-guice-1.9.jar
jersey-json-1.9.jar
jersey-server-1.9.jar
jets3t-0.9.3.jar
jettison-1.1.jar
jetty-6.1.26.jar
jetty-all-7.6.0.v20120127.jar
jetty-util-6.1.26.jar
jline-2.10.5.jar
jline-2.12.jar
joda-time-2.9.jar
jodd-core-3.5.2.jar
jpam-1.1.jar
json-20090211.jar
json4s-ast_2.10-3.2.10.jar
json4s-core_2.10-3.2.10.jar
json4s-jackson_2.10-3.2.10.jar
jsr305-1.3.9.jar
jta-1.1.jar
jtransforms-2.4.0.jar
jul-to-slf4j-1.7.10.jar
kryo-2.21.jar
leveldbjni-all-1.8.jar
libfb303-0.9.2.jar
libthrift-0.9.2.jar
log4j-1.2.17.jar
lz4-1.3.0.jar
mail-1.4.7.jar
mesos-0.21.1-shaded-protobuf.jar
metrics-core-3.1.2.jar
metrics-graphite-3.1.2.jar
metrics-json-3.1.2.jar
metrics-jvm-3.1.2.jar
minlog-1.2.jar
mx4j-3.0.2.jar
netty-3.8.0.Final.jar
netty-all-4.0.29.Final.jar
objenesis-1.2.jar
opencsv-2.3.jar
oro-2.0.8.jar
paranamer-2.6.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
parquet-format-2.3.0-incubating.jar
parquet-generator-1.7.0.jar
parquet-hadoop-1.7.0.jar
parquet-hadoop-bundle-1.6.0.jar
parquet-jackson-1.7.0.jar
pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar
py4j-0.9.jar
pyrolite-4.9.jar
quasiquotes_2.10-2.0.0-M8.jar
reflectasm-1.07-shaded.jar
scala-compiler-2.10.5.jar
scala-library-2.10.5.jar
scala-reflect-2.10.5.jar
scalap-2.10.5.jar
servlet-api-2.5.jar
slf4j-api-1.7.10.jar
slf4j-log4j12-1.7.10.jar
snappy-0.2.jar
snappy-java-1.1.2.jar
spire-macros_2.10-0.7.4.jar
spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
tachyon-underfs-local-0.8.2.jar
tachyon-underfs-s3-0.8.2.jar
uncommons-maths-1.2.2a.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
xz-1.0.jar
zookeeper-3.4.5.jar

View file

@ -0,0 +1,185 @@
JavaEWAH-0.3.2.jar
RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
activation-1.1.1.jar
akka-actor_2.10-2.3.11.jar
akka-remote_2.10-2.3.11.jar
akka-slf4j_2.10-2.3.11.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
aopalliance-1.0.jar
apache-log4j-extras-1.2.17.jar
arpack_combined_all-0.1.jar
asm-3.1.jar
asm-commons-3.1.jar
asm-tree-3.1.jar
avro-1.7.7.jar
avro-ipc-1.7.7-tests.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.10-0.11.2.jar
breeze_2.10-0.11.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.5.0.jar
chill_2.10-0.5.0.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
commons-compiler-2.7.6.jar
commons-compress-1.4.1.jar
commons-configuration-1.6.jar
commons-dbcp-1.4.jar
commons-digester-1.8.jar
commons-httpclient-3.1.jar
commons-io-2.4.jar
commons-lang-2.6.jar
commons-lang3-3.3.2.jar
commons-logging-1.1.3.jar
commons-math3-3.4.1.jar
commons-net-2.2.jar
commons-pool-1.5.4.jar
compress-lzf-1.0.3.jar
config-1.2.1.jar
core-1.1.2.jar
curator-client-2.4.0.jar
curator-framework-2.4.0.jar
curator-recipes-2.4.0.jar
datanucleus-api-jdo-3.2.6.jar
datanucleus-core-3.2.10.jar
datanucleus-rdbms-3.2.9.jar
derby-10.10.1.1.jar
eigenbase-properties-1.1.5.jar
geronimo-annotation_1.0_spec-1.1.1.jar
geronimo-jaspic_1.0_spec-1.0.jar
geronimo-jta_1.1_spec-1.1.1.jar
groovy-all-2.1.6.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.4.0.jar
hadoop-auth-2.4.0.jar
hadoop-client-2.4.0.jar
hadoop-common-2.4.0.jar
hadoop-hdfs-2.4.0.jar
hadoop-mapreduce-client-app-2.4.0.jar
hadoop-mapreduce-client-common-2.4.0.jar
hadoop-mapreduce-client-core-2.4.0.jar
hadoop-mapreduce-client-jobclient-2.4.0.jar
hadoop-mapreduce-client-shuffle-2.4.0.jar
hadoop-yarn-api-2.4.0.jar
hadoop-yarn-client-2.4.0.jar
hadoop-yarn-common-2.4.0.jar
hadoop-yarn-server-common-2.4.0.jar
hadoop-yarn-server-web-proxy-2.4.0.jar
httpclient-4.3.2.jar
httpcore-4.3.2.jar
ivy-2.4.0.jar
jackson-annotations-2.4.4.jar
jackson-core-2.4.4.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.4.4.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-scala_2.10-2.4.4.jar
jackson-xc-1.9.13.jar
janino-2.7.8.jar
jansi-1.4.jar
java-xmlbuilder-1.0.jar
javax.inject-1.jar
javax.servlet-3.0.0.v201112011016.jar
javolution-5.5.1.jar
jaxb-api-2.2.2.jar
jaxb-impl-2.2.3-1.jar
jcl-over-slf4j-1.7.10.jar
jdo-api-3.0.1.jar
jersey-client-1.9.jar
jersey-core-1.9.jar
jersey-guice-1.9.jar
jersey-json-1.9.jar
jersey-server-1.9.jar
jets3t-0.9.3.jar
jettison-1.1.jar
jetty-6.1.26.jar
jetty-all-7.6.0.v20120127.jar
jetty-util-6.1.26.jar
jline-2.10.5.jar
jline-2.12.jar
joda-time-2.9.jar
jodd-core-3.5.2.jar
jpam-1.1.jar
json-20090211.jar
json4s-ast_2.10-3.2.10.jar
json4s-core_2.10-3.2.10.jar
json4s-jackson_2.10-3.2.10.jar
jsr305-1.3.9.jar
jta-1.1.jar
jtransforms-2.4.0.jar
jul-to-slf4j-1.7.10.jar
kryo-2.21.jar
leveldbjni-all-1.8.jar
libfb303-0.9.2.jar
libthrift-0.9.2.jar
log4j-1.2.17.jar
lz4-1.3.0.jar
mail-1.4.7.jar
mesos-0.21.1-shaded-protobuf.jar
metrics-core-3.1.2.jar
metrics-graphite-3.1.2.jar
metrics-json-3.1.2.jar
metrics-jvm-3.1.2.jar
minlog-1.2.jar
mx4j-3.0.2.jar
netty-3.8.0.Final.jar
netty-all-4.0.29.Final.jar
objenesis-1.2.jar
opencsv-2.3.jar
oro-2.0.8.jar
paranamer-2.6.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
parquet-format-2.3.0-incubating.jar
parquet-generator-1.7.0.jar
parquet-hadoop-1.7.0.jar
parquet-hadoop-bundle-1.6.0.jar
parquet-jackson-1.7.0.jar
pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar
py4j-0.9.jar
pyrolite-4.9.jar
quasiquotes_2.10-2.0.0-M8.jar
reflectasm-1.07-shaded.jar
scala-compiler-2.10.5.jar
scala-library-2.10.5.jar
scala-reflect-2.10.5.jar
scalap-2.10.5.jar
servlet-api-2.5.jar
slf4j-api-1.7.10.jar
slf4j-log4j12-1.7.10.jar
snappy-0.2.jar
snappy-java-1.1.2.jar
spire-macros_2.10-0.7.4.jar
spire_2.10-0.7.4.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
tachyon-client-0.8.2.jar
tachyon-underfs-hdfs-0.8.2.jar
tachyon-underfs-local-0.8.2.jar
tachyon-underfs-s3-0.8.2.jar
uncommons-maths-1.2.2a.jar
unused-1.0.0.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
xz-1.0.jar
zookeeper-3.4.5.jar

View file

@ -124,6 +124,7 @@ def run_tests(tests_timeout):
ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
ERROR_CODES["BLOCK_BUILD"]: 'to build',
ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
@ -193,7 +194,6 @@ def main():
pr_tests = [
"pr_merge_ability",
"pr_public_classes"
# DISABLED (pwendell) "pr_new_dependencies"
]
# `bind_message_base` returns a function to generate messages for Github posting

View file

@ -417,6 +417,11 @@ def run_python_tests(test_modules, parallelism):
run_cmd(command)
def run_build_tests():
set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS")
run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")])
def run_sparkr_tests():
set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")
@ -537,6 +542,9 @@ def main():
# if "DOCS" in changed_modules and test_env == "amplab_jenkins":
# build_spark_documentation()
if any(m.should_run_build_tests for m in test_modules):
run_build_tests()
# spark build
build_apache_spark(build_tool, hadoop_version)

View file

@ -32,5 +32,6 @@ ERROR_CODES = {
"BLOCK_PYSPARK_UNIT_TESTS": 19,
"BLOCK_SPARKR_UNIT_TESTS": 20,
"BLOCK_JAVA_STYLE": 21,
"BLOCK_BUILD_TESTS": 22,
"BLOCK_TIMEOUT": 124
}

View file

@ -31,7 +31,7 @@ class Module(object):
def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(),
test_tags=(), should_run_r_tests=False):
test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
"""
Define a new module.
@ -53,6 +53,7 @@ class Module(object):
:param test_tags A set of tags that will be excluded when running unit tests if the module
is not explicitly changed.
:param should_run_r_tests: If true, changes in this module will trigger all R tests.
:param should_run_build_tests: If true, changes in this module will trigger build tests.
"""
self.name = name
self.dependencies = dependencies
@ -64,6 +65,7 @@ class Module(object):
self.blacklisted_python_implementations = blacklisted_python_implementations
self.test_tags = test_tags
self.should_run_r_tests = should_run_r_tests
self.should_run_build_tests = should_run_build_tests
self.dependent_modules = set()
for dep in dependencies:
@ -394,6 +396,14 @@ docs = Module(
]
)
build = Module(
name="build",
dependencies=[],
source_file_regexes=[
".*pom.xml",
"dev/test-dependencies.sh",
]
)
ec2 = Module(
name="ec2",
@ -433,5 +443,6 @@ root = Module(
"test",
],
python_test_goals=list(itertools.chain.from_iterable(m.python_test_goals for m in all_modules)),
should_run_r_tests=True
should_run_r_tests=True,
should_run_build_tests=True
)

102
dev/test-dependencies.sh Executable file
View file

@ -0,0 +1,102 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Checks whether Spark's resolved runtime classpath matches the checked-in
# dependency manifests under dev/deps/. Pass --replace-manifest to regenerate
# the manifests instead of diffing against them.

set -e
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"
cd "$FWDIR"

# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.

# NOTE: These should match those in the release publishing script
HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
MVN="build/mvn --force"
HADOOP_PROFILES=(
  hadoop-2.3
  hadoop-2.4
)

# We'll switch the version to a temp. one, publish POMs using that new version, then switch back to
# the old version. We need to do this because the `dependency:build-classpath` task needs to
# resolve Spark's internal submodule dependencies.

# See http://stackoverflow.com/a/3545363 for an explanation of this one-liner.
# Use $MVN (build/mvn) rather than a bare `mvn` so this works on machines without
# a system-wide Maven install; the grep strips log/download noise from the output.
OLD_VERSION=$($MVN help:evaluate -Dexpression=project.version | grep -Ev '(^\[|Download\w+:)')
TEMP_VERSION="spark-$(date +%s | tail -c6)"

function reset_version {
  # Delete the temporary POMs that we wrote to the local Maven repo
  # (NUL-delimited so paths containing whitespace are handled safely):
  find "$HOME/.m2/" -path "*$TEMP_VERSION*" -print0 | xargs -0 rm -rf
  # Restore the original version number:
  $MVN -q versions:set -DnewVersion="$OLD_VERSION" -DgenerateBackupPoms=false > /dev/null
}
# Ensure the version is restored on every exit path, including failures:
trap reset_version EXIT

$MVN -q versions:set -DnewVersion="$TEMP_VERSION" -DgenerateBackupPoms=false > /dev/null

# Generate manifests for each Hadoop profile:
for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
  echo "Performing Maven install for $HADOOP_PROFILE"
  # $HADOOP2_MODULE_PROFILES is intentionally unquoted: it holds multiple -P flags.
  $MVN $HADOOP2_MODULE_PROFILES "-P$HADOOP_PROFILE" jar:jar install:install -q \
    -pl '!assembly' \
    -pl '!examples' \
    -pl '!external/flume-assembly' \
    -pl '!external/kafka-assembly' \
    -pl '!external/twitter' \
    -pl '!external/flume' \
    -pl '!external/mqtt' \
    -pl '!external/mqtt-assembly' \
    -pl '!external/zeromq' \
    -pl '!external/kafka' \
    -pl '!tags' \
    -DskipTests

  echo "Generating dependency manifest for $HADOOP_PROFILE"
  mkdir -p dev/pr-deps
  # Take the classpath line printed for the assembly module, split it into one
  # jar filename per line (rev|cut|rev strips the directory prefix), sort it,
  # and drop Spark's own artifacts:
  $MVN $HADOOP2_MODULE_PROFILES "-P$HADOOP_PROFILE" dependency:build-classpath -pl assembly \
    | grep "Building Spark Project Assembly" -A 5 \
    | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
    | grep -v spark > "dev/pr-deps/spark-deps-$HADOOP_PROFILE"
done

# With --replace-manifest, overwrite the checked-in manifests rather than diffing.
# ("$*", not $@: we match against all args joined as one string; a single `*` on
# each side is the correct glob — `**` has no special meaning inside [[ == ]].)
if [[ "$*" == *replace-manifest* ]]; then
  echo "Replacing manifests and creating new files at dev/deps"
  rm -rf dev/deps
  mv dev/pr-deps dev/deps
  exit 0
fi

for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
  # `git diff` exits non-zero when the files differ, so suspend -e while capturing:
  set +e
  dep_diff="$(
    git diff \
      --no-index \
      "dev/deps/spark-deps-$HADOOP_PROFILE" \
      "dev/pr-deps/spark-deps-$HADOOP_PROFILE" \
  )"
  set -e
  if [ "$dep_diff" != "" ]; then
    echo "Spark's published dependencies DO NOT MATCH the manifest file (dev/deps/spark-deps-$HADOOP_PROFILE)."
    echo "To update the manifest file, run './dev/test-dependencies.sh --replace-manifest'."
    echo "$dep_diff"
    rm -rf dev/pr-deps
    exit 1
  fi
done

View file

@ -1,117 +0,0 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# This script follows the base format for testing pull requests against
# another branch and returning results to be published. More details can be
# found at dev/run-tests-jenkins.
#
# Legacy dependency-diff check: builds Spark on both the PR branch and master
# and diffs the resulting assembly classpaths to report added/removed jars.
# NOTE(review): superseded by dev/test-dependencies.sh (SPARK-10359), which
# diffs against checked-in manifests instead of rebuilding master each time.
#
# Arg1: The Github Pull Request Actual Commit
#+ known as `ghprbActualCommit` in `run-tests-jenkins`
# Arg2: The SHA1 hash
#+ known as `sha1` in `run-tests-jenkins`
# Arg3: Current PR Commit Hash
#+ the PR hash for the current commit
#
ghprbActualCommit="$1"
sha1="$2"
current_pr_head="$3"
MVN_BIN="build/mvn"
CURR_CP_FILE="my-classpath.txt"
MASTER_CP_FILE="master-classpath.txt"
# First switch over to the master branch
git checkout -f master
# Find and copy all pom.xml files into a *.gate file that we can check
# against through various `git` changes
find -name "pom.xml" -exec cp {} {}.gate \;
# Switch back to the current PR
git checkout -f "${current_pr_head}"
# Check if any *.pom files from the current branch are different from the master
difference_q=""
for p in $(find -name "pom.xml"); do
# Concatenate each file's diff; any non-empty result means a POM changed.
[[ -f "${p}" && -f "${p}.gate" ]] && \
difference_q="${difference_q}$(diff $p.gate $p)"
done
# If no pom files were changed we can easily say no new dependencies were added
if [ -z "${difference_q}" ]; then
echo " * This patch does not change any dependencies."
else
# Else we need to manually build spark to determine what, if any, dependencies
# were added into the Spark assembly jar
# The pipeline below isolates the "Dependencies classpath:" line for the
# assembly module, splits the classpath into one entry per line, strips the
# directory prefix from each jar path (rev | cut | rev), and sorts the names.
${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
sed -n -e '/Building Spark Project Assembly/,$p' | \
grep --context=1 -m 2 "Dependencies classpath:" | \
head -n 3 | \
tail -n 1 | \
tr ":" "\n" | \
rev | \
cut -d "/" -f 1 | \
rev | \
sort > ${CURR_CP_FILE}
# Checkout the master branch to compare against
git checkout -f master
# Same extraction as above, this time for the master branch's classpath.
${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
sed -n -e '/Building Spark Project Assembly/,$p' | \
grep --context=1 -m 2 "Dependencies classpath:" | \
head -n 3 | \
tail -n 1 | \
tr ":" "\n" | \
rev | \
cut -d "/" -f 1 | \
rev | \
sort > ${MASTER_CP_FILE}
DIFF_RESULTS="`diff ${CURR_CP_FILE} ${MASTER_CP_FILE}`"
if [ -z "${DIFF_RESULTS}" ]; then
echo " * This patch does not change any dependencies."
else
# Pretty print the new dependencies: "<" lines come from the PR's classpath
# (added jars), ">" lines from master's classpath (removed jars).
added_deps=$(echo "${DIFF_RESULTS}" | grep "<" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}')
removed_deps=$(echo "${DIFF_RESULTS}" | grep ">" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}')
added_deps_text=" * This patch **adds the following new dependencies:**\n${added_deps}"
removed_deps_text=" * This patch **removes the following dependencies:**\n${removed_deps}"
# Construct the final message, including only the sections that apply:
return_mssg=""
[ -n "${added_deps}" ] && return_mssg="${added_deps_text}"
if [ -n "${removed_deps}" ]; then
if [ -n "${return_mssg}" ]; then
return_mssg="${return_mssg}\n${removed_deps_text}"
else
return_mssg="${removed_deps_text}"
fi
fi
echo "${return_mssg}"
fi
# Remove the files we've left over
[ -f "${CURR_CP_FILE}" ] && rm -f "${CURR_CP_FILE}"
[ -f "${MASTER_CP_FILE}" ] && rm -f "${MASTER_CP_FILE}"
# Clean up our mess from the Maven builds just in case
${MVN_BIN} clean &>/dev/null
fi

17
pom.xml
View file

@ -2113,6 +2113,23 @@
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>default-cli</id>
<goals>
<goal>build-classpath</goal>
</goals>
<configuration>
<!-- This includes dependencies with 'runtime' and 'compile' scopes;
see the docs for includeScope for more details -->
<includeScope>runtime</includeScope>
</configuration>
</execution>
</executions>
</plugin>
<!-- This plugin's configuration is used to store Eclipse m2e settings only. -->
<!-- It has no influence on the Maven build itself. -->
<plugin>