SPARK-1119 and other build improvements
1. Makes assembly and examples jar naming consistent in maven/sbt. 2. Updates make-distribution.sh to use Maven and fixes some bugs. 3. Updates the create-release script to call make-distribution script. Author: Patrick Wendell <pwendell@gmail.com> Closes #502 from pwendell/make-distribution and squashes the following commits: 1a97f0d [Patrick Wendell] SPARK-1119 and other build improvements
This commit is contained in:
parent
39f85e0322
commit
cd4ed29326
|
@ -33,7 +33,7 @@
|
|||
|
||||
<properties>
|
||||
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
|
||||
<spark.jar.basename>${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
|
||||
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
|
||||
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
|
||||
<deb.pkg.name>spark</deb.pkg.name>
|
||||
<deb.install.path>/usr/share/spark</deb.install.path>
|
||||
|
|
|
@ -50,9 +50,9 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
|
|||
else
|
||||
# Else use spark-assembly jar from either RELEASE or assembly directory
|
||||
if [ -f "$FWDIR/RELEASE" ]; then
|
||||
ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
|
||||
ASSEMBLY_JAR=`ls "$FWDIR"/lib/spark-assembly*hadoop*.jar`
|
||||
else
|
||||
ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
|
||||
ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar`
|
||||
fi
|
||||
CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
|
||||
fi
|
||||
|
|
|
@ -40,12 +40,15 @@ fi
|
|||
# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
|
||||
# to avoid the -sources and -doc packages that are built by publish-local.
|
||||
EXAMPLES_DIR="$FWDIR"/examples
|
||||
SPARK_EXAMPLES_JAR=""
|
||||
if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
|
||||
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
|
||||
|
||||
if [ -f "$FWDIR/RELEASE" ]; then
|
||||
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
|
||||
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
|
||||
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
|
||||
fi
|
||||
|
||||
if [[ -z $SPARK_EXAMPLES_JAR ]]; then
|
||||
echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
|
||||
echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
|
||||
echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
@ -83,15 +83,15 @@ rm -rf spark-$RELEASE_VERSION
|
|||
|
||||
make_binary_release() {
|
||||
NAME=$1
|
||||
MAVEN_FLAGS=$2
|
||||
|
||||
FLAGS=$2
|
||||
cp -r spark spark-$RELEASE_VERSION-bin-$NAME
|
||||
|
||||
cd spark-$RELEASE_VERSION-bin-$NAME
|
||||
export MAVEN_OPTS="-Xmx3g -XX:MaxPermSize=1g -XX:ReservedCodeCacheSize=1g"
|
||||
mvn $MAVEN_FLAGS -DskipTests clean package
|
||||
find . -name test-classes -type d | xargs rm -rf
|
||||
find . -name classes -type d | xargs rm -rf
|
||||
./make-distribution.sh $FLAGS --name $NAME --tgz
|
||||
cd ..
|
||||
cp spark-$RELEASE_VERSION-bin-$NAME/spark-$RELEASE_VERSION-bin-$NAME.tgz .
|
||||
rm -rf spark-$RELEASE_VERSION-bin-$NAME
|
||||
|
||||
tar cvzf spark-$RELEASE_VERSION-bin-$NAME.tgz spark-$RELEASE_VERSION-bin-$NAME
|
||||
echo $GPG_PASSPHRASE | gpg --passphrase-fd 0 --armour \
|
||||
--output spark-$RELEASE_VERSION-bin-$NAME.tgz.asc \
|
||||
|
@ -105,9 +105,9 @@ make_binary_release() {
|
|||
rm -rf spark-$RELEASE_VERSION-bin-$NAME
|
||||
}
|
||||
|
||||
make_binary_release "hadoop1" "-Dhadoop.version=1.0.4"
|
||||
make_binary_release "cdh4" "-Dhadoop.version=2.0.0-mr1-cdh4.2.0"
|
||||
make_binary_release "hadoop2" "-Pyarn -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0"
|
||||
make_binary_release "hadoop1" "--hadoop 1.0.4"
|
||||
make_binary_release "cdh4" "--hadoop 2.0.0-mr1-cdh4.2.0"
|
||||
make_binary_release "hadoop2" "--with-yarn --hadoop 2.2.0"
|
||||
|
||||
# Copy data
|
||||
echo "Copying release tarballs"
|
||||
|
|
|
@ -187,7 +187,7 @@
|
|||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<configuration>
|
||||
<shadedArtifactAttached>false</shadedArtifactAttached>
|
||||
<outputFile>${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile>
|
||||
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
|
||||
<artifactSet>
|
||||
<includes>
|
||||
<include>*:*</include>
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
# --tgz: Additionally creates spark-$VERSION-bin-$NAME.tgz
|
||||
# --hadoop VERSION: Builds against specified version of Hadoop.
|
||||
# --with-yarn: Enables support for Hadoop YARN.
|
||||
# --with-hive: Enable support for reading Hive tables.
|
||||
# --name: A moniker for the release target. Defaults to the Hadoop version.
|
||||
#
|
||||
# Recommended deploy/testing procedure (standalone mode):
|
||||
# 1) Rsync / deploy the dist/ dir to one host
|
||||
|
@ -41,25 +43,20 @@
|
|||
FWDIR="$(cd `dirname $0`; pwd)"
|
||||
DISTDIR="$FWDIR/dist"
|
||||
|
||||
# Get version from SBT
|
||||
export TERM=dumb # Prevents color codes in SBT output
|
||||
|
||||
VERSIONSTRING=$($FWDIR/sbt/sbt "show version")
|
||||
|
||||
VERSION=$(mvn help:evaluate -Dexpression=project.version |grep -v "INFO")
|
||||
if [ $? == -1 ] ;then
|
||||
echo -e "You need sbt installed and available on your path."
|
||||
echo -e "Download sbt from http://www.scala-sbt.org/"
|
||||
echo -e "You need Maven installed to build Spark."
|
||||
echo -e "Download Maven from https://maven.apache.org."
|
||||
exit -1;
|
||||
fi
|
||||
|
||||
VERSION=$(echo "${VERSIONSTRING}" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
|
||||
echo "Version is ${VERSION}"
|
||||
|
||||
# Initialize defaults
|
||||
SPARK_HADOOP_VERSION=1.0.4
|
||||
SPARK_YARN=false
|
||||
SPARK_HIVE=false
|
||||
SPARK_TACHYON=false
|
||||
MAKE_TGZ=false
|
||||
NAME=none
|
||||
|
||||
# Parse arguments
|
||||
while (( "$#" )); do
|
||||
|
@ -71,23 +68,37 @@ while (( "$#" )); do
|
|||
--with-yarn)
|
||||
SPARK_YARN=true
|
||||
;;
|
||||
--with-hive)
|
||||
SPARK_HIVE=true
|
||||
;;
|
||||
--with-tachyon)
|
||||
SPARK_TACHYON=true
|
||||
;;
|
||||
--tgz)
|
||||
MAKE_TGZ=true
|
||||
;;
|
||||
--name)
|
||||
NAME="$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ "$NAME" == "none" ]; then
|
||||
NAME=$SPARK_HADOOP_VERSION
|
||||
fi
|
||||
|
||||
echo "Spark version is $VERSION"
|
||||
|
||||
if [ "$MAKE_TGZ" == "true" ]; then
|
||||
echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
|
||||
echo "Making spark-$VERSION-bin-$NAME.tgz"
|
||||
else
|
||||
echo "Making distribution for Spark $VERSION in $DISTDIR..."
|
||||
fi
|
||||
|
||||
echo "Hadoop version set to $SPARK_HADOOP_VERSION"
|
||||
echo "Release name set to $NAME"
|
||||
if [ "$SPARK_YARN" == "true" ]; then
|
||||
echo "YARN enabled"
|
||||
else
|
||||
|
@ -100,20 +111,32 @@ else
|
|||
echo "Tachyon Disabled"
|
||||
fi
|
||||
|
||||
# Build fat JAR
|
||||
export SPARK_HADOOP_VERSION
|
||||
export SPARK_YARN
|
||||
# Build uber fat JAR
|
||||
cd $FWDIR
|
||||
|
||||
"sbt/sbt" "assembly/assembly"
|
||||
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
|
||||
|
||||
if [ "$SPARK_HIVE" == "true" ]; then
|
||||
MAYBE_HIVE="-Phive"
|
||||
else
|
||||
MAYBE_HIVE=""
|
||||
fi
|
||||
|
||||
if [ "$SPARK_YARN" == "true" ]; then
|
||||
mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
|
||||
-Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
|
||||
else
|
||||
mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
|
||||
fi
|
||||
|
||||
# Make directories
|
||||
rm -rf "$DISTDIR"
|
||||
mkdir -p "$DISTDIR/jars"
|
||||
mkdir -p "$DISTDIR/lib"
|
||||
echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
|
||||
|
||||
# Copy jars
|
||||
cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/"
|
||||
cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
|
||||
cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
|
||||
|
||||
# Copy other things
|
||||
mkdir "$DISTDIR"/conf
|
||||
|
@ -135,16 +158,16 @@ if [ "$SPARK_TACHYON" == "true" ]; then
|
|||
wget "$TACHYON_URL"
|
||||
|
||||
tar xf "tachyon-${TACHYON_VERSION}-bin.tar.gz"
|
||||
cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/jars"
|
||||
cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/lib"
|
||||
mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
|
||||
cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
|
||||
cp -r "tachyon-${TACHYON_VERSION}"/src/main/java/tachyon/web/resources "$DISTDIR/tachyon/src/main/java/tachyon/web"
|
||||
|
||||
if [[ `uname -a` == Darwin* ]]; then
|
||||
# need to run sed differently on osx
|
||||
nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
|
||||
nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
|
||||
else
|
||||
sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
|
||||
sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
|
||||
fi
|
||||
|
||||
popd > /dev/null
|
||||
|
@ -152,8 +175,9 @@ if [ "$SPARK_TACHYON" == "true" ]; then
|
|||
fi
|
||||
|
||||
if [ "$MAKE_TGZ" == "true" ]; then
|
||||
TARDIR="$FWDIR/spark-$VERSION"
|
||||
TARDIR_NAME=spark-$VERSION-bin-$NAME
|
||||
TARDIR="$FWDIR/$TARDIR_NAME"
|
||||
cp -r "$DISTDIR" "$TARDIR"
|
||||
tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
|
||||
tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$FWDIR" "$TARDIR_NAME"
|
||||
rm -rf "$TARDIR"
|
||||
fi
|
||||
|
|
|
@ -412,6 +412,8 @@ object SparkBuild extends Build {
|
|||
|
||||
def examplesSettings = sharedSettings ++ Seq(
|
||||
name := "spark-examples",
|
||||
jarName in assembly <<= version map {
|
||||
v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" },
|
||||
libraryDependencies ++= Seq(
|
||||
"com.twitter" %% "algebird-core" % "0.1.11",
|
||||
"org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
|
||||
|
|
Loading…
Reference in a new issue