spark-instrumented-optimizer/dev/make-distribution.sh

216 lines
6.4 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
2013-06-25 03:15:58 -04:00
# It does not contain source or *.class files.
set -o pipefail
set -e
set -x
# Figure out where the Spark framework is installed
SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)"
DISTDIR="$SPARK_HOME/dist"
MAKE_TGZ=false
NAME=none
MVN="$SPARK_HOME/build/mvn"
function exit_with_usage {
echo "make-distribution.sh - tool for making binary distributions of Spark"
echo ""
echo "usage:"
cl_options="[--name] [--tgz] [--mvn <mvn-command>]"
echo "make-distribution.sh $cl_options <maven build options>"
SPARK-3069 [DOCS] Build instructions in README are outdated Here's my crack at Bertrand's suggestion. The Github `README.md` contains build info that's outdated. It should just point to the current online docs, and reflect that Maven is the primary build now. (Incidentally, the stanza at the end about contributions of original work should go in https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark too. It won't hurt to be crystal clear about the agreement to license, given that ICLAs are not required of anyone here.) Author: Sean Owen <sowen@cloudera.com> Closes #2014 from srowen/SPARK-3069 and squashes the following commits: 501507e [Sean Owen] Note that Zinc is for Maven builds too db2bd97 [Sean Owen] sbt -> sbt/sbt and add note about zinc be82027 [Sean Owen] Fix additional occurrences of building-with-maven -> building-spark 91c921f [Sean Owen] Move building-with-maven to building-spark and create a redirect. Update doc links to building-spark.html Add jekyll-redirect-from plugin and make associated config changes (including fixing pygments deprecation). Add example of SBT to README.md 999544e [Sean Owen] Change "Building Spark with Maven" title to "Building Spark"; reinstate tl;dr info about dev/run-tests in README.md; add brief note about building with SBT c18d140 [Sean Owen] Optionally, remove the copy of contributing text from main README.md 8e83934 [Sean Owen] Add CONTRIBUTING.md to trigger notice on new pull request page b1c04a1 [Sean Owen] Refer to current online documentation for building, and remove slightly outdated copy in README.md
2014-09-16 12:18:03 -04:00
echo "See Spark's \"Building Spark\" doc for correct Maven options."
echo ""
exit 1
}
# Parse arguments
while (( "$#" )); do
case $1 in
--hadoop)
echo "Error: '--hadoop' is no longer supported:"
echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
exit_with_usage
;;
--with-yarn)
echo "Error: '--with-yarn' is no longer supported, use Maven option -Pyarn"
exit_with_usage
;;
--with-hive)
Support cross building for Scala 2.11 Let's give this another go using a version of Hive that shades its JLine dependency. Author: Prashant Sharma <prashant.s@imaginea.com> Author: Patrick Wendell <pwendell@gmail.com> Closes #3159 from pwendell/scala-2.11-prashant and squashes the following commits: e93aa3e [Patrick Wendell] Restoring -Phive-thriftserver profile and cleaning up build script. f65d17d [Patrick Wendell] Fixing build issue due to merge conflict a8c41eb [Patrick Wendell] Reverting dev/run-tests back to master state. 7a6eb18 [Patrick Wendell] Merge remote-tracking branch 'apache/master' into scala-2.11-prashant 583aa07 [Prashant Sharma] REVERT ME: removed hive thirftserver 3680e58 [Prashant Sharma] Revert "REVERT ME: Temporarily removing some Cli tests." 935fb47 [Prashant Sharma] Revert "Fixed by disabling a few tests temporarily." 925e90f [Prashant Sharma] Fixed by disabling a few tests temporarily. 2fffed3 [Prashant Sharma] Exclude groovy from sbt build, and also provide a way for such instances in future. 8bd4e40 [Prashant Sharma] Switched to gmaven plus, it fixes random failures observer with its predecessor gmaven. 5272ce5 [Prashant Sharma] SPARK_SCALA_VERSION related bugs. 2121071 [Patrick Wendell] Migrating version detection to PySpark b1ed44d [Patrick Wendell] REVERT ME: Temporarily removing some Cli tests. 1743a73 [Patrick Wendell] Removing decimal test that doesn't work with Scala 2.11 f5cad4e [Patrick Wendell] Add Scala 2.11 docs 210d7e1 [Patrick Wendell] Revert "Testing new Hive version with shaded jline" 48518ce [Patrick Wendell] Remove association of Hive and Thriftserver profiles. e9d0a06 [Patrick Wendell] Revert "Enable thritfserver for Scala 2.10 only" 67ec364 [Patrick Wendell] Guard building of thriftserver around Scala 2.10 check 8502c23 [Patrick Wendell] Enable thritfserver for Scala 2.10 only e22b104 [Patrick Wendell] Small fix in pom file ec402ab [Patrick Wendell] Various fixes 0be5a9d [Patrick Wendell] Testing new Hive version with shaded jline 4eaec65 [Prashant Sharma] Changed scripts to ignore target. 5167bea [Prashant Sharma] small correction a4fcac6 [Prashant Sharma] Run against scala 2.11 on jenkins. 80285f4 [Prashant Sharma] MAven equivalent of setting spark.executor.extraClasspath during tests. 034b369 [Prashant Sharma] Setting test jars on executor classpath during tests from sbt. d4874cb [Prashant Sharma] Fixed Python Runner suite. null check should be first case in scala 2.11. 6f50f13 [Prashant Sharma] Fixed build after rebasing with master. We should use ${scala.binary.version} instead of just 2.10 e56ca9d [Prashant Sharma] Print an error if build for 2.10 and 2.11 is spotted. 937c0b8 [Prashant Sharma] SCALA_VERSION -> SPARK_SCALA_VERSION cb059b0 [Prashant Sharma] Code review 0476e5e [Prashant Sharma] Scala 2.11 support with repl and all build changes.
2014-11-12 00:36:48 -05:00
echo "Error: '--with-hive' is no longer supported, use Maven options -Phive and -Phive-thriftserver"
exit_with_usage
;;
--tgz)
MAKE_TGZ=true
;;
--mvn)
MVN="$2"
shift
;;
--name)
NAME="$2"
shift
;;
--help)
exit_with_usage
;;
*)
break
;;
esac
shift
done
if [ -z "$JAVA_HOME" ]; then
# Fall back on JAVA_HOME from rpm, if found
if [ $(command -v rpm) ]; then
RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)"
if [ "$RPM_JAVA_HOME" != "%java_home" ]; then
JAVA_HOME="$RPM_JAVA_HOME"
echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
fi
fi
fi
if [ -z "$JAVA_HOME" ]; then
echo "Error: JAVA_HOME is not set, cannot proceed."
exit -1
fi
if [ $(command -v git) ]; then
GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
if [ ! -z "$GITREV" ]; then
GITREVSTRING=" (git revision $GITREV)"
fi
unset GITREV
fi
if [ ! "$(command -v "$MVN")" ] ; then
echo -e "Could not locate Maven command: '$MVN'."
echo -e "Specify the Maven command with the --mvn flag"
exit -1;
fi
VERSION=$("$MVN" help:evaluate -Dexpression=project.version $@ 2>/dev/null | grep -v "INFO" | tail -n 1)
SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version $@ 2>/dev/null\
| grep -v "INFO"\
| tail -n 1)
SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version $@ 2>/dev/null\
| grep -v "INFO"\
| tail -n 1)
SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ 2>/dev/null\
| grep -v "INFO"\
| fgrep --count "<id>hive</id>";\
# Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\
# because we use "set -o pipefail"
echo -n)
if [ "$NAME" == "none" ]; then
NAME=$SPARK_HADOOP_VERSION
fi
echo "Spark version is $VERSION"
if [ "$MAKE_TGZ" == "true" ]; then
echo "Making spark-$VERSION-bin-$NAME.tgz"
else
echo "Making distribution for Spark $VERSION in $DISTDIR..."
fi
# Build uber fat JAR
cd "$SPARK_HOME"
export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m}"
# Store the command as an array because $MVN variable might have spaces in it.
# Normal quoting tricks don't work.
# See: http://mywiki.wooledge.org/BashFAQ/050
BUILD_COMMAND=("$MVN" clean package -DskipTests $@)
# Actually build the jar
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[@]}\n"
"${BUILD_COMMAND[@]}"
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/lib"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
if [ "$SPARK_HIVE" == "1" ]; then
cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
fi
# Copy license and ASF files
cp "$SPARK_HOME/LICENSE" "$DISTDIR"
cp -r "$SPARK_HOME/licenses" "$DISTDIR"
cp "$SPARK_HOME/NOTICE" "$DISTDIR"
if [ -e "$SPARK_HOME"/CHANGES.txt ]; then
cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR"
fi
# Copy data files
cp -r "$SPARK_HOME/data" "$DISTDIR"
# Copy other things
mkdir "$DISTDIR"/conf
cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
# Copy SparkR if it exists
if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then
mkdir -p "$DISTDIR"/R/lib
cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib
cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR"/R/lib
fi
if [ "$MAKE_TGZ" == "true" ]; then
TARDIR_NAME=spark-$VERSION-bin-$NAME
TARDIR="$SPARK_HOME/$TARDIR_NAME"
rm -rf "$TARDIR"
cp -r "$DISTDIR" "$TARDIR"
tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
rm -rf "$TARDIR"
fi