#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
# It does not contain source or *.class files.
# A pipeline fails if any stage fails (needed for the mvn | grep chains below).
set -o pipefail
# Abort on the first failing command.
set -e

# Figure out where the Spark framework is installed
FWDIR="$(cd "$(dirname "$0")"; pwd)"
DISTDIR="$FWDIR/dist"

# Defaults; overridden by the command-line flags parsed below.
SPARK_TACHYON=false
MAKE_TGZ=false
NAME=none
# Print usage information to stdout and terminate with status 1.
# Used both for --help and for rejected/obsolete options.
function exit_with_usage {
  echo "make-distribution.sh - tool for making binary distributions of Spark"
  echo ""
  echo "usage:"
  echo "./make-distribution.sh [--name] [--tgz] [--with-tachyon] <maven build options>"
  echo "See Spark's \"Building Spark\" doc for correct Maven options."
  echo ""
  exit 1
}
# Parse arguments. Recognized flags are consumed; the first unrecognized
# argument stops parsing, and everything from there on is passed to Maven.
while (( "$#" )); do
  case $1 in
    --hadoop)
      # Obsolete flag: Hadoop selection moved to Maven profiles/properties.
      echo "Error: '--hadoop' is no longer supported:"
      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
      echo "Error: Related profiles include hadoop-0.23, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
      exit_with_usage
      ;;
    --with-yarn)
      echo "Error: '--with-yarn' is no longer supported, use Maven option -Pyarn"
      exit_with_usage
      ;;
    --with-hive)
      echo "Error: '--with-hive' is no longer supported, use Maven options -Phive and -Phive-thriftserver"
      exit_with_usage
      ;;
    --skip-java-test)
      # Skip the interactive JDK 6 sanity check below.
      SKIP_JAVA_TEST=true
      ;;
    --with-tachyon)
      SPARK_TACHYON=true
      ;;
    --tgz)
      MAKE_TGZ=true
      ;;
    --name)
      # Extra shift: --name consumes its value argument as well.
      NAME="$2"
      shift
      ;;
    --help)
      exit_with_usage
      ;;
    *)
      # First non-flag argument: leave it (and the rest) for Maven.
      break
      ;;
  esac
  shift
done
if [ -z "$JAVA_HOME" ]; then
  # Fall back on JAVA_HOME from rpm, if found
  if command -v rpm &>/dev/null; then
    RPM_JAVA_HOME=$(rpm -E %java_home 2>/dev/null)
    # rpm echoes the macro name back verbatim when it is undefined.
    if [ "$RPM_JAVA_HOME" != "%java_home" ]; then
      JAVA_HOME=$RPM_JAVA_HOME
      echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
    fi
  fi
fi

if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set, cannot proceed."
  exit -1
fi

# Record the git revision (if building from a checkout) for the RELEASE file.
if command -v git &>/dev/null; then
  GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
  if [ ! -z "$GITREV" ]; then
    GITREVSTRING=" (git revision $GITREV)"
  fi
  unset GITREV
fi

if ! command -v mvn &>/dev/null; then
  echo -e "You need Maven installed to build Spark."
  echo -e "Download Maven from https://maven.apache.org/"
  exit -1;
fi
# Ask Maven for the project version and the effective Hadoop version;
# strip the [INFO] log lines and keep the last remaining line (the value).
VERSION=$(mvn help:evaluate -Dexpression=project.version 2>/dev/null | grep -v "INFO" | tail -n 1)
SPARK_HADOOP_VERSION=$(mvn help:evaluate -Dexpression=hadoop.version "$@" 2>/dev/null \
  | grep -v "INFO" \
  | tail -n 1)
# "1" when the hive profile is active in this build, "0" otherwise.
SPARK_HIVE=$(mvn help:evaluate -Dexpression=project.activeProfiles "$@" 2>/dev/null \
  | grep -v "INFO" \
  | fgrep --count "<id>hive</id>"; \
  # Reset exit status to 0, otherwise the script stops here if the last grep finds nothing
  # because we use "set -o pipefail"
  echo -n)
JAVA_CMD="$JAVA_HOME"/bin/java
# java prints its version to stderr, hence the 2>&1.
JAVA_VERSION=$("$JAVA_CMD" -version 2>&1)
# Warn (interactively) when not building with JDK 6; quoted "1.6" in =~ is a
# literal substring match. Disabled by --skip-java-test.
if [[ ! "$JAVA_VERSION" =~ "1.6" && -z "$SKIP_JAVA_TEST" ]]; then
  echo "***NOTE***: JAVA_HOME is not set to a JDK 6 installation. The resulting"
  echo "            distribution may not work well with PySpark and will not run"
  echo "            with Java 6 (See SPARK-1703 and SPARK-1911)."
  echo "            This test can be disabled by adding --skip-java-test."
  echo "Output from 'java -version' was:"
  echo "$JAVA_VERSION"
  read -p "Would you like to continue anyways? [y,n]: " -r
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Okay, exiting."
    exit 1
  fi
fi
# Default the distribution name to the Hadoop version it was built against.
if [ "$NAME" == "none" ]; then
  NAME=$SPARK_HADOOP_VERSION
fi
echo "Spark version is $VERSION"

if [ "$MAKE_TGZ" == "true" ]; then
  echo "Making spark-$VERSION-bin-$NAME.tgz"
else
  echo "Making distribution for Spark $VERSION in $DISTDIR..."
fi

if [ "$SPARK_TACHYON" == "true" ]; then
  echo "Tachyon Enabled"
else
  echo "Tachyon Disabled"
fi

# Build uber fat JAR
cd "$FWDIR"

export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"

# Remaining arguments (after flag parsing) are forwarded to Maven.
BUILD_COMMAND="mvn clean package -DskipTests $@"

# Actually build the jar
echo -e "\nBuilding with..."
echo -e "\$ $BUILD_COMMAND\n"

${BUILD_COMMAND}
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/lib"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"

# Copy jars
cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
cp "$FWDIR"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$FWDIR"/examples/src/main "$DISTDIR/examples/src/"

# Hive support needs the datanucleus jars alongside the assembly.
if [ "$SPARK_HIVE" == "1" ]; then
  cp "$FWDIR"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
fi

# Copy license and ASF files
cp "$FWDIR/LICENSE" "$DISTDIR"
cp "$FWDIR/NOTICE" "$DISTDIR"

# CHANGES.txt only exists in release source drops, not in git checkouts.
if [ -e "$FWDIR"/CHANGES.txt ]; then
  cp "$FWDIR/CHANGES.txt" "$DISTDIR"
fi

# Copy other things
mkdir "$DISTDIR"/conf
cp "$FWDIR"/conf/*.template "$DISTDIR"/conf
cp "$FWDIR/README.md" "$DISTDIR"
cp -r "$FWDIR/bin" "$DISTDIR"
cp -r "$FWDIR/python" "$DISTDIR"
cp -r "$FWDIR/sbin" "$DISTDIR"
cp -r "$FWDIR/ec2" "$DISTDIR"
# Download and copy in tachyon, if requested
if [ "$SPARK_TACHYON" == "true" ]; then
  TACHYON_VERSION="0.5.0"
  TACHYON_URL="https://github.com/amplab/tachyon/releases/download/v${TACHYON_VERSION}/tachyon-${TACHYON_VERSION}-bin.tar.gz"

  # Work in a throwaway directory; the -t form is the OSX mktemp fallback.
  TMPD=$(mktemp -d 2>/dev/null || mktemp -d -t 'disttmp')

  pushd "$TMPD" > /dev/null
  echo "Fetching tachyon tgz"
  wget "$TACHYON_URL"

  tar xf "tachyon-${TACHYON_VERSION}-bin.tar.gz"
  cp "tachyon-${TACHYON_VERSION}/core/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/lib"
  mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
  cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
  cp -r "tachyon-${TACHYON_VERSION}"/core/src/main/java/tachyon/web "$DISTDIR/tachyon/src/main/java/tachyon/web"

  # Point TACHYON_JAR at the relocated jar inside the distribution.
  if [[ $(uname -a) == Darwin* ]]; then
    # need to run sed differently on osx
    nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl  export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  else
    sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n  export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  fi

  popd > /dev/null
  rm -rf "$TMPD"
fi
# Optionally package the distribution directory into a tarball whose top-level
# directory is spark-$VERSION-bin-$NAME.
if [ "$MAKE_TGZ" == "true" ]; then
  TARDIR_NAME=spark-$VERSION-bin-$NAME
  TARDIR="$FWDIR/$TARDIR_NAME"
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR"
  tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$FWDIR" "$TARDIR_NAME"
  rm -rf "$TARDIR"
fi