2013-07-15 21:13:17 -04:00
#!/usr/bin/env bash
2013-07-16 20:21:33 -04:00
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
2013-06-24 20:05:37 -04:00
#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self-contained.
2013-06-25 03:15:58 -04:00
# It does not contain source or *.class files.
#
2013-08-14 20:34:34 -04:00
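# Optional Arguments
# --tgz: Additionally creates spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
# --hadoop VERSION: Builds against specified version of Hadoop (default: 1.0.4).
# --with-yarn: Enables support for Hadoop YARN.
# --with-tachyon: Downloads Tachyon and bundles it into the distribution.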
2013-07-15 21:13:17 -04:00
#
2013-06-25 03:15:58 -04:00
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
2014-01-02 08:24:08 -05:00
# 2) cd to deploy dir; ./sbin/start-master.sh
2013-06-25 03:15:58 -04:00
# 3) Verify master is up by visiting its web page, i.e. http://master-ip:8080. Note the spark:// URL.
2014-01-02 08:24:08 -05:00
# 4) ./sbin/start-slave.sh 1 <<spark:// URL>>
2014-01-02 08:07:40 -05:00
# 5) MASTER="spark://my-master-ip:7077" ./bin/spark-shell
2013-07-16 20:21:33 -04:00
#
2013-06-24 20:05:37 -04:00
# Figure out where the Spark framework is installed.
# FWDIR is the absolute path of the directory containing this script; "$0" is
# quoted so checkouts whose path contains spaces resolve correctly, and "&&"
# keeps a failed cd from silently yielding the caller's working directory.
FWDIR="$(cd "$(dirname "$0")" && pwd)" || {
  echo "Unable to determine the Spark directory from '$0'" >&2
  exit 1
}
# Directory the distribution is assembled in (it is wiped and recreated later).
DISTDIR="$FWDIR/dist"
# Get version from SBT
export TERM=dumb   # Prevents color codes in SBT output

# A missing or failing launcher reports a non-zero exit status in the range
# 1-255 (never -1), so test for any failure instead of one specific value.
if ! VERSIONSTRING="$("$FWDIR/sbt/sbt" "show version")"; then
  echo -e "You need sbt installed and available on your path." >&2
  echo -e "Download sbt from http://www.scala-sbt.org/" >&2
  exit 1
fi

# Take the last line of the sbt output, select its second tab-delimited field
# (cut passes a line without tabs through unchanged), then keep only the
# leading run of letters, digits, dots and hyphens.
VERSION=$(echo "${VERSIONSTRING}" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
echo "Version is ${VERSION}"
2013-07-15 21:13:17 -04:00
2013-08-14 20:34:34 -04:00
# Initialize defaults (each can be overridden by a command-line option)
SPARK_HADOOP_VERSION=1.0.4   # --hadoop VERSION
SPARK_YARN=false             # --with-yarn
SPARK_TACHYON=false          # --with-tachyon
MAKE_TGZ=false               # --tgz
# Parse arguments
#
# parse_args ARG...
#   Applies the command-line options to the build settings.
#   Globals written: SPARK_HADOOP_VERSION, SPARK_YARN, SPARK_TACHYON, MAKE_TGZ
#   Recognized options:
#     --hadoop VERSION   sets SPARK_HADOOP_VERSION to VERSION
#     --with-yarn        sets SPARK_YARN=true
#     --with-tachyon     sets SPARK_TACHYON=true
#     --tgz              sets MAKE_TGZ=true
#   Exits with status 1 if --hadoop is given without a value. Unrecognized
#   arguments are still skipped, but are now reported on stderr.
parse_args() {
  while (( $# )); do
    case "$1" in
      --hadoop)
        # Without this check a trailing "--hadoop" would silently build
        # against an empty Hadoop version.
        if (( $# < 2 )) || [[ -z "$2" ]]; then
          echo "Error: --hadoop requires a VERSION argument" >&2
          exit 1
        fi
        SPARK_HADOOP_VERSION="$2"
        shift   # consume the value; the shift below consumes the flag
        ;;
      --with-yarn)
        SPARK_YARN=true
        ;;
      --with-tachyon)
        SPARK_TACHYON=true
        ;;
      --tgz)
        MAKE_TGZ=true
        ;;
      *)
        echo "Warning: ignoring unrecognized argument '$1'" >&2
        ;;
    esac
    shift
  done
}
parse_args "$@"
# print_build_summary
#   Writes a short description of the selected build to stdout: what is being
#   produced (tarball or plain distribution directory), the Hadoop version,
#   and whether YARN and Tachyon support are switched on.
#   Globals read: MAKE_TGZ, VERSION, DISTDIR, SPARK_HADOOP_VERSION,
#                 SPARK_YARN, SPARK_TACHYON
print_build_summary() {
  if [[ "$MAKE_TGZ" == "true" ]]; then
    echo "Making spark-${VERSION}-hadoop_${SPARK_HADOOP_VERSION}-bin.tar.gz"
  else
    echo "Making distribution for Spark ${VERSION} in ${DISTDIR}..."
  fi

  echo "Hadoop version set to ${SPARK_HADOOP_VERSION}"

  if [[ "$SPARK_YARN" == "true" ]]; then
    echo "YARN enabled"
  else
    echo "YARN disabled"
  fi

  if [[ "$SPARK_TACHYON" == "true" ]]; then
    echo "Tachyon Enabled"
  else
    echo "Tachyon Disabled"
  fi
}
print_build_summary
2013-06-24 20:05:37 -04:00
# Build fat JAR
# The sbt build reads these settings from the environment.
export SPARK_HADOOP_VERSION
export SPARK_YARN

# The launcher is invoked by a path relative to the Spark directory, so the
# script must be running from there; stop if the directory cannot be entered.
cd "$FWDIR" || {
  echo "Cannot change directory to '$FWDIR'" >&2
  exit 1
}

# Stop on a failed build rather than packaging stale or missing jars.
"sbt/sbt" "assembly/assembly" || {
  echo "sbt assembly failed; not creating a distribution" >&2
  exit 1
}
2013-06-24 20:05:37 -04:00
# Make directories
# Start from an empty distribution directory. The ":?" guard aborts the script
# if DISTDIR is unset or empty, so the recursive delete cannot run on "".
rm -rf "${DISTDIR:?DISTDIR must be set}"
mkdir -p "$DISTDIR/jars"
echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"

# Copy jars
# Only the variable part is quoted so the wildcards are still expanded by the
# shell while a Spark directory containing spaces stays a single word.
cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/" || {
  echo "No assembly jar found under $FWDIR/assembly/target" >&2
  exit 1
}

# Copy other things
mkdir "$DISTDIR"/conf
cp "$FWDIR"/conf/*.template "$DISTDIR"/conf
cp -r "$FWDIR/bin" "$DISTDIR"
cp -r "$FWDIR/python" "$DISTDIR"
cp -r "$FWDIR/sbin" "$DISTDIR"
2013-07-15 21:13:17 -04:00
2014-03-19 01:04:57 -04:00
# Download and copy in tachyon, if requested
if [[ "$SPARK_TACHYON" == "true" ]]; then
  TACHYON_VERSION="0.4.1"
  TACHYON_TGZ="tachyon-${TACHYON_VERSION}-bin.tar.gz"
  TACHYON_URL="https://github.com/amplab/tachyon/releases/download/v${TACHYON_VERSION}/${TACHYON_TGZ}"

  # Try plain "mktemp -d" first and fall back to the "-t PREFIX" form for
  # mktemp implementations that reject the first invocation.
  TMPD="$(mktemp -d 2>/dev/null || mktemp -d -t 'disttmp')"
  if [[ -z "$TMPD" || ! -d "$TMPD" ]]; then
    echo "Could not create a temporary directory for the Tachyon download" >&2
    exit 1
  fi

  pushd "$TMPD" > /dev/null || exit 1
  echo "Fetching tachyon tgz"

  # Abort (and clean up the temporary directory) if the archive cannot be
  # fetched or unpacked, instead of carrying on with missing files.
  if ! wget "$TACHYON_URL" || ! tar xf "$TACHYON_TGZ"; then
    echo "Failed to download or unpack $TACHYON_URL" >&2
    popd > /dev/null
    rm -rf "$TMPD"
    exit 1
  fi

  cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/jars"
  mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
  cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
  cp -r "tachyon-${TACHYON_VERSION}"/src/main/java/tachyon/web/resources "$DISTDIR/tachyon/src/main/java/tachyon/web"

  # Rewrite the TACHYON_JAR export in the copied tachyon-config.sh so it points
  # at the jar placed in the distribution's jars/ directory, preceded by a
  # comment line explaining the change.
  if [[ "$(uname -s)" == Darwin* ]]; then
    # need to run sed differently on osx: -i takes an explicit (empty) backup
    # suffix, and the newline in the replacement is a backslash followed by a
    # literal newline.
    nl=$'\n'
    sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl  export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  else
    sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n  export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  fi

  popd > /dev/null
  rm -rf "$TMPD"
fi
2013-08-14 20:34:34 -04:00
# package_tgz
#   Packs the contents of DISTDIR into
#   spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz, written to the
#   current working directory, with spark-$VERSION/ as the archive's top-level
#   directory.
#   Globals read: FWDIR, DISTDIR, VERSION, SPARK_HADOOP_VERSION
#   Globals written: TARDIR (staging directory, removed before returning)
#   Returns non-zero if staging or archiving fails.
package_tgz() {
  TARDIR="$FWDIR/spark-$VERSION"
  # Remove any leftover staging directory first: if it already exists,
  # "cp -r" would copy DISTDIR *into* it (as spark-$VERSION/dist) and stale
  # files would end up in the archive.
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR" || return 1
  tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
  local status=$?
  rm -rf "$TARDIR"
  return "$status"
}

if [[ "$MAKE_TGZ" == "true" ]]; then
  package_tgz || exit 1
fi