0307db0f55
This is the default mode for running spark-shell and pyspark, intended to allow users running spark for the first time to see the performance benefits of using multiple cores, while not breaking backwards compatibility for users who use "local" mode and expect exactly 1 core. Author: Aaron Davidson <aaron@databricks.com> Closes #182 from aarondav/110 and squashes the following commits: a88294c [Aaron Davidson] Rebased changes for new spark-shell a9f393e [Aaron Davidson] SPARK-1099: Introduce local[*] mode to infer number of cores
252 lines
6.8 KiB
Bash
Executable file
252 lines
6.8 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
#
|
|
# Shell script for starting the Spark Shell REPL
|
|
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
|
|
# if those two env vars are set in spark-env.sh but MASTER is not.
|
|
|
|
# Detect whether we are running under Cygwin; main() uses this flag to apply
# a JLine/terminal workaround before launching the REPL.
cygwin=false
case "$(uname)" in
  CYGWIN*) cygwin=true ;;
esac
|
|
|
|
# Enter posix mode for bash
set -o posix

## Global script variables

# Absolute path of the directory one level above this script's directory.
# Everything is quoted so that an install path containing spaces works, and
# `&&` keeps `pwd` from reporting the wrong directory if `cd` fails.
FWDIR="$(cd -- "$(dirname -- "$0")/.." && pwd)"

# JVM options accumulated by the option setters; inherited from the
# environment when already set.
SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-}"
# Master used when neither the command line nor the environment provides one.
DEFAULT_MASTER="local[*]"
MASTER="${MASTER:-}"

# Set to 1 by --log-conf; enables log_info output.
info_log=0
|
|
|
|
#CLI Color Templates
# The escape sequences are purely cosmetic. tput's stderr is discarded on
# purpose so that a missing/dumb TERM (e.g. non-interactive use) degrades to
# plain text instead of printing tput diagnostics.
txtund="$(tput sgr 0 1 2>/dev/null)"            # Underline
txtbld="$(tput bold 2>/dev/null)"               # Bold
bldred="${txtbld}$(tput setaf 1 2>/dev/null)"   # red
bldyel="${txtbld}$(tput setaf 3 2>/dev/null)"   # yellow
bldblu="${txtbld}$(tput setaf 4 2>/dev/null)"   # blue
bldwht="${txtbld}$(tput setaf 7 2>/dev/null)"   # white
txtrst="$(tput sgr0 2>/dev/null)"               # Reset
info="${bldwht}*${txtrst}"                      # Feedback
pass="${bldblu}*${txtrst}"
warn="${bldred}*${txtrst}"
ques="${bldblu}?${txtrst}"
|
|
|
|
# Helper function to describe the script usage.
# Globals:  txtbld, txtrst (read) - optional terminal styling
# Outputs:  the help text on stdout
usage() {
  cat << EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]

${txtbld}OPTIONS${txtrst}:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number
                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed
                              by m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master            : A full string that describes the Spark Master, defaults to "local[*]"
                              e.g. "spark://localhost:7077".
    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
                              Spark Context.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

EOF
}
|
|
|
|
# Report a fatal usage error, print the help text and terminate the script.
# Globals:   txtund, bldred, txtrst (read) - optional terminal styling
# Arguments: $1 - error message
# Outputs:   "ERROR: <message>" followed by the usage text on stdout
# Exits:     always, with status 1
out_error() {
  # printf '%s' prints the message verbatim; `echo -e` would reinterpret any
  # backslash sequences it happens to contain.
  printf '%s\n' "${txtund}${bldred}ERROR${txtrst}: $1"
  usage
  exit 1
}
|
|
|
|
# Print an informational message, but only when info logging is enabled.
# Globals:   info_log (read) - 1 enables output; bldyel, txtrst (read)
# Arguments: $1 - message
# Outputs:   "INFO: <message>" on stdout when enabled, nothing otherwise
# Returns:   0 in both cases (the previous `[ ... ] && echo` form returned 1
#            whenever logging was disabled)
log_info() {
  if [[ "${info_log:-0}" -eq 1 ]]; then
    # Printed verbatim so that backslashes in paths are not interpreted.
    printf '%s\n' "${bldyel}INFO${txtrst}: $1"
  fi
}
|
|
|
|
# Print a warning message unconditionally.
# Globals:   txtund, bldyel, txtrst (read) - optional terminal styling
# Arguments: $1 - message
# Outputs:   "WARN: <message>" on stdout
log_warn() {
  # Printed verbatim; `echo -e` would reinterpret backslashes in the message.
  printf '%s\n' "${txtund}${bldyel}WARN${txtrst}: $1"
}
|
|
|
|
# PATTERNS used to validate more than one optional arg.
# Anything starting with a dash is treated as another flag, not a value.
ARG_FLAG_PATTERN="^-"
# A number followed by a single unit letter. Inside a bracket expression `|`
# is a literal character, so the former "[m|g|M|G]" also accepted values such
# as "512|"; list only the unit letters.
MEM_PATTERN="^[0-9]+[mgMG]$"
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"
|
|
|
|
# Setters for optional args.
|
|
# Validate the core count and append it to the REPL JVM options.
# Globals:   SPARK_REPL_OPTS (appended with -Dspark.cores.max=<n>)
# Arguments: $1 - number of cores (decimal digits only)
#            $2 - flag name, used in the error message
# Exits:     through out_error when $1 is not a non-negative integer
set_cores() {
  # Kept local so the pattern does not leak into the global namespace.
  local -r cores_pattern='^[0-9]+$'

  if [[ "$1" =~ $cores_pattern ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
  else
    out_error "wrong format for $2"
  fi
}
|
|
|
|
# Validate the executor memory size and append it to the REPL JVM options.
# Globals:   MEM_PATTERN (read), SPARK_REPL_OPTS (appended with
#            -Dspark.executor.memory=<size>)
# Arguments: $1 - memory size such as "512m" or "2g"
#            $2 - flag name, used in the error message
# Exits:     through out_error when $1 does not match MEM_PATTERN
set_em() {
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
  else
    out_error "wrong format for $2"
  fi
}
|
|
|
|
# Validate the driver memory size and export it for the launcher.
# Globals:   MEM_PATTERN (read), SPARK_DRIVER_MEMORY (exported)
# Arguments: $1 - memory size such as "512m" or "2g"
#            $2 - flag name, used in the error message
# Exits:     through out_error when $1 does not match MEM_PATTERN
set_dm() {
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    export SPARK_DRIVER_MEMORY="$1"
  else
    out_error "wrong format for $2"
  fi
}
|
|
|
|
# Append the spark.logConf setting to the REPL JVM options.
# Globals:   SPARK_REPL_OPTS (appended with -Dspark.logConf=<value>)
# Arguments: $1 - value for spark.logConf (passed through without validation)
set_spark_log_conf() {
  SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
}
|
|
|
|
# Export MASTER from a command-line value.
# Globals:   ARG_FLAG_PATTERN (read), MASTER (exported)
# Arguments: $1 - master string, e.g. "spark://localhost:7077"
#            $2 - flag name, used in the error message
# Exits:     through out_error when $1 is empty or looks like another flag
set_spark_master() {
  # An empty value means the flag was given without an argument; reject it
  # instead of silently falling back to the default master.
  if [[ -n "$1" && ! "$1" =~ $ARG_FLAG_PATTERN ]]; then
    export MASTER="$1"
  else
    out_error "wrong format for $2"
  fi
}
|
|
|
|
# Decide which Spark master to use and export it as MASTER.
# Precedence: an already-set MASTER, then spark://IP:PORT built from the
# variables provided by load-spark-env.sh, then DEFAULT_MASTER.
# Globals:   FWDIR, DEFAULT_MASTER (read); SPARK_MASTER_IP (read);
#            SPARK_MASTER_PORT, DEFAULT_SPARK_MASTER_PORT (written);
#            MASTER (exported)
resolve_spark_master() {
  # Set MASTER from spark-env if possible
  DEFAULT_SPARK_MASTER_PORT=7077
  if [[ -z "$MASTER" ]]; then
    # Quoted so that an install directory containing spaces can be sourced.
    . "$FWDIR/bin/load-spark-env.sh"
    if [[ -n "$SPARK_MASTER_IP" ]]; then
      SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-$DEFAULT_SPARK_MASTER_PORT}"
      export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
    fi
  fi

  # Nothing on the command line or in the environment: fall back to default.
  if [[ -z "$MASTER" ]]; then
    export MASTER="$DEFAULT_MASTER"
  fi
}
|
|
|
|
# Resolve the master and launch the Spark REPL through bin/spark-class.
# Globals:   FWDIR, cygwin, MASTER (read); SPARK_REPL_OPTS (exported)
# Arguments: forwarded unchanged to org.apache.spark.repl.Main
# Returns:   the exit status of spark-class
main() {
  local repl_status

  log_info "Base Directory set to $FWDIR"

  resolve_spark_master
  log_info "Spark Master is $MASTER"

  log_info "Spark REPL options $SPARK_REPL_OPTS"
  if [[ "$cygwin" == true ]]; then
    # Workaround for issue involving JLine and Cygwin
    # (see http://sourceforge.net/p/jline/bugs/40/).
    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
    # (see https://github.com/sbt/sbt/issues/562).
    stty -icanon min 1 -echo > /dev/null 2>&1
    export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
    # Capture the REPL status now; the stty call below would otherwise
    # replace it as the function's return value.
    repl_status=$?
    stty icanon echo > /dev/null 2>&1
  else
    export SPARK_REPL_OPTS
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
    repl_status=$?
  fi
  return "$repl_status"
}
|
|
|
|
# Parse the command-line options and apply them through the setter functions.
# The previous `for option in "$@"` loop walked a snapshot of the arguments
# while `shift` consumed the live ones, so a single unrecognised argument made
# every later flag read the wrong value. Here the live argument list is the
# only cursor, each flag consumes exactly what belongs to it, and values are
# passed quoted.
# Globals:   info_log (set to 1 by --log-conf); others via the setters
# Arguments: the script's command-line arguments
# Exits:     status 1 after printing usage for -h/--help; through out_error
#            when a flag is missing its value or the value is malformed
parse_options() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        usage
        exit 1
        ;;
      -c | --cores)
        [[ $# -ge 2 ]] || out_error "wrong format for -c/--cores"
        set_cores "$2" "-c/--cores"
        shift 2
        ;;
      -em | --executor-memory)
        [[ $# -ge 2 ]] || out_error "wrong format for -em/--executor-memory"
        set_em "$2" "-em/--executor-memory"
        shift 2
        ;;
      -dm | --driver-memory)
        [[ $# -ge 2 ]] || out_error "wrong format for -dm/--driver-memory"
        set_dm "$2" "-dm/--driver-memory"
        shift 2
        ;;
      -m | --master)
        [[ $# -ge 2 ]] || out_error "wrong format for -m/--master"
        set_spark_master "$2" "-m/--master"
        shift 2
        ;;
      --log-conf)
        set_spark_log_conf "true"
        info_log=1
        shift
        ;;
      *)
        # Unrecognised arguments are ignored, as before, but are now
        # stepped over so they cannot desynchronise later flags.
        shift
        ;;
    esac
  done
}

parse_options "$@"
|
|
|
|
# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed

# Status that onExit reports if it runs before the status of main has been
# recorded (for example when the INT trap fires while the REPL is running).
exit_status=127
# Terminal settings captured with `stty -g`; empty means nothing to restore.
saved_stty=""
|
|
|
|
# restore stty settings (echo in particular)
# Globals: saved_stty (read, then cleared so the restore happens only once)
restoreSttySettings() {
  # Deliberately unquoted: the value comes from `stty -g`, whose output is
  # meant to be handed back to stty as its argument list.
  # shellcheck disable=SC2086
  stty $saved_stty
  saved_stty=""
}
|
|
|
|
# Restore the terminal if settings were saved, then terminate the script.
# Globals: saved_stty (read), exit_status (read)
# Exits:   always, with the status stored in exit_status
onExit() {
  if [[ -n "$saved_stty" ]]; then
    restoreSttySettings
  fi
  exit "$exit_status"
}
|
|
|
|
# to reenable echo if we are interrupted before completing.
trap onExit INT

# save terminal settings
# clear on error so we don't later try to restore them
# (the status is tested directly: the former `[[ ! $? ]]` only checked that
# the string "$?" was empty, which it never is, so the reset never ran)
if ! saved_stty=$(stty -g 2>/dev/null); then
  saved_stty=""
fi
|
|
|
|
# Launch the REPL. Options were already consumed by the parsing step above,
# so main is invoked without arguments.
main

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit
|
|
|