SPARK-3337 Paranoid quoting in shell to allow install dirs with spaces within.
Tested! TBH, it isn't a great idea to have a directory with spaces in it: emacs doesn't like it, then hadoop doesn't like it, and so on...

Author: Prashant Sharma <prashant.s@imaginea.com>

Closes #2229 from ScrapCodes/SPARK-3337/quoting-shell-scripts and squashes the following commits:

d4ad660 [Prashant Sharma] SPARK-3337 Paranoid quoting in shell to allow install dirs with spaces within.
parent 711356b422
commit e16a8e7db5
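The fix is the same everywhere: quote every expansion that can hold a path. A minimal sketch of the failure mode this guards against, using a hypothetical install path:

# Suppose this script is installed at "/opt/my spark/bin/demo.sh".
# Unquoted, $0 word-splits at the space, so dirname gets two arguments
# and the cd resolves the wrong directory:
#   FWDIR="$(cd `dirname $0`/..; pwd)"
# Quoted at every expansion, the path stays a single word throughout:
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
echo "$FWDIR"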
bin/beeline
@@ -24,7 +24,7 @@
 set -o posix
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 CLASS="org.apache.hive.beeline.BeeLine"
 exec "$FWDIR/bin/spark-class" $CLASS "$@"
bin/compute-classpath.sh
@@ -23,9 +23,9 @@
 SCALA_VERSION=2.10
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
@@ -63,7 +63,7 @@ else
   assembly_folder="$ASSEMBLY_DIR"
 fi
 
-num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)"
 if [ "$num_jars" -eq "0" ]; then
   echo "Failed to find Spark assembly in $assembly_folder"
   echo "You need to build Spark before running this program."
@@ -77,7 +77,7 @@ if [ "$num_jars" -gt "1" ]; then
   exit 1
 fi
 
-ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"
 
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
@@ -103,8 +103,8 @@ else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi
 
-datanucleus_jars=$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")
-datanucleus_jars=$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)
+datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
+datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
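A note on hunks like num_jars="$(...)" above: bash performs no word splitting on the right-hand side of a plain assignment, so those particular quotes are redundant but harmless, which is the "paranoid" in the commit title. A quick check:

x=$(printf 'a  b   c')    # unquoted assignment, still one string
y="$(printf 'a  b   c')"
[ "$x" = "$y" ] && echo identical   # prints: identical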
bin/load-spark-env.sh
@@ -25,9 +25,9 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
 
   # Returns the parent of the directory this script lives in.
-  parent_dir="$(cd `dirname $0`/..; pwd)"
+  parent_dir="$(cd "`dirname "$0"`"/..; pwd)"
 
-  user_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
+  user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"
 
   if [ -f "${user_conf_dir}/spark-env.sh" ]; then
     # Promote all variable declarations to environment (exported) variables
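The user_conf_dir line shows the one genuinely tricky rule in this commit: a double quote inside ${...} opens a new quoting context instead of closing the outer one, so both the default expansion and the whole parameter expansion can be quoted. A sketch with a hypothetical path:

parent_dir="/opt/my spark"    # hypothetical install dir with a space
unset SPARK_CONF_DIR
user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"
echo "$user_conf_dir"         # -> /opt/my spark/conf, still one word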
bin/pyspark
@@ -18,18 +18,18 @@
 #
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR/bin/utils.sh"
 
 SCALA_VERSION=2.10
 
 function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 }
 
@@ -48,7 +48,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   fi
 fi
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
 if [[ -z "$PYSPARK_PYTHON" ]]; then
@@ -57,12 +57,12 @@ fi
 export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
-export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
-export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
+export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
+export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
-export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
-export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
+export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
+export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"
 
 # If IPython options are specified, assume user wants to run IPython
 if [[ -n "$IPYTHON_OPTS" ]]; then
@@ -99,10 +99,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  primary=$1
+  primary="$1"
   shift
   gatherSparkSubmitOpts "$@"
-  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
+  exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
 else
   # PySpark shell requires special handling downstream
   export PYSPARK_SHELL=1
bin/run-example
@@ -19,7 +19,7 @@
 
 SCALA_VERSION=2.10
 
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
@@ -35,12 +35,12 @@ else
 fi
 
 if [ -f "$FWDIR/RELEASE" ]; then
-  export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
+  export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
+  export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`"
 fi
 
-if [[ -z $SPARK_EXAMPLES_JAR ]]; then
+if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
   echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
   echo "You need to build Spark before running this program" 1>&2
   exit 1
bin/spark-class
@@ -27,12 +27,12 @@ esac
 SCALA_VERSION=2.10
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: spark-class <class> [<args>]" 1>&2
@@ -105,7 +105,7 @@ else
     exit 1
   fi
 fi
-JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
+JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 if [ "$JAVA_VERSION" -ge 18 ]; then
@@ -117,7 +117,7 @@ JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
 
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
-  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+  JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`"
 fi
 
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
@@ -126,21 +126,21 @@ TOOLS_DIR="$FWDIR"/tools
 SPARK_TOOLS_JAR=""
 if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the SBT build
-  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
+  export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`"
 fi
 if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the Maven build
   # TODO: this also needs to become an assembly!
-  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
+  export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`"
 fi
 
 # Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
+classpath_output=$("$FWDIR"/bin/compute-classpath.sh)
 if [[ "$?" != "0" ]]; then
   echo "$classpath_output"
   exit 1
 else
-  CLASSPATH=$classpath_output
+  CLASSPATH="$classpath_output"
 fi
 
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
@@ -153,9 +153,9 @@ if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   fi
 fi
 
 if $cygwin; then
-  CLASSPATH=`cygpath -wp $CLASSPATH`
+  CLASSPATH="`cygpath -wp "$CLASSPATH"`"
   if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
-    export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
+    export SPARK_TOOLS_JAR="`cygpath -w "$SPARK_TOOLS_JAR"`"
   fi
 fi
 export CLASSPATH
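One subtlety in the classpath_output hunk: an assignment whose right-hand side is a command substitution takes on the substituted command's exit status, which is what the $? test on the next line relies on. A minimal demonstration:

classpath_output=$(exit 3)   # the substitution runs in a subshell
echo $?                      # -> 3, passed through the assignment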
bin/spark-shell
@@ -29,11 +29,11 @@ esac
 set -o posix
 
 ## Global script variables
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 function usage() {
   echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 }
 
@@ -41,7 +41,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   usage
 fi
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
@@ -54,11 +54,11 @@ function main() {
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
 
bin/spark-sql
@@ -27,7 +27,7 @@ CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
 CLASS_NOT_FOUND_EXIT_STATUS=1
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 function usage {
   echo "Usage: ./bin/spark-sql [options] [cli option]"
@@ -38,10 +38,10 @@ function usage {
   pattern+="\|--help"
   pattern+="\|======="
 
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   echo
   echo "CLI options:"
-  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+  "$FWDIR"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
@@ -49,7 +49,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   exit 0
 fi
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
bin/spark-submit
@@ -19,7 +19,7 @@
 
 # NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
 
-export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
 ORIG_ARGS=("$@")
 
 while (($#)); do
@@ -59,5 +59,5 @@ if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FI
   fi
 fi
 
-exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
+exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
 
dev/check-license
@@ -23,16 +23,16 @@ acquire_rat_jar () {
   URL1="http://search.maven.org/remotecontent?filepath=org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
   URL2="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
 
-  JAR=$rat_jar
+  JAR="$rat_jar"
 
   if [[ ! -f "$rat_jar" ]]; then
     # Download rat launch jar if it hasn't been downloaded yet
     if [ ! -f "$JAR" ]; then
       # Download
       printf "Attempting to fetch rat\n"
-      JAR_DL=${JAR}.part
+      JAR_DL="${JAR}.part"
       if hash curl 2>/dev/null; then
-        (curl --silent ${URL1} > "$JAR_DL" || curl --silent ${URL2} > "$JAR_DL") && mv "$JAR_DL" "$JAR"
+        (curl --silent "${URL1}" > "$JAR_DL" || curl --silent "${URL2}" > "$JAR_DL") && mv "$JAR_DL" "$JAR"
       elif hash wget 2>/dev/null; then
         (wget --quiet ${URL1} -O "$JAR_DL" || wget --quiet ${URL2} -O "$JAR_DL") && mv "$JAR_DL" "$JAR"
       else
@@ -50,7 +50,7 @@ acquire_rat_jar () {
 }
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
 if test -x "$JAVA_HOME/bin/java"; then
@@ -60,17 +60,17 @@ else
 fi
 
 export RAT_VERSION=0.10
-export rat_jar=$FWDIR/lib/apache-rat-${RAT_VERSION}.jar
-mkdir -p $FWDIR/lib
+export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar
+mkdir -p "$FWDIR"/lib
 
 [[ -f "$rat_jar" ]] || acquire_rat_jar || {
     echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
     exit 1
 }
 
-$java_cmd -jar $rat_jar -E $FWDIR/.rat-excludes -d $FWDIR > rat-results.txt
+$java_cmd -jar "$rat_jar" -E "$FWDIR"/.rat-excludes -d "$FWDIR" > rat-results.txt
 
-ERRORS=$(cat rat-results.txt | grep -e "??")
+ERRORS="$(cat rat-results.txt | grep -e "??")"
 
 if test ! -z "$ERRORS"; then
     echo "Could not find Apache license headers in the following files:"
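The download logic here (and again in sbt-launch-lib.bash below) fetches into a ".part" file and only renames on success, so a failed transfer never leaves a truncated jar where the real one goes; quoting "${URL1}" also keeps the ? in the query string from acting as a glob pattern. The pattern reduced to its core, with placeholder values:

URL1="http://example.org/remotecontent?filepath=some.jar"   # placeholder URL
JAR="/tmp/some.jar"                                         # placeholder path
JAR_DL="${JAR}.part"
curl --silent "${URL1}" > "$JAR_DL" && mv "$JAR_DL" "$JAR"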
dev/lint-python
@@ -18,10 +18,10 @@
 #
 
 SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
-SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)"
+SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
 PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
 
-cd $SPARK_ROOT_DIR
+cd "$SPARK_ROOT_DIR"
 
 # Get pep8 at runtime so that we don't rely on it being installed on the build server.
 #+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162
@@ -45,7 +45,7 @@ fi
 #+ first, but we do so so that the check status can
 #+ be output before the report, like with the
 #+ scalastyle and RAT checks.
-python $PEP8_SCRIPT_PATH $PEP8_PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
+python "$PEP8_SCRIPT_PATH" $PEP8_PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
 pep8_status=${PIPESTATUS[0]} #$?
 
 if [ $pep8_status -ne 0 ]; then
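Note the asymmetry in the python line above: the script path gets quoted while $PEP8_PATHS_TO_CHECK stays bare, because it holds several space-separated paths that must word-split into separate arguments. A quick illustration with throwaway values:

paths="./python ./dev"      # several paths in one variable
printf '<%s>\n' $paths      # two arguments: <./python> <./dev>
printf '<%s>\n' "$paths"    # one argument:  <./python ./dev>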
dev/mima
@@ -21,12 +21,12 @@ set -o pipefail
 set -e
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
 echo -e "q\n" | sbt/sbt oldDeps/update
 
-export SPARK_CLASSPATH=`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`
+export SPARK_CLASSPATH="`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`"
 echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"
 
 ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
dev/run-tests
@@ -18,7 +18,7 @@
 #
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname $0`"/..; pwd)"
 cd "$FWDIR"
 
 if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
dev/scalastyle
@@ -19,7 +19,7 @@
 
 echo -e "q\n" | sbt/sbt -Phive scalastyle > scalastyle.txt
 # Check style with YARN alpha built too
-echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-0.23 -Dhadoop.version=0.23.9 yarn-alpha/scalastyle \
+echo -e "q\n" | sbt/sbt -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.9 yarn-alpha/scalastyle \
   >> scalastyle.txt
 # Check style with YARN built too
 echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 yarn/scalastyle \
make-distribution.sh
@@ -28,7 +28,7 @@ set -o pipefail
 set -e
 
 # Figure out where the Spark framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd "`dirname "$0"`"; pwd)"
 DISTDIR="$FWDIR/dist"
 
 SPARK_TACHYON=false
python/run-tests
@@ -19,7 +19,7 @@
 
 
 # Figure out where the Spark framework is installed
-FWDIR="$(cd `dirname $0`; cd ../; pwd)"
+FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 
 # CD into the python directory to find things on the right path
 cd "$FWDIR/python"
@@ -33,7 +33,9 @@ rm -rf metastore warehouse
 
 function run_test() {
     echo "Running test: $1"
-    SPARK_TESTING=1 $FWDIR/bin/pyspark $1 2>&1 | tee -a unit-tests.log
+
+    SPARK_TESTING=1 "$FWDIR"/bin/pyspark $1 2>&1 | tee -a unit-tests.log
+
     FAILED=$((PIPESTATUS[0]||$FAILED))
 
     # Fail and exit on the first test failure.
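The FAILED bookkeeping beside the tee works because bash records every pipeline element's exit status in PIPESTATUS; plain $? would only report tee's status. A minimal demonstration:

false | tee -a /dev/null    # tee succeeds, so $? alone would be 0
echo "${PIPESTATUS[0]}"     # -> 1, the status of false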
sbin/slaves.sh
@@ -36,29 +36,29 @@ if [ $# -le 0 ]; then
   exit 1
 fi
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 . "$sbin/spark-config.sh"
 
 # If the slaves file is specified in the command line,
 # then it takes precedence over the definition in
 # spark-env.sh. Save it here.
-HOSTLIST=$SPARK_SLAVES
+HOSTLIST="$SPARK_SLAVES"
 
 # Check if --config is passed as an argument. It is an optional parameter.
 # Exit if the argument is not a directory.
 if [ "$1" == "--config" ]
 then
   shift
-  conf_dir=$1
+  conf_dir="$1"
   if [ ! -d "$conf_dir" ]
   then
     echo "ERROR : $conf_dir is not a directory"
     echo $usage
     exit 1
   else
-    export SPARK_CONF_DIR=$conf_dir
+    export SPARK_CONF_DIR="$conf_dir"
   fi
   shift
 fi
@@ -79,7 +79,7 @@ if [ "$SPARK_SSH_OPTS" = "" ]; then
 fi
 
 for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
-  ssh $SPARK_SSH_OPTS $slave $"${@// /\\ }" \
+  ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
     2>&1 | sed "s/^/$slave: /" &
   if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
     sleep $SPARK_SLAVE_SLEEP
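The ssh hunk keeps one idiom worth spelling out: the remote shell re-splits whatever ssh hands it, so quoting "$@" locally is not enough; ${@// /\\ } additionally escapes each space inside every argument before it crosses the wire. A sketch with a hypothetical argument:

set -- "/opt/my spark/sbin/start-slave.sh" 1   # arg containing a space
printf '%s ' "${@// /\\ }"; echo
# -> /opt/my\ spark/sbin/start-slave.sh 1   (survives remote re-splitting)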
sbin/spark-config.sh
@@ -21,19 +21,19 @@
 
 # resolve links - $0 may be a softlink
 this="${BASH_SOURCE-$0}"
-common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+common_bin="$(cd -P -- "$(dirname -- "$this")" && pwd -P)"
 script="$(basename -- "$this")"
 this="$common_bin/$script"
 
 # convert relative path to absolute path
-config_bin=`dirname "$this"`
-script=`basename "$this"`
-config_bin=`cd "$config_bin"; pwd`
+config_bin="`dirname "$this"`"
+script="`basename "$this"`"
+config_bin="`cd "$config_bin"; pwd`"
 this="$config_bin/$script"
 
-export SPARK_PREFIX=`dirname "$this"`/..
-export SPARK_HOME=${SPARK_PREFIX}
+export SPARK_PREFIX="`dirname "$this"`"/..
+export SPARK_HOME="${SPARK_PREFIX}"
 export SPARK_CONF_DIR="$SPARK_HOME/conf"
 # Add the PySpark classes to the PYTHONPATH:
-export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
-export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
+export PYTHONPATH="$SPARK_HOME/python:$PYTHONPATH"
+export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
sbin/spark-daemon.sh
@@ -37,8 +37,8 @@ if [ $# -le 1 ]; then
   exit 1
 fi
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 . "$sbin/spark-config.sh"
 
@@ -50,14 +50,14 @@ sbin=`cd "$sbin"; pwd`
 if [ "$1" == "--config" ]
 then
   shift
-  conf_dir=$1
+  conf_dir="$1"
   if [ ! -d "$conf_dir" ]
   then
     echo "ERROR : $conf_dir is not a directory"
     echo $usage
     exit 1
   else
-    export SPARK_CONF_DIR=$conf_dir
+    export SPARK_CONF_DIR="$conf_dir"
   fi
   shift
 fi
@@ -100,12 +100,12 @@ if [ "$SPARK_LOG_DIR" = "" ]; then
   export SPARK_LOG_DIR="$SPARK_HOME/logs"
 fi
 mkdir -p "$SPARK_LOG_DIR"
-touch $SPARK_LOG_DIR/.spark_test > /dev/null 2>&1
+touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
 TEST_LOG_DIR=$?
 if [ "${TEST_LOG_DIR}" = "0" ]; then
-  rm -f $SPARK_LOG_DIR/.spark_test
+  rm -f "$SPARK_LOG_DIR"/.spark_test
 else
-  chown $SPARK_IDENT_STRING $SPARK_LOG_DIR
+  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
 fi
 
 if [ "$SPARK_PID_DIR" = "" ]; then
@@ -113,8 +113,8 @@ if [ "$SPARK_PID_DIR" = "" ]; then
 fi
 
 # some variables
-log=$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out
-pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid
+log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
+pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"
 
 # Set default scheduling priority
 if [ "$SPARK_NICENESS" = "" ]; then
@@ -136,7 +136,7 @@ case $startStop in
     fi
 
     if [ "$SPARK_MASTER" != "" ]; then
-      echo rsync from $SPARK_MASTER
+      echo rsync from "$SPARK_MASTER"
       rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME"
     fi
 
sbin/spark-executor
@@ -17,10 +17,10 @@
 # limitations under the License.
 #
 
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
-export PYTHONPATH=$FWDIR/python:$PYTHONPATH
-export PYTHONPATH=$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
+export PYTHONPATH="$FWDIR/python:$PYTHONPATH"
+export PYTHONPATH="$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
 
 echo "Running spark-executor with framework dir = $FWDIR"
-exec $FWDIR/bin/spark-class org.apache.spark.executor.MesosExecutorBackend
+exec "$FWDIR"/bin/spark-class org.apache.spark.executor.MesosExecutorBackend
sbin/start-all.sh
@@ -21,8 +21,8 @@
 # Starts the master on this node.
 # Starts a worker on each node specified in conf/slaves
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 TACHYON_STR=""
 
sbin/start-history-server.sh
@@ -24,8 +24,8 @@
 # Use the SPARK_HISTORY_OPTS environment variable to set history server configuration.
 #
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 . "$sbin/spark-config.sh"
 . "$SPARK_PREFIX/bin/load-spark-env.sh"
sbin/start-master.sh
@@ -19,8 +19,8 @@
 
 # Starts the master on the machine this script is executed on.
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 START_TACHYON=false
 
sbin/start-slave.sh
@@ -20,7 +20,7 @@
 # Usage: start-slave.sh <worker#> <master-spark-URL>
 # where <master-spark-URL> is like "spark://localhost:7077"
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 "$sbin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker "$@"
sbin/start-slaves.sh
@@ -17,8 +17,8 @@
 # limitations under the License.
 #
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 
 START_TACHYON=false
@@ -46,11 +46,11 @@ if [ "$SPARK_MASTER_PORT" = "" ]; then
 fi
 
 if [ "$SPARK_MASTER_IP" = "" ]; then
-  SPARK_MASTER_IP=`hostname`
+  SPARK_MASTER_IP="`hostname`"
 fi
 
 if [ "$START_TACHYON" == "true" ]; then
-  "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin"/../tachyon/bin/tachyon bootstrap-conf $SPARK_MASTER_IP
+  "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin"/../tachyon/bin/tachyon bootstrap-conf "$SPARK_MASTER_IP"
 
   # set -t so we can call sudo
   SPARK_SSH_OPTS="-o StrictHostKeyChecking=no -t" "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/../tachyon/bin/tachyon-start.sh" worker SudoMount \; sleep 1
@@ -58,12 +58,12 @@ fi
 
 # Launch the slaves
 if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
-  exec "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" 1 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT
+  exec "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" 1 "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT"
 else
   if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
     SPARK_WORKER_WEBUI_PORT=8081
   fi
   for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
-    "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" $(( $i + 1 )) spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT --webui-port $(( $SPARK_WORKER_WEBUI_PORT + $i ))
+    "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/start-slave.sh" $(( $i + 1 )) "spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT" --webui-port $(( $SPARK_WORKER_WEBUI_PORT + $i ))
   done
 fi
sbin/start-thriftserver.sh
@@ -24,7 +24,7 @@
 set -o posix
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
 CLASS_NOT_FOUND_EXIT_STATUS=1
@@ -38,10 +38,10 @@ function usage {
   pattern+="\|======="
   pattern+="\|--help"
 
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   echo
   echo "Thrift server options:"
-  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+  "$FWDIR"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
@@ -49,7 +49,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   exit 0
 fi
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
sbin/stop-all.sh
@@ -21,8 +21,8 @@
 # Run this on the master nde
 
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 # Load the Spark configuration
 . "$sbin/spark-config.sh"
sbin/stop-history-server.sh
@@ -19,7 +19,7 @@
 
 # Stops the history server on the machine this script is executed on.
 
-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+sbin="`dirname "$0"`"
+sbin="`cd "$sbin"; pwd`"
 
 "$sbin"/spark-daemon.sh stop org.apache.spark.deploy.history.HistoryServer 1
sbt/sbt
@@ -3,32 +3,32 @@
 # When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
 # that we can run Hive to generate the golden answer. This is not required for normal development
 # or testing.
-for i in $HIVE_HOME/lib/*
-do HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$i
+for i in "$HIVE_HOME"/lib/*
+do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
 done
 export HADOOP_CLASSPATH
 
 realpath () {
 (
-  TARGET_FILE=$1
+  TARGET_FILE="$1"
 
-  cd $(dirname $TARGET_FILE)
-  TARGET_FILE=$(basename $TARGET_FILE)
+  cd "$(dirname "$TARGET_FILE")"
+  TARGET_FILE="$(basename "$TARGET_FILE")"
 
   COUNT=0
   while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
   do
-      TARGET_FILE=$(readlink $TARGET_FILE)
-      cd $(dirname $TARGET_FILE)
-      TARGET_FILE=$(basename $TARGET_FILE)
+      TARGET_FILE="$(readlink "$TARGET_FILE")"
+      cd $(dirname "$TARGET_FILE")
+      TARGET_FILE="$(basename $TARGET_FILE)"
      COUNT=$(($COUNT + 1))
   done
 
-  echo $(pwd -P)/$TARGET_FILE
+  echo "$(pwd -P)/"$TARGET_FILE""
 )
 }
 
-. $(dirname $(realpath $0))/sbt-launch-lib.bash
+. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash
 
 
 declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
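The sourcing line at the end of this hunk is the densest instance of the pattern: unlike backticks, $(...) nests cleanly and every level opens its own quoting context, so "$0", the realpath result, and the dirname result can each be quoted without colliding. A reduced sketch, assuming a realpath(1) on PATH or the shell function defined above:

lib_dir="$(dirname "$(realpath "$0")")"   # each level individually quoted
echo "$lib_dir"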
sbt/sbt-launch-lib.bash
@@ -7,7 +7,7 @@
 # TODO - Should we merge the main SBT script with this library?
 
 if test -z "$HOME"; then
-  declare -r script_dir="$(dirname $script_path)"
+  declare -r script_dir="$(dirname "$script_path")"
 else
   declare -r script_dir="$HOME/.sbt"
 fi
@@ -46,20 +46,20 @@ acquire_sbt_jar () {
 
   if [[ ! -f "$sbt_jar" ]]; then
     # Download sbt launch jar if it hasn't been downloaded yet
-    if [ ! -f ${JAR} ]; then
+    if [ ! -f "${JAR}" ]; then
       # Download
       printf "Attempting to fetch sbt\n"
-      JAR_DL=${JAR}.part
+      JAR_DL="${JAR}.part"
       if hash curl 2>/dev/null; then
-        (curl --silent ${URL1} > ${JAR_DL} || curl --silent ${URL2} > ${JAR_DL}) && mv ${JAR_DL} ${JAR}
+        (curl --silent ${URL1} > "${JAR_DL}" || curl --silent ${URL2} > "${JAR_DL}") && mv "${JAR_DL}" "${JAR}"
       elif hash wget 2>/dev/null; then
-        (wget --quiet ${URL1} -O ${JAR_DL} || wget --quiet ${URL2} -O ${JAR_DL}) && mv ${JAR_DL} ${JAR}
+        (wget --quiet ${URL1} -O "${JAR_DL}" || wget --quiet ${URL2} -O "${JAR_DL}") && mv "${JAR_DL}" "${JAR}"
       else
         printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n"
        exit -1
      fi
    fi
-    if [ ! -f ${JAR} ]; then
+    if [ ! -f "${JAR}" ]; then
      # We failed to download
      printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n"
      exit -1