9717389365
The fix for SPARK-6406 broke the case where sub-processes are launched when SPARK_PREPEND_CLASSES is set, because the code now would only add the launcher's build directory to the sub-process's classpath instead of the complete assembly. This patch fixes the problem by having the launch scripts stash the assembly's location in an environment variable. This is not the prettiest solution, but it avoids having to plumb that location all the way through the Worker code that launches executors. The env variable is always set by the launch scripts, so users cannot override it. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #5504 from vanzin/SPARK-6890 and squashes the following commits: 7aec921 [Marcelo Vanzin] Fix tests. ff87a60 [Marcelo Vanzin] Merge branch 'master' into SPARK-6890 31d3ce8 [Marcelo Vanzin] [SPARK-6890] [core] Fix launcher lib work with SPARK_PREPEND_CLASSES.
107 lines
3.4 KiB
Bash
Executable file
107 lines
3.4 KiB
Bash
Executable file
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort the script as soon as any unguarded command fails.
set -e

# Figure out where Spark is installed: the parent of the directory holding this script.
# `&&` (instead of `;`) makes a failed `cd` fail the whole substitution rather than letting
# `pwd` report the caller's working directory, and assigning before exporting keeps that
# failure visible to `set -e` (`export VAR="$(...)"` always reports the status of `export`).
SPARK_HOME="$(cd "$(dirname "$0")"/.. && pwd)"
export SPARK_HOME

# Source Spark's environment script. It lives outside this file; presumably it provides
# settings such as SPARK_SCALA_VERSION that later steps read -- confirm against that script.
. "$SPARK_HOME"/bin/load-spark-env.sh

# The first argument is the class to run; without it there is nothing to launch.
if [ -z "$1" ]; then
  echo "Usage: spark-class <class> [<args>]" 1>&2
  exit 1
fi
# Find the java binary: use the one under JAVA_HOME when that variable is set, otherwise
# fall back to whatever `java` resolves to on the PATH.
if [ -n "${JAVA_HOME}" ]; then
  RUNNER="${JAVA_HOME}/bin/java"
elif command -v java >/dev/null 2>&1; then
  # Test the exit status of `command -v` rather than feeding its (unquoted) output to `[`:
  # a resolved path containing whitespace would otherwise make `[` fail and send us to the
  # error branch even though java is available.
  RUNNER="java"
else
  echo "JAVA_HOME is not set" >&2
  exit 1
fi
# Find assembly jar
#
# Sets ASSEMBLY_DIR to the directory that is searched, ASSEMBLY_JARS to the matching file
# name(s) (one per line) and SPARK_ASSEMBLY_JAR to the full path of the single match.
# Exits with status 1 when no jar, or more than one jar, is found.
SPARK_ASSEMBLY_JAR=
if [ -f "$SPARK_HOME/RELEASE" ]; then
  # A RELEASE file is present in SPARK_HOME: look in lib/.
  ASSEMBLY_DIR="$SPARK_HOME/lib"
else
  # No RELEASE file: look in the assembly module's target directory for this Scala version.
  ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
fi

# Collect candidates with a glob (equivalent to the pattern ^spark-assembly.*hadoop.*\.jar$)
# in a single pass, instead of parsing the output of `ls` twice.
candidates=()
for candidate in "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar; do
  # When nothing matches, the pattern is left unexpanded; skip that literal string.
  if [ -e "$candidate" ] || [ -L "$candidate" ]; then
    candidates+=("${candidate##*/}")
  fi
done
num_jars="${#candidates[@]}"

if [ "$num_jars" -eq 0 ] && [ -z "$SPARK_ASSEMBLY_JAR" ]; then
  echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build Spark before running this program." 1>&2
  exit 1
fi

# File names only, one per line, so the report below lists each jar on its own line.
ASSEMBLY_JARS="$(printf '%s\n' "${candidates[@]}")"
if [ "$num_jars" -gt 1 ]; then
  echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
  echo "$ASSEMBLY_JARS" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

# Exactly one match at this point, so ASSEMBLY_JARS holds a single file name.
SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
# Verify that versions of java used to build the jars and run Spark are compatible
if [ -n "$JAVA_HOME" ]; then
  JAR_CMD="$JAVA_HOME/bin/jar"
else
  JAR_CMD="jar"
fi

#######################################
# Check that the given jar tool can read the assembly jar.
# Arguments:
#   $1 - jar command (a path, or a name resolvable by the shell)
#   $2 - path of the assembly jar to probe
# Outputs:
#   On failure, writes an explanation to stderr.
# Returns:
#   0 when the jar command is unavailable (the check is skipped) or when its output does
#   not mention "invalid CEN header"; 1 when it does.
#######################################
check_assembly_jar_compat() {
  local jar_cmd="$1"
  local assembly_jar="$2"
  local jar_error_check

  # Test the exit status of `command -v` instead of passing its unquoted output to `[`,
  # which breaks when the resolved path contains whitespace.
  if ! command -v "$jar_cmd" >/dev/null 2>&1; then
    return 0
  fi

  # Ask for an entry that is not expected to exist, so only error text is of interest.
  # `|| true`: a jar tool that fails on the archive exits non-zero, and under `set -e` that
  # would abort the script right here, before the explanation below is ever printed.
  jar_error_check=$("$jar_cmd" -tf "$assembly_jar" nonexistent/class/path 2>&1) || true

  if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
    echo "Loading Spark jar with '$jar_cmd' failed. " 1>&2
    echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
    echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
    echo "or build Spark with Java 6." 1>&2
    return 1
  fi
  return 0
}

check_assembly_jar_compat "$JAR_CMD" "$SPARK_ASSEMBLY_JAR" || exit 1
# Classpath used to run the launcher itself: the assembly jar by default.
LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  LAUNCH_CLASSPATH="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

# Stash the assembly's location in the environment so that processes started from here can
# still find the complete assembly when SPARK_PREPEND_CLASSES is set (SPARK-6890).
export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"

#######################################
# Run the launcher and append its exit status as one extra NUL-terminated field.
# The caller reads the output through a process substitution, which discards the exit
# status; emitting it in-band is what lets the caller notice a failed launcher.
# Globals:   RUNNER, LAUNCH_CLASSPATH (read)
# Arguments: the arguments given to this script
# Outputs:   the launcher's stdout followed by "<exit status>\0"
#######################################
build_command() {
  local status=0
  # `|| status=$?` keeps `set -e` from ending the subshell before the status is printed.
  "$RUNNER" -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@" || status=$?
  printf '%d\0' "$status"
}

# The launcher library will print arguments separated by a NULL character, to allow arguments with
# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
# an array that will be used to exec the final command.
CMD=()
while IFS= read -d '' -r ARG; do
  CMD+=("$ARG")
done < <(build_command "$@")

COUNT="${#CMD[@]}"
if [ "$COUNT" -eq 0 ]; then
  # Not even the status field arrived, so the launcher subshell did not finish normally.
  echo "Failed to run the Spark launcher." 1>&2
  exit 1
fi

# The last field is the launcher's exit status; everything before it is the command.
LAST=$((COUNT - 1))
LAUNCHER_EXIT_CODE="${CMD[$LAST]}"

if ! [[ "$LAUNCHER_EXIT_CODE" =~ ^[0-9]+$ ]]; then
  # The launcher wrote text that was not NUL-terminated, so it got glued to the status field.
  echo "Unexpected output from the Spark launcher:" 1>&2
  printf '%s\n' "${CMD[@]}" 1>&2
  exit 1
fi

if [ "$LAUNCHER_EXIT_CODE" -ne 0 ]; then
  # Previously a failed launcher left CMD empty and the script ended with status 0.
  exit "$LAUNCHER_EXIT_CODE"
fi

CMD=("${CMD[@]:0:$LAST}")
if [ "${#CMD[@]}" -eq 0 ]; then
  echo "The Spark launcher did not produce a command to run." 1>&2
  exit 1
fi

if [ "${CMD[0]}" = "usage" ]; then
  # "usage" is not defined in this file -- presumably a shell function supplied by the
  # calling script (TODO confirm). It is invoked without `exec` because `exec` only runs
  # external commands.
  "${CMD[@]}"
else
  exec "${CMD[@]}"
fi