Previously, we based our decision regarding whether to include the datanucleus jars on the existence of a spark-hive-assembly jar, which was incidentally built whenever "sbt assembly" is run. This means that a typical and previously supported pathway would start using hive jars.

This patch has the following features/bug fixes:

- Use of SPARK_HIVE (default false) to determine if we should include Hive in the assembly jar.
- Analogous feature in Maven with -Phive (previously, there was no support for adding Hive to any of our jars produced by Maven).
- assemble-deps fixed since we no longer use a different ASSEMBLY_DIR.
- Avoid adding a log message in compute-classpath.sh to the classpath :)

Still TODO before mergeable:

- We need to download the datanucleus jars outside of sbt. Perhaps we can have spark-class download them if SPARK_HIVE is set, similar to how sbt downloads itself.
- Spark SQL documentation updates.

Author: Aaron Davidson <aaron@databricks.com>

Closes #237 from aarondav/master and squashes the following commits:

5dc4329 [Aaron Davidson] Typo fixes
dd4f298 [Aaron Davidson] Doc update
dd1a365 [Aaron Davidson] Eliminate need for SPARK_HIVE at runtime by d/ling datanucleus from Maven
a9269b5 [Aaron Davidson] [WIP] Use SPARK_HIVE to determine if we include Hive in packaging
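A sketch of the build invocations this change enables (the SPARK_HIVE flag and -Phive profile are described above; the exact sbt/Maven targets may differ by branch):

  # Opt in to bundling Hive (and the datanucleus jars) in the sbt assembly
  SPARK_HIVE=true sbt/sbt assembly

  # The analogous opt-in for Maven builds
  mvn -Phive clean package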
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
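# Remember whether we are running under Cygwin, so the classpath can be
# converted to Windows form (via cygpath) before launching the JVM below.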
cygwin=false
case "`uname`" in
    CYGWIN*) cygwin=true;;
esac

SCALA_VERSION=2.10
# Figure out where the Scala framework is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"
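# Pull in user-configured environment variables (conf/spark-env.sh), if present.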
. "$FWDIR"/bin/load-spark-env.sh
if [ -z "$1" ]; then
  echo "Usage: spark-class <class> [<args>]" >&2
  exit 1
fi

if [ -n "$SPARK_MEM" ]; then
  echo "Warning: SPARK_MEM is deprecated, please use a more specific config option" >&2
  echo "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." >&2
fi
# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
DEFAULT_MEM=${SPARK_MEM:-512m}
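# Daemon JVMs always log Akka remote lifecycle events, which is intended to
# help when debugging master/worker connectivity.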
SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
# Add java opts and memory settings for master, worker, executors, and repl.
case "$1" in
  # Master and Worker use SPARK_DAEMON_JAVA_OPTS (and specific opts) + SPARK_DAEMON_MEMORY.
  'org.apache.spark.deploy.master.Master')
    OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_MASTER_OPTS"
    OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
    ;;
  'org.apache.spark.deploy.worker.Worker')
    OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_WORKER_OPTS"
    OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
    ;;

  # Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
  'org.apache.spark.executor.CoarseGrainedExecutorBackend')
    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
    OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
    ;;
  'org.apache.spark.executor.MesosExecutorBackend')
    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
    OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
    ;;

  # All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
  'org.apache.spark.repl.Main')
    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS"
    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
    ;;
  *)
    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
    ;;
esac
# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
  RUNNER="${JAVA_HOME}/bin/java"
else
  if command -v java > /dev/null; then
    RUNNER="java"
  else
    echo "JAVA_HOME is not set and no 'java' executable could be found on the PATH" >&2
    exit 1
  fi
fi
# Set JAVA_OPTS to be able to load native libraries and to set heap size
JAVA_OPTS="$OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
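# A binary distribution ships with a RELEASE marker file; only source checkouts
# need to be checked for a locally built assembly jar.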
if [ ! -f "$FWDIR/RELEASE" ]; then
  # Exit if the user hasn't compiled Spark
  num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l)
  jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar")
  if [ "$num_jars" -eq "0" ]; then
    echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2
    echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2
    exit 1
  fi
  if [ "$num_jars" -gt "1" ]; then
    echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2
    echo "$jars_list" >&2
    echo "Please remove all but one jar." >&2
    exit 1
  fi
fi
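# Locate the spark-tools jar (from either the sbt or the Maven build); it is
# only added to the classpath for classes under org.apache.spark.tools below.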
TOOLS_DIR="$FWDIR"/tools
SPARK_TOOLS_JAR=""
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
  # Use the JAR from the SBT build
  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
fi
if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
  # Use the JAR from the Maven build
  # TODO: this also needs to become an assembly!
  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
fi
# Compute classpath using external script
CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
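# NB: compute-classpath.sh must print nothing but the classpath on stdout;
# any stray log line would end up being treated as part of the classpath
# (see the commit message above).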
if [[ "$1" =~ org.apache.spark.tools.* ]]; then
  CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
fi
if $cygwin; then
  CLASSPATH=`cygpath -wp $CLASSPATH`
  if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
    export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
  fi
fi
export CLASSPATH
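# Optionally print the exact launch command before running it, e.g.:
#   SPARK_PRINT_LAUNCH_COMMAND=1 ./bin/spark-class org.apache.spark.deploy.master.Master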
if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
|
|
echo -n "Spark Command: "
|
|
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
|
|
echo "========================================"
|
|
echo
|
|
fi
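# exec replaces this shell with the JVM, so signals (e.g. SIGTERM) are
# delivered directly to the Spark process.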
exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"