2012-08-01 16:17:31 -04:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
2012-09-25 17:43:40 -04:00
|
|
|
#
|
2012-08-04 20:04:33 -04:00
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
2013-07-16 20:21:33 -04:00
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
2012-08-04 20:04:33 -04:00
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2013-07-16 20:21:33 -04:00
|
|
|
#
|
2012-08-04 20:04:33 -04:00
|
|
|
|
2012-08-01 16:17:31 -04:00
|
|
|
# Runs a Spark command as a daemon.
|
|
|
|
#
|
|
|
|
# Environment Variables
|
|
|
|
#
|
2014-10-28 15:29:01 -04:00
|
|
|
# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
|
|
|
|
# SPARK_LOG_DIR Where log files are stored. ${SPARK_HOME}/logs by default.
|
2012-08-01 16:17:31 -04:00
|
|
|
# SPARK_MASTER host:path where spark code should be rsync'd from
|
|
|
|
# SPARK_PID_DIR The pid files are stored. /tmp by default.
|
|
|
|
# SPARK_IDENT_STRING A string representing this instance of spark. $USER by default
|
|
|
|
# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
|
2016-10-20 04:49:58 -04:00
|
|
|
# SPARK_NO_DAEMONIZE If set, will run the proposed command in the foreground. It will not output a PID file.
|
2012-08-01 16:17:31 -04:00
|
|
|
##
|
|
|
|
|
[SPARK-8064] [SQL] Build against Hive 1.2.1
Cherry picked the parts of the initial SPARK-8064 WiP branch needed to get sql/hive to compile against hive 1.2.1. That's the ASF release packaged under org.apache.hive, not any fork.
Tests not run yet: that's what the machines are for
Author: Steve Loughran <stevel@hortonworks.com>
Author: Cheng Lian <lian@databricks.com>
Author: Michael Armbrust <michael@databricks.com>
Author: Patrick Wendell <patrick@databricks.com>
Closes #7191 from steveloughran/stevel/feature/SPARK-8064-hive-1.2-002 and squashes the following commits:
7556d85 [Cheng Lian] Updates .q files and corresponding golden files
ef4af62 [Steve Loughran] Merge commit '6a92bb09f46a04d6cd8c41bdba3ecb727ebb9030' into stevel/feature/SPARK-8064-hive-1.2-002
6a92bb0 [Cheng Lian] Overrides HiveConf time vars
dcbb391 [Cheng Lian] Adds com.twitter:parquet-hadoop-bundle:1.6.0 for Hive Parquet SerDe
0bbe475 [Steve Loughran] SPARK-8064 scalastyle rejects the standard Hadoop ASF license header...
fdf759b [Steve Loughran] SPARK-8064 classpath dependency suite to be in sync with shading in final (?) hive-exec spark
7a6c727 [Steve Loughran] SPARK-8064 switch to second staging repo of the spark-hive artifacts. This one has the protobuf-shaded hive-exec jar
376c003 [Steve Loughran] SPARK-8064 purge duplicate protobuf declaration
2c74697 [Steve Loughran] SPARK-8064 switch to the protobuf shaded hive-exec jar with tests to chase it down
cc44020 [Steve Loughran] SPARK-8064 remove hadoop.version from runtest.py, as profile will fix that automatically.
6901fa9 [Steve Loughran] SPARK-8064 explicit protobuf import
da310dc [Michael Armbrust] Fixes for Hive tests.
a775a75 [Steve Loughran] SPARK-8064 cherry-pick-incomplete
7404f34 [Patrick Wendell] Add spark-hive staging repo
832c164 [Steve Loughran] SPARK-8064 try to supress compiler warnings on Complex.java pasted-thrift-code
312c0d4 [Steve Loughran] SPARK-8064 maven/ivy dependency purge; calcite declaration needed
fa5ae7b [Steve Loughran] HIVE-8064 fix up hive-thriftserver dependencies and cut back on evicted references in the hive- packages; this keeps mvn and ivy resolution compatible, as the reconciliation policy is "by hand"
c188048 [Steve Loughran] SPARK-8064 manage the Hive depencencies to that -things that aren't needed are excluded -sql/hive built with ivy is in sync with the maven reconciliation policy, rather than latest-first
4c8be8d [Cheng Lian] WIP: Partial fix for Thrift server and CLI tests
314eb3c [Steve Loughran] SPARK-8064 deprecation warning noise in one of the tests
17b0341 [Steve Loughran] SPARK-8064 IDE-hinted cleanups of Complex.java to reduce compiler warnings. It's all autogenerated code, so still ugly.
d029b92 [Steve Loughran] SPARK-8064 rely on unescaping to have already taken place, so go straight to map of serde options
23eca7e [Steve Loughran] HIVE-8064 handle raw and escaped property tokens
54d9b06 [Steve Loughran] SPARK-8064 fix compilation regression surfacing from rebase
0b12d5f [Steve Loughran] HIVE-8064 use subset of hive complex type whose types deserialize
fce73b6 [Steve Loughran] SPARK-8064 poms rely implicitly on the version of kryo chill provides
fd3aa5d [Steve Loughran] SPARK-8064 version of hive to d/l from ivy is 1.2.1
dc73ece [Steve Loughran] SPARK-8064 revert to master's determinstic pushdown strategy
d3c1e4a [Steve Loughran] SPARK-8064 purge UnionType
051cc21 [Steve Loughran] SPARK-8064 switch to an unshaded version of hive-exec-core, which must have been built with Kryo 2.21. This currently looks for a (locally built) version 1.2.1.spark
6684c60 [Steve Loughran] SPARK-8064 ignore RTE raised in blocking process.exitValue() call
e6121e5 [Steve Loughran] SPARK-8064 address review comments
aa43dc6 [Steve Loughran] SPARK-8064 more robust teardown on JavaMetastoreDatasourcesSuite
f2bff01 [Steve Loughran] SPARK-8064 better takeup of asynchronously caught error text
8b1ef38 [Steve Loughran] SPARK-8064: on failures executing spark-submit in HiveSparkSubmitSuite, print command line and all logged output.
5a9ce6b [Steve Loughran] SPARK-8064 add explicit reason for kv split failure, rather than array OOB. *does not address the issue*
642b63a [Steve Loughran] SPARK-8064 reinstate something cut briefly during rebasing
97194dc [Steve Loughran] SPARK-8064 add extra logging to the YarnClusterSuite classpath test. There should be no reason why this is failing on jenkins, but as it is (and presumably its CP-related), improve the logging including any exception raised.
335357f [Steve Loughran] SPARK-8064 fail fast on thrive process spawning tests on exit codes and/or error string patterns seen in log.
3ed872f [Steve Loughran] SPARK-8064 rename field double to dbl
bca55e5 [Steve Loughran] SPARK-8064 missed one of the `date` escapes
41d6479 [Steve Loughran] SPARK-8064 wrap tests with withTable() calls to avoid table-exists exceptions
2bc29a4 [Steve Loughran] SPARK-8064 ParquetSuites to escape `date` field name
1ab9bc4 [Steve Loughran] SPARK-8064 TestHive to use sered2.thrift.test.Complex
bf3a249 [Steve Loughran] SPARK-8064: more resubmit than fix; tighten startup timeout to 60s. Still no obvious reason why jersey server code in spark-assembly isn't being picked up -it hasn't been shaded
c829b8f [Steve Loughran] SPARK-8064: reinstate yarn-rm-server dependencies to hive-exec to ensure that jersey server is on classpath on hadoop versions < 2.6
0b0f738 [Steve Loughran] SPARK-8064: thrift server startup to fail fast on any exception in the main thread
13abaf1 [Steve Loughran] SPARK-8064 Hive compatibilty tests sin sync with explain/show output from Hive 1.2.1
d14d5ea [Steve Loughran] SPARK-8064: DATE is now a predicate; you can't use it as a field in select ops
26eef1c [Steve Loughran] SPARK-8064: HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT
3d64523 [Steve Loughran] SPARK-8064 improve diagns on uknown token; fix scalastyle failure
d0360f6 [Steve Loughran] SPARK-8064: delicate merge in of the branch vanzin/hive-1.1
1126e5a [Steve Loughran] SPARK-8064: name of unrecognized file format wasn't appearing in error text
8cb09c4 [Steve Loughran] SPARK-8064: test resilience/assertion improvements. Independent of the rest of the work; can be backported to earlier versions
dec12cb [Steve Loughran] SPARK-8064: when a CLI suite test fails include the full output text in the raised exception; this ensures that the stdout/stderr is included in jenkins reports, so it becomes possible to diagnose the cause.
463a670 [Steve Loughran] SPARK-8064 run-tests.py adds a hadoop-2.6 profile, and changes info messages to say "w/Hive 1.2.1" in console output
2531099 [Steve Loughran] SPARK-8064 successful attempt to get rid of pentaho as a transitive dependency of hive-exec
1d59100 [Steve Loughran] SPARK-8064 (unsuccessful) attempt to get rid of pentaho as a transitive dependency of hive-exec
75733fc [Steve Loughran] SPARK-8064 change thrift binary startup message to "Starting ThriftBinaryCLIService on port"
3ebc279 [Steve Loughran] SPARK-8064 move strings used to check for http/bin thrift services up into constants
c80979d [Steve Loughran] SPARK-8064: SparkSQLCLIDriver drops remote mode support. CLISuite Tests pass instead of timing out: undetected regression?
27e8370 [Steve Loughran] SPARK-8064 fix some style & IDE warnings
00e50d6 [Steve Loughran] SPARK-8064 stop excluding hive shims from dependency (commented out , for now)
cb4f142 [Steve Loughran] SPARK-8054 cut pentaho dependency from calcite
f7aa9cb [Steve Loughran] SPARK-8064 everything compiles with some commenting and moving of classes into a hive package
6c310b4 [Steve Loughran] SPARK-8064 subclass Hive ServerOptionsProcessor to make it public again
f61a675 [Steve Loughran] SPARK-8064 thrift server switched to Hive 1.2.1, though it doesn't compile everywhere
4890b9d [Steve Loughran] SPARK-8064, build against Hive 1.2.1
2015-08-03 18:24:34 -04:00
|
|
|
usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"
|
2012-08-01 16:17:31 -04:00
|
|
|
|
|
|
|
# if no args specified, show usage
|
|
|
|
if [ $# -le 1 ]; then
|
|
|
|
echo $usage
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
2015-11-04 05:49:34 -05:00
|
|
|
if [ -z "${SPARK_HOME}" ]; then
|
|
|
|
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
|
|
|
fi
|
2012-08-01 16:17:31 -04:00
|
|
|
|
2015-11-04 05:49:34 -05:00
|
|
|
. "${SPARK_HOME}/sbin/spark-config.sh"
|
2012-08-01 16:17:31 -04:00
|
|
|
|
|
|
|
# get arguments
|
2013-10-15 03:35:44 -04:00
|
|
|
|
2013-10-17 01:51:09 -04:00
|
|
|
# Check if --config is passed as an argument. It is an optional parameter.
|
2013-10-17 01:55:15 -04:00
|
|
|
# Exit if the argument is not a directory.
|
2013-10-17 01:51:09 -04:00
|
|
|
|
2013-10-15 03:35:44 -04:00
|
|
|
if [ "$1" == "--config" ]
|
|
|
|
then
|
|
|
|
shift
|
2014-09-08 13:24:15 -04:00
|
|
|
conf_dir="$1"
|
2013-10-15 03:35:44 -04:00
|
|
|
if [ ! -d "$conf_dir" ]
|
|
|
|
then
|
2013-10-17 01:51:09 -04:00
|
|
|
echo "ERROR : $conf_dir is not a directory"
|
|
|
|
echo $usage
|
2013-10-15 03:35:44 -04:00
|
|
|
exit 1
|
|
|
|
else
|
2014-09-08 13:24:15 -04:00
|
|
|
export SPARK_CONF_DIR="$conf_dir"
|
2013-10-15 03:35:44 -04:00
|
|
|
fi
|
|
|
|
shift
|
|
|
|
fi
|
|
|
|
|
2014-10-01 18:15:09 -04:00
|
|
|
option=$1
|
2012-08-01 16:17:31 -04:00
|
|
|
shift
|
|
|
|
command=$1
|
|
|
|
shift
|
2013-03-06 21:06:32 -05:00
|
|
|
instance=$1
|
|
|
|
shift
|
2012-08-01 16:17:31 -04:00
|
|
|
|
|
|
|
# Rotate a log file before a new daemon run: log -> log.1 -> log.2 -> ...
# Arguments:
#   $1 - path to the log file
#   $2 - (optional) number of rotated copies to keep; defaults to 5
spark_rotate_log ()
{
  log="$1"
  num=5
  if [ -n "$2" ]; then
    num="$2"
  fi
  if [ -f "$log" ]; then # rotate logs
    # Shift each numbered copy up by one, oldest first, so nothing is clobbered.
    while [ "$num" -gt 1 ]; do
      prev=$((num - 1))
      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
      num=$prev
    done
    mv "$log" "$log.$num"
  fi
}
|
|
|
|
|
2015-11-04 05:49:34 -05:00
|
|
|
. "${SPARK_HOME}/bin/load-spark-env.sh"
|
2012-08-01 16:17:31 -04:00
|
|
|
|
|
|
|
if [ "$SPARK_IDENT_STRING" = "" ]; then
|
|
|
|
export SPARK_IDENT_STRING="$USER"
|
|
|
|
fi
|
|
|
|
|
2013-08-31 20:31:07 -04:00
|
|
|
|
|
|
|
export SPARK_PRINT_LAUNCH_COMMAND="1"
|
|
|
|
|
2012-08-01 16:17:31 -04:00
|
|
|
# get log directory
|
|
|
|
if [ "$SPARK_LOG_DIR" = "" ]; then
|
2015-11-04 05:49:34 -05:00
|
|
|
export SPARK_LOG_DIR="${SPARK_HOME}/logs"
|
2012-08-01 16:17:31 -04:00
|
|
|
fi
|
|
|
|
mkdir -p "$SPARK_LOG_DIR"
|
2014-09-08 13:24:15 -04:00
|
|
|
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
|
2012-08-01 16:17:31 -04:00
|
|
|
TEST_LOG_DIR=$?
|
|
|
|
if [ "${TEST_LOG_DIR}" = "0" ]; then
|
2014-09-08 13:24:15 -04:00
|
|
|
rm -f "$SPARK_LOG_DIR"/.spark_test
|
2012-08-01 16:17:31 -04:00
|
|
|
else
|
2014-09-08 13:24:15 -04:00
|
|
|
chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
|
2012-08-01 16:17:31 -04:00
|
|
|
fi
|
|
|
|
|
|
|
|
if [ "$SPARK_PID_DIR" = "" ]; then
|
|
|
|
SPARK_PID_DIR=/tmp
|
|
|
|
fi
|
|
|
|
|
|
|
|
# some variables
|
2014-09-08 13:24:15 -04:00
|
|
|
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
|
|
|
|
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"
|
2012-08-01 16:17:31 -04:00
|
|
|
|
|
|
|
# Set default scheduling priority
|
|
|
|
if [ "$SPARK_NICENESS" = "" ]; then
|
|
|
|
export SPARK_NICENESS=0
|
|
|
|
fi
|
|
|
|
|
2016-10-20 04:49:58 -04:00
|
|
|
# Run the given command line ("$@"), daemonized by default.
# In daemon mode: detach with nohup, append stdout/stderr to $log, record the
# child's PID in $pid, then poll briefly and tail the log if the child died so
# the user can see why. With SPARK_NO_DAEMONIZE set (to anything, even empty),
# run the command in the foreground and write no PID file.
# Globals read: log, pid, SPARK_NO_DAEMONIZE. Globals written: newpid.
execute_command() {
  if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
    # Quote "$log": an unquoted redirection target breaks on paths with spaces.
    nohup -- "$@" >> "$log" 2>&1 < /dev/null &
    newpid="$!"

    echo "$newpid" > "$pid"

    # Poll for up to 5 seconds for the java process to start
    for i in {1..10}
    do
      if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
        break
      fi
      sleep 0.5
    done

    sleep 2
    # Check if the process has died; in that case we'll tail the log so the user can see
    if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
      echo "failed to launch: $@"
      tail -2 "$log" | sed 's/^/ /'
      echo "full log in $log"
    fi
  else
    "$@"
  fi
}
|
|
|
|
|
2015-03-11 04:03:01 -04:00
|
|
|
# Launch the daemon in the requested execution mode: "class" runs the command
# through bin/spark-class, "submit" through bin/spark-submit. Refuses to start
# if a live java process already owns the pid file. Optionally rsyncs the
# Spark tree from $SPARK_MASTER first, then rotates the log and hands off to
# execute_command.
run_command() {
  exec_mode="$1"
  shift

  mkdir -p "$SPARK_PID_DIR"

  # Bail out early when the same daemon instance is already running.
  if [ -f "$pid" ]; then
    TARGET_ID="$(cat "$pid")"
    if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
      echo "$command running as process $TARGET_ID. Stop it first."
      exit 1
    fi
  fi

  # Sync the Spark installation from a master host when one is configured.
  if [ "$SPARK_MASTER" != "" ]; then
    echo rsync from "$SPARK_MASTER"
    rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
  fi

  spark_rotate_log "$log"
  echo "starting $command, logging to $log"

  case "$exec_mode" in
    class)
      execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
      ;;

    submit)
      execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
      ;;

    *)
      echo "unknown mode: $exec_mode"
      exit 1
      ;;
  esac
}
|
2012-08-01 16:17:31 -04:00
|
|
|
|
2015-03-11 04:03:01 -04:00
|
|
|
# Dispatch on the requested action. "submit"/"start" delegate to run_command;
# "stop" and "status" inspect the pid file directly. The pid-file tests are
# quoted ("$pid") so paths containing spaces do not break the [ -f ] checks.
case $option in

  (submit)
    run_command submit "$@"
    ;;

  (start)
    run_command class "$@"
    ;;

  (stop)

    if [ -f "$pid" ]; then
      TARGET_ID="$(cat "$pid")"
      # Only kill when the recorded PID still belongs to a java process;
      # otherwise the pid file is stale.
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo "stopping $command"
        kill "$TARGET_ID" && rm -f "$pid"
      else
        echo "no $command to stop"
      fi
    else
      echo "no $command to stop"
    fi
    ;;

  (status)

    # Exit codes: 0 = running, 1 = stale pid file, 2 = not running.
    if [ -f "$pid" ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo $command is running.
        exit 0
      else
        echo $pid file is present but $command not running
        exit 1
      fi
    else
      echo $command not running.
      exit 2
    fi
    ;;

  (*)
    echo "$usage"
    exit 1
    ;;

esac
|
|
|
|
|
|
|
|
|