[SPARK-2894] spark-shell doesn't accept flags
As sryza reported, spark-shell doesn't accept any flags. The root cause is incorrect usage of spark-submit in spark-shell, and it came to the surface with #1801.

Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>
Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #1715, Closes #1864, and Closes #1861

Closes #1825 from sarutak/SPARK-2894 and squashes the following commits:

47f3510 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-2894
2c899ed [Kousuke Saruta] Removed useless code from java_gateway.py
98287ed [Kousuke Saruta] Removed useless code from java_gateway.py
513ad2e [Kousuke Saruta] Modified utils.sh to allow options that include whitespace
28a374e [Kousuke Saruta] Modified java_gateway.py to recognize arguments
5afc584 [Cheng Lian] Filter out spark-submit options when starting Python gateway
e630d19 [Cheng Lian] Fixing pyspark and spark-shell CLI options
parent e45daf226d
commit 4f4a9884d9
bin/pyspark (18 lines changed)
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
 # Exit if the user hasn't compiled Spark
@@ -66,10 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
 
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
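The escaping loop above serializes each submission option into the PYSPARK_SUBMIT_ARGS string, escaping embedded double quotes and quoting values that contain whitespace. A minimal standalone sketch of that logic, with made-up option values:

PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
for i in --name "my app" --conf "spark.ui.port=4041"; do      # hypothetical options
  if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi  # escape embedded double quotes
  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi                 # quote values containing spaces
  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
done
echo "$PYSPARK_SUBMIT_ARGS"   # prints: --name "my app" --conf spark.ui.port=4041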
bin/spark-shell

@@ -31,13 +31,21 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi
 
-function main(){
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).

@@ -46,11 +54,11 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
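The net effect of the two spark-submit invocations above: recognized submission flags are placed before the spark-shell primary resource, and everything else after it. Conceptually, for a hypothetical invocation:

# ./bin/spark-shell --master local[4] --verbose now execs, roughly:
#   bin/spark-submit --class org.apache.spark.repl.Main --master local[4] --verbose spark-shell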
bin/utils.sh (new file, 59 lines)
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
+  # NOTE: If you add or remove spark-submit options,
+  # modify NOT ONLY this script but also SparkSubmitArguments.scala
+  SUBMISSION_OPTS=()
+  APPLICATION_OPTS=()
+  while (($#)); do
+    case "$1" in
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
+      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+        if [[ $# -lt 2 ]]; then
+          "$SUBMIT_USAGE_FUNCTION"
+          exit 1;
+        fi
+        SUBMISSION_OPTS+=("$1"); shift
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      --verbose | -v | --supervise)
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      *)
+        APPLICATION_OPTS+=("$1"); shift
+        ;;
+    esac
+  done
+
+  export SUBMISSION_OPTS
+  export APPLICATION_OPTS
+}
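A quick sketch of how gatherSparkSubmitOpts splits a command line, assuming it is run from a Spark checkout so bin/utils.sh can be sourced; app.py and --app-flag are hypothetical application arguments:

source bin/utils.sh
function usage() { echo "Usage: demo [options]"; exit 0; }
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts --master local[2] --name demo app.py --app-flag 1
echo "${SUBMISSION_OPTS[@]}"    # --master local[2] --name demo
echo "${APPLICATION_OPTS[@]}"   # app.py --app-flag 1

Recognized flags (and their values) go to SUBMISSION_OPTS; anything else, including the primary resource, falls through to APPLICATION_OPTS.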
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

@@ -224,6 +224,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
     // Delineates parsing of Spark options from parsing of user options.
     parse(opts)
 
+    /**
+     * NOTE: If you add or remove spark-submit options,
+     * modify NOT ONLY this file but also utils.sh
+     */
     def parse(opts: Seq[String]): Unit = opts match {
       case ("--name") :: value :: tail =>
         name = value
@@ -74,8 +74,10 @@ def fail(msg):
 
 def run_cmd(cmd):
     if isinstance(cmd, list):
+        print " ".join(cmd)
         return subprocess.check_output(cmd)
     else:
+        print cmd
         return subprocess.check_output(cmd.split(" "))
python/pyspark/java_gateway.py

@@ -39,7 +39,7 @@ def launch_gateway():
     submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
     submit_args = submit_args if submit_args is not None else ""
     submit_args = shlex.split(submit_args)
-    command = [os.path.join(SPARK_HOME, script), "pyspark-shell"] + submit_args
+    command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
     if not on_windows:
         # Don't send ctrl-c / SIGINT to the Java gateway:
         def preexec_func():
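The reordering matters because spark-submit expects options to precede the primary resource. A sketch of the resulting command line, assuming PYSPARK_SUBMIT_ARGS was populated by bin/pyspark (values here are illustrative):

export PYSPARK_SUBMIT_ARGS="--master local[2] --name my-app"
# launch_gateway() shlex-splits the variable and now builds, conceptually:
#   bin/spark-submit --master local[2] --name my-app pyspark-shell
# whereas before the fix the primary resource came first, so the flags were
# treated as application arguments and never applied:
#   bin/spark-submit pyspark-shell --master local[2] --name my-app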