2012-12-28 01:47:37 -05:00
|
|
|
#!/usr/bin/env bash
|
2012-10-19 20:16:41 -04:00
|
|
|
|
2013-07-16 20:21:33 -04:00
|
|
|
#
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
|
2012-10-19 20:16:41 -04:00
|
|
|
# Figure out where Spark is installed (the parent directory of this script's directory)
|
2013-09-23 04:13:46 -04:00
|
|
|
# Quote "$0" and the dirname result so install paths containing spaces or glob
# characters resolve correctly; stop if the directory cannot be entered
# instead of silently using the caller's working directory.
FWDIR="$(cd "$(dirname "$0")"/.. && pwd)" || exit 1
|
2012-10-19 20:16:41 -04:00
|
|
|
|
|
|
|
# Export this as SPARK_HOME
|
|
|
|
# Child processes see the resolved install root as SPARK_HOME.
SPARK_HOME="${FWDIR}"
export SPARK_HOME
|
|
|
|
|
2013-09-15 03:17:20 -04:00
|
|
|
# Names the scala-<version> directory under assembly/target that is searched
# for the built assembly jar.
SCALA_VERSION="2.10"
|
2013-08-27 18:46:23 -04:00
|
|
|
|
2013-01-17 14:14:47 -05:00
|
|
|
# Exit if the user hasn't compiled Spark
|
2013-08-23 02:02:09 -04:00
|
|
|
# When there is no RELEASE file under $FWDIR, require that an assembly jar has
# been built; otherwise report the problem on stderr and exit with status 1.
if [ ! -f "$FWDIR/RELEASE" ]; then
  # ls fails when the glob matches nothing, which is used here as the
  # "no jar present" signal. Its output is irrelevant, so discard it.
  if ! ls "$FWDIR"/assembly/target/scala-"$SCALA_VERSION"/spark-assembly*hadoop*.jar > /dev/null 2>&1; then
    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
    echo "You need to build Spark before running this program" >&2
    exit 1
  fi
fi
|
|
|
|
|
2014-03-25 01:24:21 -04:00
|
|
|
# Source the environment loader from the install root. The path is quoted so
# an install directory containing spaces is not split into several words.
. "$FWDIR/bin/load-spark-env.sh"
|
2012-10-19 20:16:41 -04:00
|
|
|
|
|
|
|
# Figure out which Python executable to use
|
|
|
|
# Fall back to "python" when PYSPARK_PYTHON is unset or empty; the value is
# exported in either case.
export PYSPARK_PYTHON="${PYSPARK_PYTHON:-python}"
|
|
|
|
|
|
|
|
# Add the PySpark classes to the Python path:
|
2013-01-01 17:48:45 -05:00
|
|
|
# Put $SPARK_HOME/python/ in front of whatever PYTHONPATH already holds.
export PYTHONPATH="${SPARK_HOME}/python/:${PYTHONPATH}"
|
2014-04-30 02:24:34 -04:00
|
|
|
# Put the bundled py4j source zip in front of whatever PYTHONPATH already holds.
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.8.1-src.zip:${PYTHONPATH}"
|
2012-10-19 20:16:41 -04:00
|
|
|
|
2013-01-02 00:25:49 -05:00
|
|
|
# Load the PySpark shell.py script when ./pyspark is used interactively:
|
2014-01-09 21:41:00 -05:00
|
|
|
# Keep the caller's PYTHONSTARTUP value available under another name before
# it is replaced.
OLD_PYTHONSTARTUP="${PYTHONSTARTUP}"
export OLD_PYTHONSTARTUP
|
2013-01-02 00:25:49 -05:00
|
|
|
# Point PYTHONSTARTUP at the PySpark shell bootstrap script.
PYTHONSTARTUP="${FWDIR}/python/pyspark/shell.py"
export PYTHONSTARTUP
|
|
|
|
|
2013-07-28 22:21:04 -04:00
|
|
|
# A non-empty IPYTHON_OPTS switches IPython mode on (IPYTHON=1).
if [[ -n "$IPYTHON_OPTS" ]]; then
  IPYTHON=1
fi
|
|
|
|
|
[SPARK-1134] Fix and document passing of arguments to IPython
This is based on @dianacarroll's previous pull request https://github.com/apache/spark/pull/227, and @joshrosen's comments on https://github.com/apache/spark/pull/38. Since we do want to allow passing arguments to IPython, this does the following:
* It documents that IPython can't be used with standalone jobs for now. (Later versions of IPython will deal with PYTHONSTARTUP properly and enable this, see https://github.com/ipython/ipython/pull/5226, but no released version has that fix.)
* If you run `pyspark` with `IPYTHON=1`, it passes your command-line arguments to it. This way you can do stuff like `IPYTHON=1 bin/pyspark notebook`.
* The old `IPYTHON_OPTS` remains, but I've removed it from the documentation. This is in case people read an old tutorial that uses it.
This is not a perfect solution and I'd also be okay with keeping things as they are today (ignoring `$@` for IPython and using IPYTHON_OPTS), and only doing the doc change. With this change though, when IPython fixes https://github.com/ipython/ipython/pull/5226, people will immediately be able to do `IPYTHON=1 bin/pyspark myscript.py` to run a standalone script and get all the benefits of running scripts in IPython (presumably better debugging and such). Without it, there will be no way to run scripts in IPython.
@joshrosen you should probably take the final call on this.
Author: Diana Carroll <dcarroll@cloudera.com>
Closes #294 from mateiz/spark-1134 and squashes the following commits:
747bb13 [Diana Carroll] SPARK-1134 bug with ipython prevents non-interactive use with spark; only call ipython if no command line arguments were supplied
2014-04-03 18:48:42 -04:00
|
|
|
# Only use ipython if no command line arguments were provided [SPARK-1134].
# In every other case the interpreter named by PYSPARK_PYTHON is started with
# the caller's arguments passed through unchanged. Either way this script's
# process is replaced via exec.
if [[ "$IPYTHON" == "1" && $# -eq 0 ]]; then
  # IPYTHON_OPTS is expanded unquoted, so it is word-split into separate
  # ipython arguments; quoting it would pass the whole string as one argument.
  # shellcheck disable=SC2086
  exec ipython $IPYTHON_OPTS
else
  exec "$PYSPARK_PYTHON" "$@"
fi
|