spark-instrumented-optimizer/bin/pyspark

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Figure out where the Scala framework is installed
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"
SCALA_VERSION=2.10
# Exit if the user hasn't compiled Spark
if [ ! -f "$FWDIR/RELEASE" ]; then
ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
if [[ $? != 0 ]]; then
echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
echo "You need to build Spark before running this program" >&2
exit 1
fi
fi
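# If the assembly jar is missing, it can usually be produced with the sbt
# launcher bundled in the source tree (assuming it lives at sbt/sbt):
#
#   sbt/sbt assembly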
. "$FWDIR"/bin/load-spark-env.sh
# Figure out which Python executable to use
if [ -z "$PYSPARK_PYTHON" ] ; then
  PYSPARK_PYTHON="python"
fi
export PYSPARK_PYTHON
# Add the PySpark classes to the Python path:
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
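# The previous PYTHONSTARTUP is saved above so that shell.py can pick it up
# via OLD_PYTHONSTARTUP and still run the user's own startup file after the
# Spark shell has been initialized.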
if [ -n "$IPYTHON_OPTS" ]; then
  IPYTHON=1
fi
# Only use ipython if no command line arguments were provided [SPARK-1134]
if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
exec ipython $IPYTHON_OPTS
else
exec "$PYSPARK_PYTHON" "$@"
fi
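# Typical invocations (illustrative only; my_script.py and python2.7 are
# placeholder names, not files shipped with Spark):
#
#   bin/pyspark                          # interactive shell using $PYSPARK_PYTHON
#   IPYTHON=1 bin/pyspark                # interactive shell under IPython
#   IPYTHON_OPTS="notebook" bin/pyspark  # extra options are passed to IPython
#   bin/pyspark my_script.py arg1        # arguments go straight to $PYSPARK_PYTHON
#   PYSPARK_PYTHON=python2.7 bin/pyspark # select a specific interpreter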