spark-instrumented-optimizer/dev/run-pip-tests
Holden Karau 965c82d8c4 [SPARK-19064][PYSPARK] Fix pip installing of sub components
## What changes were proposed in this pull request?

Fix installation of the mllib and ml sub-components, and clean up cache files more eagerly during the test script and make-distribution.

## How was this patch tested?

Updated sanity test script to import mllib and ml sub-components.

Author: Holden Karau <holden@us.ibm.com>

Closes #16465 from holdenk/SPARK-19064-fix-pip-install-sub-components.
2017-01-25 14:43:39 -08:00

119 lines
4 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort the script as soon as any command fails.
set -o errexit
# With nullglob a glob that matches nothing expands to an empty list; the
# sdist count check later in this script depends on that.
shopt -s nullglob

# Resolve the repository root (the parent of this script's directory) and
# make it the working directory.
script_dir="$(dirname "$0")"
FWDIR="$(cd "$script_dir"/..; pwd)"
cd "$FWDIR"

echo "Constucting virtual env for testing"
# Temporary base directory under which the virtualenvs are created.
VIRTUALENV_BASE="$(mktemp -d)"
#######################################
# Remove the temporary virtualenv base directory and everything inside it.
# Globals:   VIRTUALENV_BASE (read)
# Outputs:   a one-line notice naming the directory, on stdout
#######################################
delete_virtualenv() {
  printf '%s\n' "Cleaning up temporary directory - $VIRTUALENV_BASE"
  rm -r -f "$VIRTUALENV_BASE"
}
# Run the cleanup function whenever the script exits, whatever the exit path.
trap delete_virtualenv EXIT

# The installability tests need both virtualenv and pip. When either tool is
# unavailable the tests are skipped and the script exits successfully.
hash virtualenv 2>/dev/null || {
  echo "Missing virtualenv skipping pip installability tests."
  exit 0
}
hash pip 2>/dev/null || {
  echo "Missing pip, skipping pip installability tests."
  exit 0
}
# Collect the Python executables that pip installation is tested with.
PYTHON_EXECS=()

# Python 2 slot: prefer an explicit `python2`; only when that is missing fall
# back to plain `python`. At most one of the two is added.
for candidate in python2 python; do
  if hash "$candidate" 2>/dev/null; then
    PYTHON_EXECS+=("$candidate")
    break
  fi
done

# Python 3 is tested in addition whenever it is available.
if hash python3 2>/dev/null; then
  PYTHON_EXECS+=('python3')
fi
#######################################
# Print the PySpark version declared in python/pyspark/version.py, resolved
# relative to the current working directory.
# Arguments: $1 - Python interpreter used to evaluate the version file
# Outputs:   the value of __version__ on stdout
# Returns:   the interpreter's exit status
#######################################
read_pyspark_version() {
  # print(...) with parentheses is valid on both Python 2 and Python 3; the
  # bare `print __version__` statement is a SyntaxError on Python 3.
  "$1" -c "exec(open('python/pyspark/version.py').read());print(__version__)"
}

# Determine which version of PySpark we are building for archive name.
# Use the first interpreter detected for testing so this also works on systems
# that have no plain `python` executable; fall back to `python` otherwise.
PYSPARK_VERSION=$(read_pyspark_version "${PYTHON_EXECS[0]:-python}")
PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
# The pip install options we use for all the pip commands
PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
# Test both regular user and edit/dev install modes.
# NOTE: each entry is one string that the install loop expands unquoted, so it
# is word-split at run time; paths containing whitespace are not supported.
PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
              "pip install $PIP_OPTIONS -e python/")
# For every detected interpreter and every pip install mode: build a fresh
# virtualenv, create the source distribution, install it with pip, and run the
# sanity checks from inside the activated virtualenv.
for python in "${PYTHON_EXECS[@]}"; do
  for install_command in "${PIP_COMMANDS[@]}"; do
    echo "Testing pip installation with python $python"
    # Each interpreter gets its own virtualenv directory under the temporary
    # base directory; it is recreated from scratch for every install mode.
    echo "Using $VIRTUALENV_BASE for virtualenv"
    VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
    rm -rf "$VIRTUALENV_PATH"
    mkdir -p "$VIRTUALENV_PATH"
    virtualenv --python="$python" "$VIRTUALENV_PATH"
    source "$VIRTUALENV_PATH"/bin/activate
    # Upgrade pip & friends
    pip install --upgrade pip pypandoc wheel
    pip install numpy # Needed so we can verify mllib imports

    echo "Creating pip installable source dist"
    cd "$FWDIR"/python
    # Delete the egg info file if it exists, this can cache the setup file.
    rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
    "$python" setup.py sdist

    echo "Installing dist into virtual env"
    cd dist
    # Verify that the dist directory only contains one thing to install.
    # With nullglob enabled the array is empty when no archive exists.
    sdists=(*.tar.gz)
    if [ ${#sdists[@]} -ne 1 ]; then
      echo "Unexpected number of targets found in dist directory - please cleanup existing sdists first."
      # Exit statuses must lie in 0-255; use a plain non-zero failure code
      # instead of a negative value.
      exit 1
    fi

    # Do the actual installation
    cd "$FWDIR"
    # Deliberately unquoted: the command is stored as a single string and has
    # to be word-split into the pip invocation and its arguments.
    $install_command

    # Run the checks from outside the source tree (presumably so the
    # pip-installed package is exercised rather than the checkout).
    cd /
    echo "Run basic sanity check on pip installed version with spark-submit"
    spark-submit "$FWDIR"/dev/pip-sanity-check.py
    echo "Run basic sanity check with import based"
    python "$FWDIR"/dev/pip-sanity-check.py
    echo "Run the tests for context.py"
    python "$FWDIR"/python/pyspark/context.py

    cd "$FWDIR"
  done
done

exit 0