spark-instrumented-optimizer/dev/lint-python

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# define test binaries + versions
FLAKE8_BUILD="flake8"
# TODO(SPARK-34943): minimum version should be 3.8+
MINIMUM_FLAKE8="3.5.0"
MINIMUM_MYPY="0.910"
MYPY_BUILD="mypy"
PYCODESTYLE_BUILD="pycodestyle"
MINIMUM_PYCODESTYLE="2.7.0"

PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

BLACK_BUILD="$PYTHON_EXECUTABLE -m black"

function satisfies_min_version {
    local provided_version="$1"
    local expected_version="$2"
    echo "$(
        "$PYTHON_EXECUTABLE" << EOM
from setuptools.extern.packaging import version
print(version.parse('$provided_version') >= version.parse('$expected_version'))
EOM
    )"
}

function compile_python_test {
    local COMPILE_STATUS=
    local COMPILE_REPORT=

    if [[ ! "$1" ]]; then
        echo "No python files found!  Something is very wrong -- exiting."
        exit 1;
    fi

    # compileall: https://docs.python.org/3/library/compileall.html
    echo "starting python compilation test..."
    COMPILE_REPORT=$( ("$PYTHON_EXECUTABLE" -B -mcompileall -q -l -x "[/\\\\][.]git" $1) 2>&1)
    COMPILE_STATUS=$?

    if [ $COMPILE_STATUS -ne 0 ]; then
        echo "Python compilation failed with the following errors:"
        echo "$COMPILE_REPORT"
        echo "$COMPILE_STATUS"
        exit "$COMPILE_STATUS"
    else
        echo "python compilation succeeded."
        echo
    fi
}

function pycodestyle_test {
    local PYCODESTYLE_STATUS=
    local PYCODESTYLE_REPORT=
    local RUN_LOCAL_PYCODESTYLE=
    local PYCODESTYLE_VERSION=
    local EXPECTED_PYCODESTYLE=
    local PYCODESTYLE_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pycodestyle-$MINIMUM_PYCODESTYLE.py"
    local PYCODESTYLE_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/PyCQA/pycodestyle/$MINIMUM_PYCODESTYLE/pycodestyle.py"

    if [[ ! "$1" ]]; then
        echo "No python files found!  Something is very wrong -- exiting."
        exit 1;
    fi

    # check for locally installed pycodestyle & version
    RUN_LOCAL_PYCODESTYLE="False"
    if hash "$PYCODESTYLE_BUILD" 2> /dev/null; then
        PYCODESTYLE_VERSION="$($PYCODESTYLE_BUILD --version)"
        EXPECTED_PYCODESTYLE="$(satisfies_min_version $PYCODESTYLE_VERSION $MINIMUM_PYCODESTYLE)"
        if [ "$EXPECTED_PYCODESTYLE" == "True" ]; then
            RUN_LOCAL_PYCODESTYLE="True"
        fi
    fi

    # download the right version or run locally
    if [ $RUN_LOCAL_PYCODESTYLE == "False" ]; then
        # Get pycodestyle at runtime so that we don't rely on it being installed on the build server.
        # See: https://github.com/apache/spark/pull/1744#issuecomment-50982162
        # Updated to the latest official version of pep8. pep8 is formally renamed to pycodestyle.
        echo "downloading pycodestyle from $PYCODESTYLE_SCRIPT_REMOTE_PATH..."
        if [ ! -e "$PYCODESTYLE_SCRIPT_PATH" ]; then
            curl --silent -o "$PYCODESTYLE_SCRIPT_PATH" "$PYCODESTYLE_SCRIPT_REMOTE_PATH"
            local curl_status="$?"

            if [ "$curl_status" -ne 0 ]; then
                echo "Failed to download pycodestyle.py from $PYCODESTYLE_SCRIPT_REMOTE_PATH"
                exit "$curl_status"
            fi
        fi

        echo "starting pycodestyle test..."
        PYCODESTYLE_REPORT=$( ("$PYTHON_EXECUTABLE" "$PYCODESTYLE_SCRIPT_PATH" --config=dev/tox.ini $1) 2>&1)
        PYCODESTYLE_STATUS=$?
    else
        # we have the right version installed, so run locally
        echo "starting pycodestyle test..."
        PYCODESTYLE_REPORT=$( ($PYCODESTYLE_BUILD --config=dev/tox.ini $1) 2>&1)
        PYCODESTYLE_STATUS=$?
    fi

    if [ $PYCODESTYLE_STATUS -ne 0 ]; then
        echo "pycodestyle checks failed:"
        echo "$PYCODESTYLE_REPORT"
        exit "$PYCODESTYLE_STATUS"
    else
        echo "pycodestyle checks passed."
        echo
    fi
}

function mypy_test {
    local MYPY_REPORT=
    local MYPY_STATUS=

    if ! hash "$MYPY_BUILD" 2> /dev/null; then
        echo "The $MYPY_BUILD command was not found. Skipping for now."
        return
    fi

    _MYPY_VERSION=($($MYPY_BUILD --version))
    MYPY_VERSION="${_MYPY_VERSION[1]}"
    EXPECTED_MYPY="$(satisfies_min_version $MYPY_VERSION $MINIMUM_MYPY)"

    if [[ "$EXPECTED_MYPY" == "False" ]]; then
        echo "The minimum mypy version needs to be $MINIMUM_MYPY. Your current version is $MYPY_VERSION. Skipping for now."
        return
    fi

    echo "starting mypy test..."
    MYPY_REPORT=$( ($MYPY_BUILD --config-file python/mypy.ini python/pyspark) 2>&1)
    MYPY_STATUS=$?

    if [ "$MYPY_STATUS" -ne 0 ]; then
        echo "mypy checks failed:"
        echo "$MYPY_REPORT"
        echo "$MYPY_STATUS"
        exit "$MYPY_STATUS"
    else
        echo "mypy checks passed."
        echo
    fi
}

function flake8_test {
    local FLAKE8_VERSION=
    local EXPECTED_FLAKE8=
    local FLAKE8_REPORT=
    local FLAKE8_STATUS=

    if ! hash "$FLAKE8_BUILD" 2> /dev/null; then
        echo "The flake8 command was not found."
        echo "flake8 checks failed."
        exit 1
    fi

    _FLAKE8_VERSION=($($FLAKE8_BUILD --version))
    FLAKE8_VERSION="${_FLAKE8_VERSION[0]}"
    EXPECTED_FLAKE8="$(satisfies_min_version $FLAKE8_VERSION $MINIMUM_FLAKE8)"

    if [[ "$EXPECTED_FLAKE8" == "False" ]]; then
        echo "\
The minimum flake8 version needs to be $MINIMUM_FLAKE8. Your current version is $FLAKE8_VERSION

flake8 checks failed."
        exit 1
    fi

    echo "starting $FLAKE8_BUILD test..."
    FLAKE8_REPORT=$( ($FLAKE8_BUILD --append-config dev/tox.ini --count --show-source --statistics .) 2>&1)
    FLAKE8_STATUS=$?

    if [ "$FLAKE8_STATUS" -ne 0 ]; then
        echo "flake8 checks failed:"
        echo "$FLAKE8_REPORT"
        echo "$FLAKE8_STATUS"
        exit "$FLAKE8_STATUS"
    else
        echo "flake8 checks passed."
        echo
    fi
}

function black_test {
    local BLACK_REPORT=
    local BLACK_STATUS=

    # Skip check if black is not installed.
    $BLACK_BUILD 2> /dev/null
    if [ $? -ne 0 ]; then
        echo "The $BLACK_BUILD command was not found. Skipping black checks for now."
        echo
        return
    fi

    echo "starting black test..."
    # Black is only applied for pandas API on Spark for now.
    BLACK_REPORT=$( ($BLACK_BUILD python/pyspark/pandas --line-length 100 --check ) 2>&1)
    BLACK_STATUS=$?

    if [ "$BLACK_STATUS" -ne 0 ]; then
        echo "black checks failed:"
        echo "$BLACK_REPORT"
        echo "Please run 'dev/reformat-python' script."
        echo "$BLACK_STATUS"
        exit "$BLACK_STATUS"
    else
        echo "black checks passed."
        echo
    fi
}

SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "${SCRIPT_DIR}")"

pushd "$SPARK_ROOT_DIR" &> /dev/null

# skipping local ruby bundle directory from the search
PYTHON_SOURCE="$(find . -path ./docs/.local_ruby_bundle -prune -false -o -name "*.py")"

compile_python_test "$PYTHON_SOURCE"
black_test
pycodestyle_test "$PYTHON_SOURCE"
flake8_test
mypy_test

echo
echo "all lint-python tests passed!"

popd &> /dev/null