spark-instrumented-optimizer/dev/lint-python
itholic b8740a1d1e [SPARK-35499][PYTHON] Apply black to pandas API on Spark codes
### What changes were proposed in this pull request?

This PR proposes applying `black` to pandas API on Spark codes, for improving static analysis.

By executing the `./dev/reformat-python` in the spark home directory, all the code of the pandas API on Spark is fixed according to the static analysis rules.

### Why are the changes needed?

This can be reduces the cost of static analysis during development.

It has been used continuously for about a year in the Koalas project and its convenience has been proven.

### Does this PR introduce _any_ user-facing change?

No, it's dev-only.

### How was this patch tested?

Manually reformat the pandas API on Spark codes by running the `./dev/reformat-python`, and checked the `./dev/lint-python` is passed.

Closes #32779 from itholic/SPARK-35499.

Authored-by: itholic <haejoon.lee@databricks.com>
Signed-off-by: Liang-Chi Hsieh <viirya@gmail.com>
2021-06-06 17:30:07 -07:00

237 lines
7.3 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# define test binaries + versions
FLAKE8_BUILD="flake8"
# TODO(SPARK-34943): minimum version should be 3.8+
MINIMUM_FLAKE8="3.5.0"
MYPY_BUILD="mypy"
PYCODESTYLE_BUILD="pycodestyle"
MINIMUM_PYCODESTYLE="2.7.0"
PYTHON_EXECUTABLE="python3"
BLACK_BUILD="$PYTHON_EXECUTABLE -m black"
function satisfies_min_version {
local provided_version="$1"
local expected_version="$2"
echo "$(
"$PYTHON_EXECUTABLE" << EOM
from setuptools.extern.packaging import version
print(version.parse('$provided_version') >= version.parse('$expected_version'))
EOM
)"
}
function compile_python_test {
local COMPILE_STATUS=
local COMPILE_REPORT=
if [[ ! "$1" ]]; then
echo "No python files found! Something is very wrong -- exiting."
exit 1;
fi
# compileall: https://docs.python.org/3/library/compileall.html
echo "starting python compilation test..."
COMPILE_REPORT=$( ("$PYTHON_EXECUTABLE" -B -mcompileall -q -l -x "[/\\\\][.]git" $1) 2>&1)
COMPILE_STATUS=$?
if [ $COMPILE_STATUS -ne 0 ]; then
echo "Python compilation failed with the following errors:"
echo "$COMPILE_REPORT"
echo "$COMPILE_STATUS"
exit "$COMPILE_STATUS"
else
echo "python compilation succeeded."
echo
fi
}
function pycodestyle_test {
local PYCODESTYLE_STATUS=
local PYCODESTYLE_REPORT=
local RUN_LOCAL_PYCODESTYLE=
local PYCODESTYLE_VERSION=
local EXPECTED_PYCODESTYLE=
local PYCODESTYLE_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pycodestyle-$MINIMUM_PYCODESTYLE.py"
local PYCODESTYLE_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/PyCQA/pycodestyle/$MINIMUM_PYCODESTYLE/pycodestyle.py"
if [[ ! "$1" ]]; then
echo "No python files found! Something is very wrong -- exiting."
exit 1;
fi
# check for locally installed pycodestyle & version
RUN_LOCAL_PYCODESTYLE="False"
if hash "$PYCODESTYLE_BUILD" 2> /dev/null; then
PYCODESTYLE_VERSION="$($PYCODESTYLE_BUILD --version)"
EXPECTED_PYCODESTYLE="$(satisfies_min_version $PYCODESTYLE_VERSION $MINIMUM_PYCODESTYLE)"
if [ "$EXPECTED_PYCODESTYLE" == "True" ]; then
RUN_LOCAL_PYCODESTYLE="True"
fi
fi
# download the right version or run locally
if [ $RUN_LOCAL_PYCODESTYLE == "False" ]; then
# Get pycodestyle at runtime so that we don't rely on it being installed on the build server.
# See: https://github.com/apache/spark/pull/1744#issuecomment-50982162
# Updated to the latest official version of pep8. pep8 is formally renamed to pycodestyle.
echo "downloading pycodestyle from $PYCODESTYLE_SCRIPT_REMOTE_PATH..."
if [ ! -e "$PYCODESTYLE_SCRIPT_PATH" ]; then
curl --silent -o "$PYCODESTYLE_SCRIPT_PATH" "$PYCODESTYLE_SCRIPT_REMOTE_PATH"
local curl_status="$?"
if [ "$curl_status" -ne 0 ]; then
echo "Failed to download pycodestyle.py from $PYCODESTYLE_SCRIPT_REMOTE_PATH"
exit "$curl_status"
fi
fi
echo "starting pycodestyle test..."
PYCODESTYLE_REPORT=$( ("$PYTHON_EXECUTABLE" "$PYCODESTYLE_SCRIPT_PATH" --config=dev/tox.ini $1) 2>&1)
PYCODESTYLE_STATUS=$?
else
# we have the right version installed, so run locally
echo "starting pycodestyle test..."
PYCODESTYLE_REPORT=$( ($PYCODESTYLE_BUILD --config=dev/tox.ini $1) 2>&1)
PYCODESTYLE_STATUS=$?
fi
if [ $PYCODESTYLE_STATUS -ne 0 ]; then
echo "pycodestyle checks failed:"
echo "$PYCODESTYLE_REPORT"
exit "$PYCODESTYLE_STATUS"
else
echo "pycodestyle checks passed."
echo
fi
}
function mypy_test {
local MYPY_REPORT=
local MYPY_STATUS=
# TODO(SPARK-32797): Install mypy on the Jenkins CI workers
if ! hash "$MYPY_BUILD" 2> /dev/null; then
echo "The $MYPY_BUILD command was not found. Skipping for now."
return
fi
echo "starting $MYPY_BUILD test..."
MYPY_REPORT=$( ($MYPY_BUILD --config-file python/mypy.ini python/pyspark) 2>&1)
MYPY_STATUS=$?
if [ "$MYPY_STATUS" -ne 0 ]; then
echo "mypy checks failed:"
echo "$MYPY_REPORT"
echo "$MYPY_STATUS"
exit "$MYPY_STATUS"
else
echo "mypy checks passed."
echo
fi
}
function flake8_test {
local FLAKE8_VERSION=
local EXPECTED_FLAKE8=
local FLAKE8_REPORT=
local FLAKE8_STATUS=
if ! hash "$FLAKE8_BUILD" 2> /dev/null; then
echo "The flake8 command was not found."
echo "flake8 checks failed."
exit 1
fi
_FLAKE8_VERSION=($($FLAKE8_BUILD --version))
FLAKE8_VERSION="${_FLAKE8_VERSION[0]}"
EXPECTED_FLAKE8="$(satisfies_min_version $FLAKE8_VERSION $MINIMUM_FLAKE8)"
if [[ "$EXPECTED_FLAKE8" == "False" ]]; then
echo "\
The minimum flake8 version needs to be $MINIMUM_FLAKE8. Your current version is $FLAKE8_VERSION
flake8 checks failed."
exit 1
fi
echo "starting $FLAKE8_BUILD test..."
FLAKE8_REPORT=$( ($FLAKE8_BUILD --append-config dev/tox.ini --count --show-source --statistics .) 2>&1)
FLAKE8_STATUS=$?
if [ "$FLAKE8_STATUS" -ne 0 ]; then
echo "flake8 checks failed:"
echo "$FLAKE8_REPORT"
echo "$FLAKE8_STATUS"
exit "$FLAKE8_STATUS"
else
echo "flake8 checks passed."
echo
fi
}
function black_test {
local BLACK_REPORT=
local BLACK_STATUS=
# Skip check if black is not installed.
$BLACK_BUILD 2> /dev/null
if [ $? -ne 0 ]; then
echo "The $BLACK_BUILD command was not found. Skipping black checks for now."
echo
return
fi
echo "starting black test..."
# Black is only applied for pandas API on Spark for now.
BLACK_REPORT=$( ($BLACK_BUILD python/pyspark/pandas --line-length 100 --check ) 2>&1)
BLACK_STATUS=$?
if [ "$BLACK_STATUS" -ne 0 ]; then
echo "black checks failed:"
echo "$BLACK_REPORT"
echo "Please run 'dev/reformat-python' script."
echo "$BLACK_STATUS"
exit "$BLACK_STATUS"
else
echo "black checks passed."
echo
fi
}
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "${SCRIPT_DIR}")"
pushd "$SPARK_ROOT_DIR" &> /dev/null
# skipping local ruby bundle directory from the search
PYTHON_SOURCE="$(find . -path ./docs/.local_ruby_bundle -prune -false -o -name "*.py")"
compile_python_test "$PYTHON_SOURCE"
black_test
pycodestyle_test "$PYTHON_SOURCE"
flake8_test
mypy_test
echo
echo "all lint-python tests passed!"
popd &> /dev/null