[SPARK-8706] [PYSPARK] [PROJECT INFRA] Add pylint checks to PySpark
This adds Pylint checks to PySpark. For now this lazy installs using easy_install to /dev/pylint (similar to the pep8 script). We still need to figure out what rules to be allowed. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #7241 from MechCoder/pylint and squashes the following commits: 8496834 [MechCoder] Silence warnings and make pylint tests fail to check if it works in jenkins 57393a3 [MechCoder] undefined-variable a8e2547 [MechCoder] Minor changes 7753810 [MechCoder] remove trailing whitespace 75c5d2b [MechCoder] Remove blacklisted arguments and pointless statements check 6bde250 [MechCoder] Disable all checks for now 3464666 [MechCoder] Add pylint configuration file d28109f [MechCoder] [SPARK-8706] [PySpark] [Project infra] Add pylint checks to PySpark
This commit is contained in:
parent
7f487c8bde
commit
9b62e9375f
|
@ -21,12 +21,14 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
|
|||
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
PATHS_TO_CHECK="./python/pyspark/ ./ec2/spark_ec2.py ./examples/src/main/python/ ./dev/sparktestsupport"
|
||||
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py"
|
||||
PYTHON_LINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/python-lint-report.txt"
|
||||
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
|
||||
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
|
||||
PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"
|
||||
|
||||
cd "$SPARK_ROOT_DIR"
|
||||
|
||||
# compileall: https://docs.python.org/2/library/compileall.html
|
||||
python -B -m compileall -q -l $PATHS_TO_CHECK > "$PYTHON_LINT_REPORT_PATH"
|
||||
python -B -m compileall -q -l $PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
|
||||
compile_status="${PIPESTATUS[0]}"
|
||||
|
||||
# Get pep8 at runtime so that we don't rely on it being installed on the build server.
|
||||
|
@ -47,11 +49,36 @@ if [ ! -e "$PEP8_SCRIPT_PATH" ]; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# Easy-install pylint into dev/pylint. To easy_install into a directory, PYTHONPATH should
|
||||
# be set to the directory.
|
||||
# dev/pylint should be appended to the PATH variable as well.
|
||||
# Jenkins by default installs the pylint3 version, so for now this just checks the code quality
|
||||
# of python3.
|
||||
export "PYTHONPATH=$SPARK_ROOT_DIR/dev/pylint"
|
||||
export "PYLINT_HOME=$PYTHONPATH"
|
||||
export "PATH=$PYTHONPATH:$PATH"
|
||||
|
||||
if [ ! -d "$PYLINT_HOME" ]; then
|
||||
mkdir "$PYLINT_HOME"
|
||||
# Redirect the annoying pylint installation output.
|
||||
easy_install -d "$PYLINT_HOME" pylint==1.4.4 &>> "$PYLINT_INSTALL_INFO"
|
||||
easy_install_status="$?"
|
||||
|
||||
if [ "$easy_install_status" -ne 0 ]; then
|
||||
echo "Unable to install pylint locally in \"$PYTHONPATH\"."
|
||||
cat "$PYLINT_INSTALL_INFO"
|
||||
exit "$easy_install_status"
|
||||
fi
|
||||
|
||||
rm "$PYLINT_INSTALL_INFO"
|
||||
|
||||
fi
|
||||
|
||||
# There is no need to write this output to a file
|
||||
#+ first, but we do so so that the check status can
|
||||
#+ be output before the report, like with the
|
||||
#+ scalastyle and RAT checks.
|
||||
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PYTHON_LINT_REPORT_PATH"
|
||||
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
|
||||
pep8_status="${PIPESTATUS[0]}"
|
||||
|
||||
if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then
|
||||
|
@ -61,13 +88,27 @@ else
|
|||
fi
|
||||
|
||||
if [ "$lint_status" -ne 0 ]; then
|
||||
echo "Python lint checks failed."
|
||||
cat "$PYTHON_LINT_REPORT_PATH"
|
||||
echo "PEP8 checks failed."
|
||||
cat "$PEP8_REPORT_PATH"
|
||||
else
|
||||
echo "Python lint checks passed."
|
||||
echo "PEP8 checks passed."
|
||||
fi
|
||||
|
||||
# rm "$PEP8_SCRIPT_PATH"
|
||||
rm "$PYTHON_LINT_REPORT_PATH"
|
||||
rm "$PEP8_REPORT_PATH"
|
||||
|
||||
for to_be_checked in "$PATHS_TO_CHECK"
|
||||
do
|
||||
pylint --rcfile="$SPARK_ROOT_DIR/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH"
|
||||
done
|
||||
|
||||
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
|
||||
lint_status=1
|
||||
echo "Pylint checks failed."
|
||||
cat "$PYLINT_REPORT_PATH"
|
||||
else
|
||||
echo "Pylint checks passed."
|
||||
fi
|
||||
|
||||
rm "$PYLINT_REPORT_PATH"
|
||||
|
||||
exit "$lint_status"
|
||||
|
|
404
pylintrc
Normal file
404
pylintrc
Normal file
|
@ -0,0 +1,404 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
[MASTER]
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Profiled execution.
|
||||
profile=no
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
# Entries are matched against file/directory base names, so a dotted module
# path such as pyspark.heapq3 never matches; list the file name instead.
ignore=heapq3.py
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
# Use multiple processes to speed up Pylint.
|
||||
jobs=1
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loaded into the active Python interpreter and may
|
||||
# run arbitrary code
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Allow optimization of some AST trees. This will activate a peephole AST
|
||||
# optimizer, which will apply various small optimizations. For instance, it can
|
||||
# be used to obtain the result of joining multiple strings with the addition
|
||||
# operator. Joining a lot of strings can lead to a maximum recursion error in
|
||||
# Pylint and this flag can prevent that. It has one side effect, the resulting
|
||||
# AST will be different than the one from reality.
|
||||
optimize-ast=no
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
|
||||
confidence=
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifiers separated by comma (,) or put this option
|
||||
# multiple times. See also the "--disable" option for examples.
|
||||
enable=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once). You can also use "--disable=all" to
|
||||
# disable everything first and then reenable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W"
|
||||
|
||||
# These checks are listed in order of the number of warnings pylint reports for each.
|
||||
# If you would like to improve the code quality of pyspark, remove any of these disabled errors
|
||||
# then run ./dev/lint-python and see if the errors raised by pylint can be fixed.
|
||||
|
||||
disable=invalid-name,missing-docstring,protected-access,unused-argument,no-member,unused-wildcard-import,redefined-builtin,too-many-arguments,unused-variable,too-few-public-methods,bad-continuation,duplicate-code,redefined-outer-name,too-many-ancestors,import-error,superfluous-parens,unused-import,line-too-long,no-name-in-module,unnecessary-lambda,import-self,no-self-use,unidiomatic-typecheck,fixme,too-many-locals,cyclic-import,too-many-branches,bare-except,wildcard-import,dangerous-default-value,broad-except,too-many-public-methods,deprecated-lambda,anomalous-backslash-in-string,too-many-lines,reimported,too-many-statements,bad-whitespace,unpacking-non-sequence,too-many-instance-attributes,abstract-method,old-style-class,global-statement,attribute-defined-outside-init,arguments-differ,undefined-all-variable,no-init,useless-else-on-loop,super-init-not-called,notimplemented-raised,too-many-return-statements,pointless-string-statement,global-variable-undefined,bad-classmethod-argument,too-many-format-args,parse-error,no-self-argument,pointless-statement,undefined-variable
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, msvs
|
||||
# (visual studio) and html. You can also give a reporter class, eg
|
||||
# mypackage.mymodule.MyReporterClass.
|
||||
output-format=text
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
# written in a file named "pylint_global.[txt|html]".
|
||||
files-output=no
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
reports=no
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables error, warning and statement, which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Add a comment according to your evaluation note. This is used by the global
|
||||
# evaluation report (RP0004).
|
||||
comment=no
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
# used to format the message information. See doc for all details
|
||||
#msg-template=
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take into consideration, separated by a comma.
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Required attributes for module, separated by a comma
|
||||
required-attributes=
|
||||
|
||||
# List of builtins function names that should not be used, separated by a comma
|
||||
bad-functions=
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
good-names=i,j,k,ex,Run,_
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
bad-names=baz,toto,tutu,tata
|
||||
|
||||
# Colon-delimited sets of names that determine each other's naming style when
|
||||
# the name regexes allow several styles.
|
||||
name-group=
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name
|
||||
include-naming-hint=no
|
||||
|
||||
# Regular expression matching correct function names
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for function names
|
||||
function-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct variable names
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for variable names
|
||||
variable-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct constant names
|
||||
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Naming hint for constant names
|
||||
const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Regular expression matching correct attribute names
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for attribute names
|
||||
attr-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct argument names
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for argument names
|
||||
argument-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct class attribute names
|
||||
class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
|
||||
|
||||
# Naming hint for class attribute names
|
||||
class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
|
||||
|
||||
# Regular expression matching correct inline iteration names
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Naming hint for inline iteration names
|
||||
inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Regular expression matching correct class names
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Naming hint for class names
|
||||
class-name-hint=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Regular expression matching correct module names
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Naming hint for module names
|
||||
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Regular expression matching correct method names
|
||||
method-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for method names
|
||||
method-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match function or class names that do
|
||||
# not require a docstring.
|
||||
no-docstring-rgx=__.*__
|
||||
|
||||
# Minimum line length for functions/classes that require docstrings, shorter
|
||||
# ones are exempt.
|
||||
docstring-min-length=-1
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=100
|
||||
|
||||
# Regexp for a line that is allowed to be longer than the limit.
|
||||
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||
|
||||
# Allow the body of an if to be on the same line as the test if there is no
|
||||
# else.
|
||||
single-line-if-stmt=no
|
||||
|
||||
# List of optional constructs for which whitespace checking is disabled
|
||||
no-space-check=trailing-comma,dict-separator
|
||||
|
||||
# Maximum number of lines in a module
|
||||
max-module-lines=1000
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
# Number of spaces of indent required inside a hanging or continued line.
|
||||
indent-after-paren=4
|
||||
|
||||
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||
expected-line-ending-format=
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
# Ignore imports when computing similarities.
|
||||
ignore-imports=no
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# A regular expression matching the name of dummy variables (i.e. expectedly
|
||||
# not used).
|
||||
dummy-variables-rgx=_$|dummy
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,_cb
|
||||
|
||||
|
||||
[SPELLING]
|
||||
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
||||
# install python-enchant package.
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to indicated private dictionary in
|
||||
# --spelling-private-dict-file option instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
# function parameter format
|
||||
logging-modules=logging
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis).
|
||||
ignored-modules=
|
||||
|
||||
# List of class names for which member attributes should not be checked
|
||||
# (useful for classes with attributes dynamically set).
|
||||
ignored-classes=SQLObject
|
||||
|
||||
# When zope mode is activated, add a predefined set of Zope acquired attributes
|
||||
# to generated-members.
|
||||
zope=no
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E0201 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=REQUEST,acl_users,aq_parent
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of interface methods to ignore, separated by a comma. This is used for
|
||||
# instance to not check methods defined in Zope's Interface base class.
|
||||
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
# List of valid names for the first argument in a metaclass class method.
|
||||
valid-metaclass-classmethod-first-arg=mcs
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
exclude-protected=_asdict,_fields,_replace,_source,_make
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma
|
||||
deprecated-modules=regsub,TERMIOS,Bastion,rexec
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled)
|
||||
import-graph=
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
int-import-graph=
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=5
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore
|
||||
ignored-argument-names=_.*
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of branch for function / method body
|
||||
max-branches=12
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
max-statements=50
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "Exception"
|
||||
overgeneral-exceptions=Exception
|
Loading…
Reference in a new issue