From 9fcf0ea71820f7331504073045c38820e50141c7 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Sat, 8 Aug 2020 08:51:57 -0700
Subject: [PATCH] [SPARK-32319][PYSPARK] Disallow the use of unused imports

Disallow the use of unused imports:
- Unused imports unnecessarily increase the memory footprint of the application.
- Imports that are needed only for the examples in a docstring are moved from file scope into the example itself. This keeps the files themselves clean and makes the examples more complete, since they now include their imports :)

```
fokkodriesprongFan spark % flake8 python | grep -i "imported but unused"
python/pyspark/cloudpickle.py:46:1: F401 'functools.partial' imported but unused
python/pyspark/cloudpickle.py:55:1: F401 'traceback' imported but unused
python/pyspark/heapq3.py:868:5: F401 '_heapq.*' imported but unused
python/pyspark/__init__.py:61:1: F401 'pyspark.version.__version__' imported but unused
python/pyspark/__init__.py:62:1: F401 'pyspark._globals._NoValue' imported but unused
python/pyspark/__init__.py:115:1: F401 'pyspark.sql.SQLContext' imported but unused
python/pyspark/__init__.py:115:1: F401 'pyspark.sql.HiveContext' imported but unused
python/pyspark/__init__.py:115:1: F401 'pyspark.sql.Row' imported but unused
python/pyspark/rdd.py:21:1: F401 're' imported but unused
python/pyspark/rdd.py:29:1: F401 'tempfile.NamedTemporaryFile' imported but unused
python/pyspark/mllib/regression.py:26:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused
python/pyspark/mllib/clustering.py:28:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused
python/pyspark/mllib/clustering.py:28:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused
python/pyspark/mllib/classification.py:26:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused
python/pyspark/mllib/feature.py:28:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused
python/pyspark/mllib/feature.py:28:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused
python/pyspark/mllib/feature.py:30:1: F401 'pyspark.mllib.regression.LabeledPoint' imported but unused
python/pyspark/mllib/tests/test_linalg.py:18:1: F401 'sys' imported but unused
python/pyspark/mllib/tests/test_linalg.py:642:5: F401 'pyspark.mllib.tests.test_linalg.*' imported but unused
python/pyspark/mllib/tests/test_feature.py:21:1: F401 'numpy.random' imported but unused
python/pyspark/mllib/tests/test_feature.py:21:1: F401 'numpy.exp' imported but unused
python/pyspark/mllib/tests/test_feature.py:23:1: F401 'pyspark.mllib.linalg.Vector' imported but unused
python/pyspark/mllib/tests/test_feature.py:23:1: F401 'pyspark.mllib.linalg.VectorUDT' imported but unused
python/pyspark/mllib/tests/test_feature.py:185:5: F401 'pyspark.mllib.tests.test_feature.*' imported but unused
python/pyspark/mllib/tests/test_util.py:97:5: F401 'pyspark.mllib.tests.test_util.*' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.Vector' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.VectorUDT' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg._convert_to_vector' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.DenseMatrix' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.SparseMatrix' imported but unused
python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.MatrixUDT' imported but unused
python/pyspark/mllib/tests/test_stat.py:181:5: F401 'pyspark.mllib.tests.test_stat.*' imported but unused
python/pyspark/mllib/tests/test_streaming_algorithms.py:18:1: F401 'time.time' imported but unused
python/pyspark/mllib/tests/test_streaming_algorithms.py:18:1: F401 'time.sleep' imported but unused
python/pyspark/mllib/tests/test_streaming_algorithms.py:470:5: F401 'pyspark.mllib.tests.test_streaming_algorithms.*' imported but unused
python/pyspark/mllib/tests/test_algorithms.py:295:5: F401 'pyspark.mllib.tests.test_algorithms.*' imported but unused
python/pyspark/tests/test_serializers.py:90:13: F401 'xmlrunner' imported but unused
python/pyspark/tests/test_rdd.py:21:1: F401 'sys' imported but unused
python/pyspark/tests/test_rdd.py:29:1: F401 'pyspark.resource.ResourceProfile' imported but unused
python/pyspark/tests/test_rdd.py:885:5: F401 'pyspark.tests.test_rdd.*' imported but unused
python/pyspark/tests/test_readwrite.py:19:1: F401 'sys' imported but unused
python/pyspark/tests/test_readwrite.py:22:1: F401 'array.array' imported but unused
python/pyspark/tests/test_readwrite.py:309:5: F401 'pyspark.tests.test_readwrite.*' imported but unused
python/pyspark/tests/test_join.py:62:5: F401 'pyspark.tests.test_join.*' imported but unused
python/pyspark/tests/test_taskcontext.py:19:1: F401 'shutil' imported but unused
python/pyspark/tests/test_taskcontext.py:325:5: F401 'pyspark.tests.test_taskcontext.*' imported but unused
python/pyspark/tests/test_conf.py:36:5: F401 'pyspark.tests.test_conf.*' imported but unused
python/pyspark/tests/test_broadcast.py:148:5: F401 'pyspark.tests.test_broadcast.*' imported but unused
python/pyspark/tests/test_daemon.py:76:5: F401 'pyspark.tests.test_daemon.*' imported but unused
python/pyspark/tests/test_util.py:77:5: F401 'pyspark.tests.test_util.*' imported but unused
python/pyspark/tests/test_pin_thread.py:19:1: F401 'random' imported but unused
python/pyspark/tests/test_pin_thread.py:149:5: F401 'pyspark.tests.test_pin_thread.*' imported but unused
python/pyspark/tests/test_worker.py:19:1: F401 'sys' imported but unused
python/pyspark/tests/test_worker.py:26:5: F401 'resource' imported but unused
python/pyspark/tests/test_worker.py:203:5: F401 'pyspark.tests.test_worker.*' imported but unused
python/pyspark/tests/test_profiler.py:101:5: F401 'pyspark.tests.test_profiler.*' imported but unused
python/pyspark/tests/test_shuffle.py:18:1: F401 'sys' imported but unused
python/pyspark/tests/test_shuffle.py:171:5: F401 'pyspark.tests.test_shuffle.*' imported but unused
python/pyspark/tests/test_rddbarrier.py:43:5: F401 'pyspark.tests.test_rddbarrier.*' imported but unused
python/pyspark/tests/test_context.py:129:13: F401 'userlibrary.UserClass' imported but unused
python/pyspark/tests/test_context.py:140:13: F401 'userlib.UserClass' imported but unused
python/pyspark/tests/test_context.py:310:5: F401 'pyspark.tests.test_context.*' imported but unused
python/pyspark/tests/test_appsubmit.py:241:5: F401 'pyspark.tests.test_appsubmit.*' imported but unused
python/pyspark/streaming/dstream.py:18:1: F401 'sys' imported but unused
python/pyspark/streaming/tests/test_dstream.py:27:1: F401 'pyspark.RDD' imported but unused
python/pyspark/streaming/tests/test_dstream.py:647:5: F401 'pyspark.streaming.tests.test_dstream.*' imported but unused
python/pyspark/streaming/tests/test_kinesis.py:83:5: F401 'pyspark.streaming.tests.test_kinesis.*' imported but unused
python/pyspark/streaming/tests/test_listener.py:152:5: F401 'pyspark.streaming.tests.test_listener.*' imported but unused
python/pyspark/streaming/tests/test_context.py:178:5: F401 'pyspark.streaming.tests.test_context.*' imported but unused
python/pyspark/testing/utils.py:30:5: F401 'scipy.sparse' imported but unused
python/pyspark/testing/utils.py:36:5: F401 'numpy as np' imported but unused
python/pyspark/ml/regression.py:25:1: F401 'pyspark.ml.tree._TreeEnsembleParams' imported but unused
python/pyspark/ml/regression.py:25:1: F401 'pyspark.ml.tree._HasVarianceImpurity' imported but unused
python/pyspark/ml/regression.py:29:1: F401 'pyspark.ml.wrapper.JavaParams' imported but unused
python/pyspark/ml/util.py:19:1: F401 'sys' imported but unused
python/pyspark/ml/__init__.py:25:1: F401 'pyspark.ml.pipeline' imported but unused
python/pyspark/ml/pipeline.py:18:1: F401 'sys' imported but unused
python/pyspark/ml/stat.py:22:1: F401 'pyspark.ml.linalg.DenseMatrix' imported but unused
python/pyspark/ml/stat.py:22:1: F401 'pyspark.ml.linalg.Vectors' imported but unused
python/pyspark/ml/tests/test_training_summary.py:18:1: F401 'sys' imported but unused
python/pyspark/ml/tests/test_training_summary.py:364:5: F401 'pyspark.ml.tests.test_training_summary.*' imported but unused
python/pyspark/ml/tests/test_linalg.py:381:5: F401 'pyspark.ml.tests.test_linalg.*' imported but unused
python/pyspark/ml/tests/test_tuning.py:427:9: F401 'pyspark.sql.functions as F' imported but unused
python/pyspark/ml/tests/test_tuning.py:757:5: F401 'pyspark.ml.tests.test_tuning.*' imported but unused
python/pyspark/ml/tests/test_wrapper.py:120:5: F401 'pyspark.ml.tests.test_wrapper.*' imported but unused
python/pyspark/ml/tests/test_feature.py:19:1: F401 'sys' imported but unused
python/pyspark/ml/tests/test_feature.py:304:5: F401 'pyspark.ml.tests.test_feature.*' imported but unused
python/pyspark/ml/tests/test_image.py:19:1: F401 'py4j' imported but unused
python/pyspark/ml/tests/test_image.py:22:1: F401 'pyspark.testing.mlutils.PySparkTestCase' imported but unused
python/pyspark/ml/tests/test_image.py:71:5: F401 'pyspark.ml.tests.test_image.*' imported but unused
python/pyspark/ml/tests/test_persistence.py:456:5: F401 'pyspark.ml.tests.test_persistence.*' imported but unused
python/pyspark/ml/tests/test_evaluation.py:56:5: F401 'pyspark.ml.tests.test_evaluation.*' imported but unused
python/pyspark/ml/tests/test_stat.py:43:5: F401 'pyspark.ml.tests.test_stat.*' imported but unused
python/pyspark/ml/tests/test_base.py:70:5: F401 'pyspark.ml.tests.test_base.*' imported but unused
python/pyspark/ml/tests/test_param.py:20:1: F401 'sys' imported but unused
python/pyspark/ml/tests/test_param.py:375:5: F401 'pyspark.ml.tests.test_param.*' imported but unused
python/pyspark/ml/tests/test_pipeline.py:62:5: F401 'pyspark.ml.tests.test_pipeline.*' imported but unused
python/pyspark/ml/tests/test_algorithms.py:333:5: F401 'pyspark.ml.tests.test_algorithms.*' imported but unused
python/pyspark/ml/param/__init__.py:18:1: F401 'sys' imported but unused
python/pyspark/resource/tests/test_resources.py:17:1: F401 'random' imported but unused
python/pyspark/resource/tests/test_resources.py:20:1: F401 'pyspark.resource.ResourceProfile' imported but unused
python/pyspark/resource/tests/test_resources.py:75:5: F401 'pyspark.resource.tests.test_resources.*' imported but unused
python/pyspark/sql/functions.py:32:1: F401 'pyspark.sql.udf.UserDefinedFunction' imported but unused
python/pyspark/sql/functions.py:34:1: F401 'pyspark.sql.pandas.functions.pandas_udf' imported but unused
python/pyspark/sql/session.py:30:1: F401 'pyspark.sql.types.Row' imported but unused
python/pyspark/sql/session.py:30:1: F401 'pyspark.sql.types.StringType' imported but unused
python/pyspark/sql/readwriter.py:1084:5: F401 'pyspark.sql.Row' imported but unused
python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.IntegerType' imported but unused
python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.Row' imported but unused
python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.StringType' imported but unused
python/pyspark/sql/context.py:27:1: F401 'pyspark.sql.udf.UDFRegistration' imported but unused
python/pyspark/sql/streaming.py:1212:5: F401 'pyspark.sql.Row' imported but unused
python/pyspark/sql/tests/test_utils.py:55:5: F401 'pyspark.sql.tests.test_utils.*' imported but unused
python/pyspark/sql/tests/test_pandas_map.py:18:1: F401 'sys' imported but unused
python/pyspark/sql/tests/test_pandas_map.py:22:1: F401 'pyspark.sql.functions.pandas_udf' imported but unused
python/pyspark/sql/tests/test_pandas_map.py:22:1: F401 'pyspark.sql.functions.PandasUDFType' imported but unused
python/pyspark/sql/tests/test_pandas_map.py:119:5: F401 'pyspark.sql.tests.test_pandas_map.*' imported but unused
python/pyspark/sql/tests/test_catalog.py:193:5: F401 'pyspark.sql.tests.test_catalog.*' imported but unused
python/pyspark/sql/tests/test_group.py:39:5: F401 'pyspark.sql.tests.test_group.*' imported but unused
python/pyspark/sql/tests/test_session.py:361:5: F401 'pyspark.sql.tests.test_session.*' imported but unused
python/pyspark/sql/tests/test_conf.py:49:5: F401 'pyspark.sql.tests.test_conf.*' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:19:1: F401 'sys' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:21:1: F401 'pyspark.sql.functions.sum' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:21:1: F401 'pyspark.sql.functions.PandasUDFType' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:29:5: F401 'pandas.util.testing.assert_series_equal' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:32:5: F401 'pyarrow as pa' imported but unused
python/pyspark/sql/tests/test_pandas_cogrouped_map.py:248:5: F401 'pyspark.sql.tests.test_pandas_cogrouped_map.*' imported but unused
python/pyspark/sql/tests/test_udf.py:24:1: F401 'py4j' imported but unused
python/pyspark/sql/tests/test_pandas_udf_typehints.py:246:5: F401 'pyspark.sql.tests.test_pandas_udf_typehints.*' imported but unused
python/pyspark/sql/tests/test_functions.py:19:1: F401 'sys' imported but unused
python/pyspark/sql/tests/test_functions.py:362:9: F401 'pyspark.sql.functions.exists' imported but unused
python/pyspark/sql/tests/test_functions.py:387:5: F401 'pyspark.sql.tests.test_functions.*' imported but unused
python/pyspark/sql/tests/test_pandas_udf_scalar.py:21:1: F401 'sys' imported but unused
python/pyspark/sql/tests/test_pandas_udf_scalar.py:45:5: F401 'pyarrow as pa' imported but unused
python/pyspark/sql/tests/test_pandas_udf_window.py:355:5: F401 'pyspark.sql.tests.test_pandas_udf_window.*' imported but unused
python/pyspark/sql/tests/test_arrow.py:38:5: F401 'pyarrow as pa' imported but unused
python/pyspark/sql/tests/test_pandas_grouped_map.py:20:1: F401 'sys' imported but unused
python/pyspark/sql/tests/test_pandas_grouped_map.py:38:5: F401 'pyarrow as pa' imported but unused
python/pyspark/sql/tests/test_dataframe.py:382:9: F401 'pyspark.sql.DataFrame' imported but unused
python/pyspark/sql/avro/functions.py:125:5: F401 'pyspark.sql.Row' imported but unused
python/pyspark/sql/pandas/functions.py:19:1: F401 'sys' imported but unused
```

After:

```
fokkodriesprongFan spark % flake8 python | grep -i "imported but unused"
fokkodriesprongFan spark %
```

### What changes were proposed in this pull request?

Removing unused imports from the Python files to keep everything nice and tidy.

### Why are the changes needed?

Cleaning up the imports that aren't used, and suppressing the F401 warning on imports that are kept only as re-exports for other modules, preserving backward compatibility.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Adding the rule to the existing Flake8 checks.

Closes #29121 from Fokko/SPARK-32319.

Authored-by: Fokko Driesprong
Signed-off-by: Dongjoon Hyun
---
 dev/create-release/releaseutils.py | 4 ++--
 dev/lint-python | 3 +--
 dev/pip-sanity-check.py | 1 -
 dev/run-tests.py | 1 -
 dev/tox.ini | 5 +++++
 examples/src/main/python/sql/hive.py | 2 +-
 examples/src/main/python/status_api_demo.py | 1 -
 python/pyspark/__init__.py | 6 +++---
 python/pyspark/ml/__init__.py | 2 +-
 python/pyspark/ml/param/__init__.py | 1 -
 python/pyspark/ml/pipeline.py | 2 --
 python/pyspark/ml/regression.py | 8 +++-----
 python/pyspark/ml/stat.py | 4 ++--
 python/pyspark/ml/tests/test_algorithms.py | 2 +-
 python/pyspark/ml/tests/test_base.py | 2 +-
 python/pyspark/ml/tests/test_evaluation.py | 2 +-
 python/pyspark/ml/tests/test_feature.py | 3 +--
 python/pyspark/ml/tests/test_image.py | 6 ++----
 python/pyspark/ml/tests/test_linalg.py | 2 +-
 python/pyspark/ml/tests/test_param.py | 3 +--
 python/pyspark/ml/tests/test_persistence.py | 2 +-
 python/pyspark/ml/tests/test_pipeline.py | 2 +-
 python/pyspark/ml/tests/test_stat.py | 2 +-
 python/pyspark/ml/tests/test_training_summary.py | 3 +--
 python/pyspark/ml/tests/test_tuning.py | 4 +---
 python/pyspark/ml/tests/test_wrapper.py | 2 +-
 python/pyspark/ml/util.py | 1 -
 python/pyspark/mllib/classification.py | 5 ++++-
 python/pyspark/mllib/clustering.py | 2 +-
 python/pyspark/mllib/feature.py | 7 ++++---
 python/pyspark/mllib/regression.py | 6 +++++-
 python/pyspark/mllib/tests/test_algorithms.py | 2 +-
 python/pyspark/mllib/tests/test_feature.py | 6 +++---
 python/pyspark/mllib/tests/test_linalg.py | 3 +--
 python/pyspark/mllib/tests/test_stat.py | 5 ++---
 python/pyspark/mllib/tests/test_streaming_algorithms.py | 3 +--
 python/pyspark/mllib/tests/test_util.py | 2 +-
 python/pyspark/rdd.py | 6 ++++--
 python/pyspark/resource/tests/test_resources.py | 5 ++---
 python/pyspark/sql/avro/functions.py | 2 +-
 python/pyspark/sql/context.py | 4 ++--
 python/pyspark/sql/functions.py | 5 +++--
 python/pyspark/sql/pandas/functions.py | 1 -
 python/pyspark/sql/readwriter.py | 2 +-
 python/pyspark/sql/session.py | 3 ++-
 python/pyspark/sql/streaming.py | 2 +-
 python/pyspark/sql/tests/test_arrow.py | 2 +-
 python/pyspark/sql/tests/test_catalog.py | 2 +-
 python/pyspark/sql/tests/test_column.py | 2 +-
 python/pyspark/sql/tests/test_conf.py | 2 +-
 python/pyspark/sql/tests/test_context.py | 2 +-
 python/pyspark/sql/tests/test_dataframe.py | 2 --
 python/pyspark/sql/tests/test_datasources.py | 2 +-
 python/pyspark/sql/tests/test_functions.py | 5 ++---
 python/pyspark/sql/tests/test_group.py | 2 +-
 python/pyspark/sql/tests/test_pandas_cogrouped_map.py | 9 ++++-----
 python/pyspark/sql/tests/test_pandas_grouped_map.py | 3 +--
 python/pyspark/sql/tests/test_pandas_map.py | 4 +---
 python/pyspark/sql/tests/test_pandas_udf.py | 2 +-
 python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py | 2 +-
 python/pyspark/sql/tests/test_pandas_udf_scalar.py | 3 +--
 python/pyspark/sql/tests/test_pandas_udf_typehints.py | 2 +-
 python/pyspark/sql/tests/test_pandas_udf_window.py | 2 +-
 python/pyspark/sql/tests/test_readwriter.py | 2 +-
 python/pyspark/sql/tests/test_serde.py | 2 +-
 python/pyspark/sql/tests/test_session.py | 2 +-
 python/pyspark/sql/tests/test_streaming.py | 2 +-
 python/pyspark/sql/tests/test_types.py | 2 +-
 python/pyspark/sql/tests/test_udf.py | 2 --
 python/pyspark/sql/tests/test_utils.py | 2 +-
 python/pyspark/streaming/dstream.py | 1 -
 python/pyspark/streaming/tests/test_context.py | 2 +-
 python/pyspark/streaming/tests/test_dstream.py | 4 ++--
 python/pyspark/streaming/tests/test_kinesis.py | 2 +-
 python/pyspark/streaming/tests/test_listener.py | 2 +-
 python/pyspark/testing/utils.py | 4 ++--
 python/pyspark/tests/test_appsubmit.py | 2 +-
 python/pyspark/tests/test_broadcast.py | 2 +-
 python/pyspark/tests/test_conf.py | 2 +-
 python/pyspark/tests/test_context.py | 6 +++---
 python/pyspark/tests/test_daemon.py | 2 +-
 python/pyspark/tests/test_join.py | 2 +-
 python/pyspark/tests/test_pin_thread.py | 3 +--
 python/pyspark/tests/test_profiler.py | 2 +-
 python/pyspark/tests/test_rdd.py | 5 ++---
 python/pyspark/tests/test_rddbarrier.py | 2 +-
 python/pyspark/tests/test_readwrite.py | 4 +---
 python/pyspark/tests/test_serializers.py | 2 +-
 python/pyspark/tests/test_shuffle.py | 3 +--
 python/pyspark/tests/test_taskcontext.py | 3 +--
 python/pyspark/tests/test_util.py | 2 +-
 python/pyspark/tests/test_worker.py | 5 ++---
 92 files changed, 124 insertions(+), 147 deletions(-)

diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 9f1dffbd09..c6b3176a21 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -23,7 +23,7 @@ import sys
 from subprocess import Popen, PIPE

 try:
-    from jira.client import JIRA
+    from jira.client import JIRA  # noqa: F401
     # Old versions have JIRAError in exceptions package, new (0.5+) in utils.
     try:
         from jira.exceptions import JIRAError
@@ -35,7 +35,7 @@ except ImportError:
     sys.exit(-1)

 try:
-    from github import Github
+    from github import Github  # noqa: F401
     from github import GithubException
 except ImportError:
     print("This tool requires the PyGithub library")

diff --git a/dev/lint-python b/dev/lint-python
index 41da41bfda..07897eb499 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -147,8 +147,7 @@ flake8 checks failed."
     fi

     echo "starting $FLAKE8_BUILD test..."
-    FLAKE8_REPORT=$( ($FLAKE8_BUILD . --count --select=E901,E999,F821,F822,F823 \
-        --max-line-length=100 --show-source --statistics) 2>&1)
+    FLAKE8_REPORT=$( ($FLAKE8_BUILD --append-config dev/tox.ini --count --show-source --statistics .) 2>&1)
     FLAKE8_STATUS=$?
if [ "$FLAKE8_STATUS" -ne 0 ]; then diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index e9f10233b1..469e27b78b 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -16,7 +16,6 @@ # from pyspark.sql import SparkSession -from pyspark.mllib.linalg import * import sys if __name__ == "__main__": diff --git a/dev/run-tests.py b/dev/run-tests.py index 6aae3bdaef..93023d41e2 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -26,7 +26,6 @@ import sys import subprocess import glob import shutil -from collections import namedtuple from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES from sparktestsupport.shellutils import exit_from_command_with_retcode, run_cmd, rm_r, which diff --git a/dev/tox.ini b/dev/tox.ini index 5bf27d1abd..e8e44803bd 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -17,3 +17,8 @@ ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* + +[flake8] +select = E901,E999,F821,F822,F823,F401 +exclude = python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +max-line-length = 100 diff --git a/examples/src/main/python/sql/hive.py b/examples/src/main/python/sql/hive.py index bc23dcd9bd..fa1b975e2b 100644 --- a/examples/src/main/python/sql/hive.py +++ b/examples/src/main/python/sql/hive.py @@ -21,7 +21,7 @@ Run with: ./bin/spark-submit examples/src/main/python/sql/hive.py """ # $example on:spark_hive$ -from os.path import join, abspath +from os.path import abspath from pyspark.sql import SparkSession from pyspark.sql import Row diff --git a/examples/src/main/python/status_api_demo.py b/examples/src/main/python/status_api_demo.py index 7b408c8726..ae39cef6eb 100644 --- a/examples/src/main/python/status_api_demo.py +++ b/examples/src/main/python/status_api_demo.py @@ -17,7 +17,6 @@ import time import threading -import sys import queue as Queue from pyspark import SparkConf, SparkContext diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 61e38fdb2a..f84c01e505 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -61,8 +61,8 @@ from pyspark.serializers import MarshalSerializer, PickleSerializer from pyspark.status import * from pyspark.taskcontext import TaskContext, BarrierTaskContext, BarrierTaskInfo from pyspark.profiler import Profiler, BasicProfiler -from pyspark.version import __version__ -from pyspark._globals import _NoValue +from pyspark.version import __version__ # noqa: F401 +from pyspark._globals import _NoValue # noqa: F401 def since(version): @@ -115,7 +115,7 @@ def keyword_only(func): # for back compatibility -from pyspark.sql import SQLContext, HiveContext, Row +from pyspark.sql import SQLContext, HiveContext, Row # noqa: F401 __all__ = [ "SparkConf", "SparkContext", "SparkFiles", "RDD", "StorageLevel", "Broadcast", diff --git a/python/pyspark/ml/__init__.py b/python/pyspark/ml/__init__.py index 47fc78e83f..7d0e55a922 100644 --- a/python/pyspark/ml/__init__.py +++ b/python/pyspark/ml/__init__.py @@ -23,7 +23,7 @@ from pyspark.ml.base import Estimator, Model, Predictor, PredictionModel, \ Transformer, UnaryTransformer from pyspark.ml.pipeline import Pipeline, PipelineModel from pyspark.ml import classification, clustering, evaluation, feature, fpm, \ - image, pipeline, recommendation, regression, stat, tuning, util, linalg, param + image, recommendation, regression, stat, tuning, util, linalg, 
param __all__ = [ "Transformer", "UnaryTransformer", "Estimator", "Model", diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 96b07bfa5f..95f3c32b8b 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -15,7 +15,6 @@ # limitations under the License. # import array -import sys from abc import ABCMeta import copy diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index eacb8b82b5..937237fb97 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -15,8 +15,6 @@ # limitations under the License. # -import sys - from pyspark import keyword_only from pyspark.ml.base import Estimator, Model, Transformer from pyspark.ml.param import Param, Params diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 4a8d1530b8..6d88b97e8f 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -15,18 +15,16 @@ # limitations under the License. # -import sys from abc import ABCMeta -from pyspark import since, keyword_only +from pyspark import keyword_only from pyspark.ml import Predictor, PredictionModel from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import * from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ - _TreeEnsembleModel, _TreeEnsembleParams, _RandomForestParams, _GBTParams, \ - _HasVarianceImpurity, _TreeRegressorParams + _TreeEnsembleModel, _RandomForestParams, _GBTParams, _TreeRegressorParams from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, \ +from pyspark.ml.wrapper import JavaEstimator, JavaModel, \ JavaPredictor, JavaPredictionModel, JavaWrapper from pyspark.ml.common import inherit_doc from pyspark.sql import DataFrame diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 517c984252..caa847a6a4 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -19,7 +19,6 @@ import sys from pyspark import since, SparkContext from pyspark.ml.common import _java2py, _py2java -from pyspark.ml.linalg import DenseMatrix, Vectors from pyspark.ml.wrapper import JavaWrapper, _jvm from pyspark.sql.column import Column, _to_seq from pyspark.sql.functions import lit @@ -121,7 +120,7 @@ class Correlation(object): DataFrame contains a single row and a single column of name '$METHODNAME($COLUMN)'. - >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml.linalg import DenseMatrix, Vectors >>> from pyspark.ml.stat import Correlation >>> dataset = [[Vectors.dense([1, 0, 0, -2])], ... 
[Vectors.dense([4, 5, 0, 3])], @@ -412,6 +411,7 @@ class SummaryBuilder(JavaWrapper): class MultivariateGaussian(object): """Represents a (mean, cov) tuple + >>> from pyspark.ml.linalg import DenseMatrix, Vectors >>> m = MultivariateGaussian(Vectors.dense([11,12]), DenseMatrix(2, 2, (1.0, 3.0, 5.0, 2.0))) >>> (m.mean, m.cov.toArray()) (DenseVector([11.0, 12.0]), array([[ 1., 5.], diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index c948bd0c64..492e849658 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -330,7 +330,7 @@ class LinearRegressionTest(SparkSessionTestCase): if __name__ == "__main__": - from pyspark.ml.tests.test_algorithms import * + from pyspark.ml.tests.test_algorithms import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/ml/tests/test_base.py b/python/pyspark/ml/tests/test_base.py index 1b7d1c7585..cba5369ca2 100644 --- a/python/pyspark/ml/tests/test_base.py +++ b/python/pyspark/ml/tests/test_base.py @@ -67,7 +67,7 @@ class EstimatorTest(unittest.TestCase): if __name__ == "__main__": - from pyspark.ml.tests.test_base import * + from pyspark.ml.tests.test_base import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/ml/tests/test_evaluation.py b/python/pyspark/ml/tests/test_evaluation.py index fdd6ee7a53..7883df7882 100644 --- a/python/pyspark/ml/tests/test_evaluation.py +++ b/python/pyspark/ml/tests/test_evaluation.py @@ -53,7 +53,7 @@ class EvaluatorTests(SparkSessionTestCase): if __name__ == "__main__": - from pyspark.ml.tests.test_evaluation import * + from pyspark.ml.tests.test_evaluation import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 7856a317c2..7fd8c0b669 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -16,7 +16,6 @@ # limitations under the License. 
 #
-import sys
 import unittest

 from pyspark.ml.feature import Binarizer, CountVectorizer, CountVectorizerModel, HashingTF, IDF, \
@@ -301,7 +300,7 @@ class HashingTFTest(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_feature import *
+    from pyspark.ml.tests.test_feature import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py
index 5cc2a815ea..069ffceb50 100644
--- a/python/pyspark/ml/tests/test_image.py
+++ b/python/pyspark/ml/tests/test_image.py
@@ -16,10 +16,8 @@
 #
 import unittest

-import py4j
-
 from pyspark.ml.image import ImageSchema
-from pyspark.testing.mlutils import PySparkTestCase, SparkSessionTestCase
+from pyspark.testing.mlutils import SparkSessionTestCase
 from pyspark.sql import Row
 from pyspark.testing.utils import QuietTest

@@ -68,7 +66,7 @@ class ImageFileFormatTest(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_image import *
+    from pyspark.ml.tests.test_image import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_linalg.py b/python/pyspark/ml/tests/test_linalg.py
index 2cba5396f2..60dda82fe0 100644
--- a/python/pyspark/ml/tests/test_linalg.py
+++ b/python/pyspark/ml/tests/test_linalg.py
@@ -378,7 +378,7 @@ class MatrixUDTTests(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_linalg import *
+    from pyspark.ml.tests.test_linalg import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py
index 44731568b6..abee6d1be5 100644
--- a/python/pyspark/ml/tests/test_param.py
+++ b/python/pyspark/ml/tests/test_param.py
@@ -17,7 +17,6 @@
 #
 import inspect
-import sys
 import array as pyarray
 import unittest

@@ -370,7 +369,7 @@ class DefaultValuesTests(PySparkTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_param import *
+    from pyspark.ml.tests.test_param import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py
index 2f6d451851..4acf58da21 100644
--- a/python/pyspark/ml/tests/test_persistence.py
+++ b/python/pyspark/ml/tests/test_persistence.py
@@ -453,7 +453,7 @@ class PersistenceTest(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_persistence import *
+    from pyspark.ml.tests.test_persistence import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_pipeline.py b/python/pyspark/ml/tests/test_pipeline.py
index 18a8b7d3b0..011e6537a8 100644
--- a/python/pyspark/ml/tests/test_pipeline.py
+++ b/python/pyspark/ml/tests/test_pipeline.py
@@ -59,7 +59,7 @@ class PipelineTests(PySparkTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_pipeline import *
+    from pyspark.ml.tests.test_pipeline import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_stat.py b/python/pyspark/ml/tests/test_stat.py
index d583da2e8a..666d0aec58 100644
--- a/python/pyspark/ml/tests/test_stat.py
+++ b/python/pyspark/ml/tests/test_stat.py
@@ -40,7 +40,7 @@ class ChiSquareTestTests(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_stat import *
+    from pyspark.ml.tests.test_stat import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_training_summary.py b/python/pyspark/ml/tests/test_training_summary.py
index 6b05ffaa7d..cb0effbe2b 100644
--- a/python/pyspark/ml/tests/test_training_summary.py
+++ b/python/pyspark/ml/tests/test_training_summary.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
-import sys
 import unittest

 from pyspark.ml.classification import BinaryLogisticRegressionSummary, \
@@ -443,7 +442,7 @@ class TrainingSummaryTest(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_training_summary import *
+    from pyspark.ml.tests.test_training_summary import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py
index 0aa5d47ca6..66f1ea20a4 100644
--- a/python/pyspark/ml/tests/test_tuning.py
+++ b/python/pyspark/ml/tests/test_tuning.py
@@ -424,8 +424,6 @@ class CrossValidatorTests(SparkSessionTestCase):
         self.assertEqual(loadedCV.getFoldCol(), cv_with_user_folds.getFoldCol())

     def test_invalid_user_specified_folds(self):
-        from pyspark.sql import functions as F
-
         dataset_with_folds = self.spark.createDataFrame(
             [(Vectors.dense([0.0]), 0.0, 0),
              (Vectors.dense([0.4]), 1.0, 1),
@@ -754,7 +752,7 @@ class TrainValidationSplitTests(SparkSessionTestCase):

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_tuning import *
+    from pyspark.ml.tests.test_tuning import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/tests/test_wrapper.py b/python/pyspark/ml/tests/test_wrapper.py
index c0747155cb..e6eef8a7de 100644
--- a/python/pyspark/ml/tests/test_wrapper.py
+++ b/python/pyspark/ml/tests/test_wrapper.py
@@ -117,7 +117,7 @@ class WrapperTests(MLlibTestCase):
         self.assertEqual(_java2py(self.sc, java_array), expected_str_list)

 if __name__ == "__main__":
-    from pyspark.ml.tests.test_wrapper import *
+    from pyspark.ml.tests.test_wrapper import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 9ab6bfa9ba..f5f4584231 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -16,7 +16,6 @@
 #
 import json
-import sys
 import os
 import time
 import uuid

diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index c52da2ad63..bbca216cce 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -23,7 +23,7 @@ import numpy

 from pyspark import RDD, since
 from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py
-from pyspark.mllib.linalg import SparseVector, _convert_to_vector
+from pyspark.mllib.linalg import _convert_to_vector
 from pyspark.mllib.regression import (
     LabeledPoint, LinearModel, _regression_train_wrapper,
     StreamingLinearAlgorithm)
@@ -102,6 +102,7 @@ class LogisticRegressionModel(LinearClassificationModel):
     in Multinomial Logistic Regression. By default, it is binary
     logistic regression so numClasses will be set to 2.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> data = [
     ...     LabeledPoint(0.0, [0.0, 1.0]),
     ...     LabeledPoint(1.0, [1.0, 0.0]),
@@ -410,6 +411,7 @@ class SVMModel(LinearClassificationModel):
     :param intercept:
       Intercept computed for this model.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> data = [
     ...     LabeledPoint(0.0, [0.0]),
     ...     LabeledPoint(1.0, [1.0]),
@@ -569,6 +571,7 @@ class NaiveBayesModel(Saveable, Loader):
       Log of class conditional probabilities, whose dimension is C-by-D,
       where D is number of features.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> data = [
     ...     LabeledPoint(0.0, [0.0, 0.0]),
     ...     LabeledPoint(0.0, [0.0, 1.0]),

diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 85cfe583fd..b99a4150c3 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -25,7 +25,7 @@ from numpy import array, random, tile

 from pyspark import SparkContext, since
 from pyspark.rdd import RDD
 from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, callJavaFunc, _py2java, _java2py
-from pyspark.mllib.linalg import SparseVector, _convert_to_vector, DenseVector
+from pyspark.mllib.linalg import SparseVector, _convert_to_vector, DenseVector  # noqa: F401
 from pyspark.mllib.stat.distribution import MultivariateGaussian
 from pyspark.mllib.util import Saveable, Loader, inherit_doc, JavaLoader, JavaSaveable
 from pyspark.streaming import DStream

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 80a197eaa7..d95f9197ea 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -25,9 +25,7 @@ from py4j.protocol import Py4JJavaError
 from pyspark import since
 from pyspark.rdd import RDD
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
-from pyspark.mllib.linalg import (
-    Vectors, DenseVector, SparseVector, _convert_to_vector)
-from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.linalg import Vectors, _convert_to_vector
 from pyspark.mllib.util import JavaLoader, JavaSaveable

 __all__ = ['Normalizer', 'StandardScalerModel', 'StandardScaler',
@@ -60,6 +58,7 @@ class Normalizer(VectorTransformer):
     :param p: Normalization in L^p^ space, p = 2 by default.

+    >>> from pyspark.mllib.linalg import Vectors
     >>> v = Vectors.dense(range(3))
     >>> nor = Normalizer(1)
     >>> nor.transform(v)
@@ -285,6 +284,8 @@ class ChiSqSelector(object):
     By default, the selection method is `numTopFeatures`, with the default number of top features
     set to 50.

+    >>> from pyspark.mllib.linalg import SparseVector, DenseVector
+    >>> from pyspark.mllib.regression import LabeledPoint
     >>> data = sc.parallelize([
     ...     LabeledPoint(0.0, SparseVector(3, {0: 8.0, 1: 7.0})),
     ...     LabeledPoint(1.0, SparseVector(3, {1: 9.0, 2: 6.0})),

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 56ee0083ab..77bca86ac1 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -23,7 +23,7 @@ import numpy as np

 from pyspark import RDD, since
 from pyspark.streaming.dstream import DStream
 from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc
-from pyspark.mllib.linalg import SparseVector, _convert_to_vector
+from pyspark.mllib.linalg import _convert_to_vector
 from pyspark.mllib.util import Saveable, Loader

 __all__ = ['LabeledPoint', 'LinearModel',
@@ -102,6 +102,7 @@ class LinearRegressionModelBase(LinearModel):

     """A linear regression model.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1)
     >>> abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6
     True
@@ -128,6 +129,7 @@ class LinearRegressionModel(LinearRegressionModelBase):

     """A linear regression model derived from a least-squares fit.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> from pyspark.mllib.regression import LabeledPoint
     >>> data = [
     ...     LabeledPoint(0.0, [0.0]),
@@ -297,6 +299,7 @@ class LassoModel(LinearRegressionModelBase):

     """A linear regression model derived from a least-squares fit with
     an l_1 penalty term.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> from pyspark.mllib.regression import LabeledPoint
     >>> data = [
     ...     LabeledPoint(0.0, [0.0]),
@@ -441,6 +444,7 @@ class RidgeRegressionModel(LinearRegressionModelBase):

     """A linear regression model derived from a least-squares fit with
     an l_2 penalty term.

+    >>> from pyspark.mllib.linalg import SparseVector
     >>> from pyspark.mllib.regression import LabeledPoint
     >>> data = [
     ...     LabeledPoint(0.0, [0.0]),

diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py
index 4e9dd6b3ba..27a340068a 100644
--- a/python/pyspark/mllib/tests/test_algorithms.py
+++ b/python/pyspark/mllib/tests/test_algorithms.py
@@ -292,7 +292,7 @@ class FPGrowthTest(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_algorithms import *
+    from pyspark.mllib.tests.test_algorithms import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/mllib/tests/test_feature.py b/python/pyspark/mllib/tests/test_feature.py
index 9e1da0f860..165c1466dd 100644
--- a/python/pyspark/mllib/tests/test_feature.py
+++ b/python/pyspark/mllib/tests/test_feature.py
@@ -18,9 +18,9 @@
 from math import sqrt
 import unittest

-from numpy import array, random, exp, abs, tile
+from numpy import array, abs, tile

-from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, Vectors
+from pyspark.mllib.linalg import SparseVector, DenseVector, Vectors
 from pyspark.mllib.linalg.distributed import RowMatrix
 from pyspark.mllib.feature import HashingTF, IDF, StandardScaler, ElementwiseProduct, Word2Vec
 from pyspark.testing.mllibutils import MLlibTestCase
@@ -182,7 +182,7 @@ class DimensionalityReductionTests(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_feature import *
+    from pyspark.mllib.tests.test_feature import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/mllib/tests/test_linalg.py b/python/pyspark/mllib/tests/test_linalg.py
index 21c2bb422a..0e25836599 100644
--- a/python/pyspark/mllib/tests/test_linalg.py
+++ b/python/pyspark/mllib/tests/test_linalg.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
-import sys
 import array as pyarray
 import unittest

@@ -639,7 +638,7 @@ class SciPyTests(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_linalg import *
+    from pyspark.mllib.tests.test_linalg import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/mllib/tests/test_stat.py b/python/pyspark/mllib/tests/test_stat.py
index c222a33efb..6ed0589387 100644
--- a/python/pyspark/mllib/tests/test_stat.py
+++ b/python/pyspark/mllib/tests/test_stat.py
@@ -20,8 +20,7 @@ import unittest

 from numpy import array

-from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, \
-    DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT
+from pyspark.mllib.linalg import Vectors, Matrices
 from pyspark.mllib.random import RandomRDDs
 from pyspark.mllib.regression import LabeledPoint
 from pyspark.mllib.stat import Statistics
@@ -178,7 +177,7 @@ class KolmogorovSmirnovTest(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_stat import *
+    from pyspark.mllib.tests.test_stat import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py
index 5818a7c088..666f6f4d86 100644
--- a/python/pyspark/mllib/tests/test_streaming_algorithms.py
+++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
-from time import time, sleep
 import unittest

 from numpy import array, random, exp, dot, all, mean, abs
@@ -467,7 +466,7 @@ class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_streaming_algorithms import *
+    from pyspark.mllib.tests.test_streaming_algorithms import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/mllib/tests/test_util.py b/python/pyspark/mllib/tests/test_util.py
index 76bac6c5c0..12578e417b 100644
--- a/python/pyspark/mllib/tests/test_util.py
+++ b/python/pyspark/mllib/tests/test_util.py
@@ -94,7 +94,7 @@ class SerDeTest(MLlibTestCase):

 if __name__ == "__main__":
-    from pyspark.mllib.tests.test_util import *
+    from pyspark.mllib.tests.test_util import *  # noqa: F401

     try:
         import xmlrunner

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 4ee486800f..ed4e387d1b 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -18,7 +18,6 @@
 import copy
 import sys
 import os
-import re
 import operator
 import shlex
 import warnings
@@ -26,7 +25,6 @@ import heapq
 import bisect
 import random
 from subprocess import Popen, PIPE
-from tempfile import NamedTemporaryFile
 from threading import Thread
 from collections import defaultdict
 from itertools import chain
@@ -1566,6 +1564,7 @@ class RDD(object):
         used is :class:`pyspark.serializers.PickleSerializer`, default batch size
         is 10.

+        >>> from tempfile import NamedTemporaryFile
         >>> tmpFile = NamedTemporaryFile(delete=True)
         >>> tmpFile.close()
         >>> sc.parallelize([1, 2, 'spark', 'rdd']).saveAsPickleFile(tmpFile.name, 3)
@@ -1586,6 +1585,7 @@ class RDD(object):
         :param compressionCodecClass: (None by default) string i.e.
             "org.apache.hadoop.io.compress.GzipCodec"

+        >>> from tempfile import NamedTemporaryFile
        >>> tempFile = NamedTemporaryFile(delete=True)
         >>> tempFile.close()
         >>> sc.parallelize(range(10)).saveAsTextFile(tempFile.name)
@@ -1596,6 +1596,7 @@ class RDD(object):

         Empty lines are tolerated when saving to text files.
+ >>> from tempfile import NamedTemporaryFile >>> tempFile2 = NamedTemporaryFile(delete=True) >>> tempFile2.close() >>> sc.parallelize(['', 'foo', '', 'bar', '']).saveAsTextFile(tempFile2.name) @@ -1604,6 +1605,7 @@ class RDD(object): Using compressionCodecClass + >>> from tempfile import NamedTemporaryFile >>> tempFile3 = NamedTemporaryFile(delete=True) >>> tempFile3.close() >>> codec = "org.apache.hadoop.io.compress.GzipCodec" diff --git a/python/pyspark/resource/tests/test_resources.py b/python/pyspark/resource/tests/test_resources.py index 09c0d3ca20..c2b574c61a 100644 --- a/python/pyspark/resource/tests/test_resources.py +++ b/python/pyspark/resource/tests/test_resources.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import random import unittest -from pyspark.resource import ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder,\ +from pyspark.resource import ExecutorResourceRequests, ResourceProfileBuilder,\ TaskResourceRequests @@ -73,7 +72,7 @@ class ResourceProfileTests(unittest.TestCase): if __name__ == "__main__": - from pyspark.resource.tests.test_resources import * + from pyspark.resource.tests.test_resources import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index 974412ee4e..75fe4eaa07 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -122,7 +122,7 @@ def _test(): os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([jars_args, existing_args]) import doctest - from pyspark.sql import Row, SparkSession + from pyspark.sql import SparkSession import pyspark.sql.avro.functions globs = pyspark.sql.avro.functions.__dict__.copy() spark = SparkSession.builder\ diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 7fbcf85cb1..a9c5b3ba0c 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -23,8 +23,7 @@ from pyspark.sql.session import _monkey_patch_RDD, SparkSession from pyspark.sql.dataframe import DataFrame from pyspark.sql.readwriter import DataFrameReader from pyspark.sql.streaming import DataStreamReader -from pyspark.sql.types import IntegerType, Row, StringType -from pyspark.sql.udf import UDFRegistration +from pyspark.sql.udf import UDFRegistration # noqa: F401 from pyspark.sql.utils import install_exception_handler __all__ = ["SQLContext", "HiveContext"] @@ -53,6 +52,7 @@ class SQLContext(object): .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead. >>> from datetime import datetime + >>> from pyspark.sql import Row >>> sqlContext = SQLContext(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... 
b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 308642b136..cf63bfbdc3 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -29,9 +29,10 @@ from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_ from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import StringType, DataType # Keep UserDefinedFunction import for backwards compatible import; moved in SPARK-22409 -from pyspark.sql.udf import UserDefinedFunction, _create_udf +from pyspark.sql.udf import UserDefinedFunction, _create_udf # noqa: F401 +from pyspark.sql.udf import _create_udf # Keep pandas_udf and PandasUDFType import for backwards compatible import; moved in SPARK-28264 -from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType +from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType # noqa: F401 from pyspark.sql.utils import to_str # Note to developers: all of PySpark functions here take string as column names whenever possible. diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index ba4dec82d4..08665a72b0 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -16,7 +16,6 @@ # import functools -import sys import warnings from inspect import getfullargspec diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 8560ef78c1..a8a067875d 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -1246,7 +1246,7 @@ def _test(): import tempfile import py4j from pyspark.context import SparkContext - from pyspark.sql import SparkSession, Row + from pyspark.sql import SparkSession import pyspark.sql.readwriter os.chdir(os.environ["SPARK_HOME"]) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index a5d102712d..c86078c1b2 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -27,7 +27,7 @@ from pyspark.sql.dataframe import DataFrame from pyspark.sql.pandas.conversion import SparkConversionMixin from pyspark.sql.readwriter import DataFrameReader from pyspark.sql.streaming import DataStreamReader -from pyspark.sql.types import Row, DataType, StringType, StructType, \ +from pyspark.sql.types import DataType, StructType, \ _make_type_verifier, _infer_schema, _has_nulltype, _merge_type, _create_converter, \ _parse_datatype_string from pyspark.sql.utils import install_exception_handler @@ -192,6 +192,7 @@ class SparkSession(SparkConversionMixin): """Creates a new SparkSession. >>> from datetime import datetime + >>> from pyspark.sql import Row >>> spark = SparkSession(sc) >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, ... 
b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index b1832f420e..07413ff48a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -1231,7 +1231,7 @@ def _test(): import doctest import os import tempfile - from pyspark.sql import Row, SparkSession, SQLContext + from pyspark.sql import SparkSession, SQLContext import pyspark.sql.streaming os.chdir(os.environ["SPARK_HOME"]) diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 148df9b7d4..620033fbce 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -35,7 +35,7 @@ if have_pandas: from pandas.util.testing import assert_frame_equal if have_pyarrow: - import pyarrow as pa + import pyarrow as pa # noqa: F401 @unittest.skipIf( diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index ebe81d0325..106426eb55 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -190,7 +190,7 @@ class CatalogTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_catalog import * + from pyspark.sql.tests.test_catalog import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index e0b8bf45a2..99100c7a76 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -142,7 +142,7 @@ class ColumnTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_column import * + from pyspark.sql.tests.test_column import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py index 9ec10c4cb6..dd2e0be85d 100644 --- a/python/pyspark/sql/tests/test_conf.py +++ b/python/pyspark/sql/tests/test_conf.py @@ -46,7 +46,7 @@ class ConfTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_conf import * + from pyspark.sql.tests.test_conf import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index ff953ba4b4..85920eef9a 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -273,7 +273,7 @@ class SQLContextTests(unittest.TestCase): if __name__ == "__main__": - from pyspark.sql.tests.test_context import * + from pyspark.sql.tests.test_context import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 30c3fd4c8d..747abdec67 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -379,8 +379,6 @@ class DataFrameTests(ReusedSQLTestCase): # add tests for SPARK-23647 (test more types for hint) def test_extended_hint_types(self): - from pyspark.sql import DataFrame - df = self.spark.range(10e10).toDF("id") such_a_nice_list = ["itworks1", "itworks2", "itworks3"] hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list) diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py index a2e73ca610..1b466e294a 100644 --- a/python/pyspark/sql/tests/test_datasources.py +++ b/python/pyspark/sql/tests/test_datasources.py @@ -161,7 +161,7 @@ class 
DataSourcesTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_datasources import * + from pyspark.sql.tests.test_datasources import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 02180daf08..09f5960c6f 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -16,7 +16,6 @@ # import datetime -import sys from itertools import chain import re @@ -369,7 +368,7 @@ class FunctionsTests(ReusedSQLTestCase): self.assertListEqual(actual, expected) def test_higher_order_function_failures(self): - from pyspark.sql.functions import col, exists, transform + from pyspark.sql.functions import col, transform # Should fail with varargs with self.assertRaises(ValueError): @@ -394,7 +393,7 @@ class FunctionsTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_functions import * + from pyspark.sql.tests.test_functions import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 3261fa1836..2fab7a08da 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -36,7 +36,7 @@ class GroupTests(ReusedSQLTestCase): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_group import * + from pyspark.sql.tests.test_group import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 24a73918d8..5013e2d4d6 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -16,9 +16,8 @@ # import unittest -import sys -from pyspark.sql.functions import array, explode, col, lit, udf, sum, pandas_udf, PandasUDFType +from pyspark.sql.functions import array, explode, col, lit, udf, pandas_udf from pyspark.sql.types import DoubleType, StructType, StructField, Row from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -26,10 +25,10 @@ from pyspark.testing.utils import QuietTest if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal, assert_series_equal + from pandas.util.testing import assert_frame_equal if have_pyarrow: - import pyarrow as pa + import pyarrow as pa # noqa: F401 @unittest.skipIf( @@ -245,7 +244,7 @@ class CogroupedMapInPandasTests(ReusedSQLTestCase): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_cogrouped_map import * + from pyspark.sql.tests.test_pandas_cogrouped_map import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 00cc9b3a64..d1e841f7d6 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -17,7 +17,6 @@ import datetime import unittest -import sys from collections import OrderedDict from decimal import Decimal @@ -35,7 +34,7 @@ if have_pandas: from pandas.util.testing import assert_frame_equal if have_pyarrow: - import pyarrow as pa + import pyarrow as pa # noqa: F401 @unittest.skipIf( diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 02ae6a86f9..bda370dffb 100644 --- 
+++ b/python/pyspark/sql/tests/test_pandas_map.py
@@ -15,11 +15,9 @@
 # limitations under the License.
 #
 import os
-import sys
 import time
 import unittest
 
-from pyspark.sql.functions import pandas_udf, PandasUDFType
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
@@ -116,7 +114,7 @@ class MapInPandasTests(ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_map import *
+    from pyspark.sql.tests.test_pandas_map import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py
index 7fa65f0e79..97b4de3aec 100644
--- a/python/pyspark/sql/tests/test_pandas_udf.py
+++ b/python/pyspark/sql/tests/test_pandas_udf.py
@@ -241,7 +241,7 @@ class PandasUDFTests(ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_udf import *
+    from pyspark.sql.tests.test_pandas_udf import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
index 224c8ce97f..4014a70df9 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
@@ -510,7 +510,7 @@ class GroupedAggPandasUDFTests(ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_udf_grouped_agg import *
+    from pyspark.sql.tests.test_pandas_udf_grouped_agg import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py
index 448e409b0c..951cef7f9e 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py
@@ -17,7 +17,6 @@
 import os
 import random
 import shutil
-import sys
 import tempfile
 import time
 import unittest
@@ -41,7 +40,7 @@ if have_pandas:
     import pandas as pd
 
 if have_pyarrow:
-    import pyarrow as pa
+    import pyarrow as pa  # noqa: F401
 
 
 @unittest.skipIf(
diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py
index 618164fa84..7be81f8280 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py
@@ -243,7 +243,7 @@ class PandasUDFTypeHintsTests(ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_udf_typehints import *
+    from pyspark.sql.tests.test_pandas_udf_typehints import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py
index 4c969abef4..6e59255da1 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_window.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_window.py
@@ -352,7 +352,7 @@ class WindowPandasUDFTests(ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_udf_window import *
+    from pyspark.sql.tests.test_pandas_udf_window import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py
index 8e34d3865c..44b37ac1ac 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -201,7 +201,7 @@ class ReadwriterV2Tests(ReusedSQLTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_readwriter import *
+    from pyspark.sql.tests.test_readwriter import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
index 052a5b2835..6508a0f09f 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -139,7 +139,7 @@ class SerdeTests(ReusedSQLTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_serde import *
+    from pyspark.sql.tests.test_serde import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py
index 5e4166e6f8..d10f7bf906 100644
--- a/python/pyspark/sql/tests/test_session.py
+++ b/python/pyspark/sql/tests/test_session.py
@@ -358,7 +358,7 @@ class SparkExtensionsTest(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_session import *
+    from pyspark.sql.tests.test_session import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py
index d19ca075f5..caac67d7ef 100644
--- a/python/pyspark/sql/tests/test_streaming.py
+++ b/python/pyspark/sql/tests/test_streaming.py
@@ -567,7 +567,7 @@ class StreamingTests(ReusedSQLTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_streaming import *
+    from pyspark.sql.tests.test_streaming import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 05e2717fda..68e4de8382 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -969,7 +969,7 @@ class DataTypeVerificationTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_types import *
+    from pyspark.sql.tests.test_types import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index d673f7c159..ff92e1e97e 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -21,8 +21,6 @@
 import shutil
 import tempfile
 import unittest
 
-import py4j
-
 from pyspark import SparkContext
 from pyspark.sql import SparkSession, Column, Row
 from pyspark.sql.functions import UserDefinedFunction, udf
diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index 072ea08085..c6e7fcd8ec 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -52,7 +52,7 @@ class UtilsTests(ReusedSQLTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_utils import *
+    from pyspark.sql.tests.test_utils import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index 000318588e..f0067026ff 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
-import sys
 import operator
 import time
 from itertools import chain
diff --git a/python/pyspark/streaming/tests/test_context.py b/python/pyspark/streaming/tests/test_context.py
index 69a209ad87..26f1d24f64 100644
--- a/python/pyspark/streaming/tests/test_context.py
+++ b/python/pyspark/streaming/tests/test_context.py
@@ -175,7 +175,7 @@ class StreamingContextTests(PySparkStreamingTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.streaming.tests.test_context import *
+    from pyspark.streaming.tests.test_context import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py
index 89edb23070..00d00b50c9 100644
--- a/python/pyspark/streaming/tests/test_dstream.py
+++ b/python/pyspark/streaming/tests/test_dstream.py
@@ -24,7 +24,7 @@
 from functools import reduce
 from itertools import chain
 import platform
 
-from pyspark import SparkConf, SparkContext, RDD
+from pyspark import SparkConf, SparkContext
 from pyspark.streaming import StreamingContext
 from pyspark.testing.streamingutils import PySparkStreamingTestCase
@@ -644,7 +644,7 @@ class CheckpointTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.streaming.tests.test_dstream import *
+    from pyspark.streaming.tests.test_dstream import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/streaming/tests/test_kinesis.py b/python/pyspark/streaming/tests/test_kinesis.py
index a2da230821..b39809e2f6 100644
--- a/python/pyspark/streaming/tests/test_kinesis.py
+++ b/python/pyspark/streaming/tests/test_kinesis.py
@@ -80,7 +80,7 @@ class KinesisStreamTests(PySparkStreamingTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.streaming.tests.test_kinesis import *
+    from pyspark.streaming.tests.test_kinesis import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/streaming/tests/test_listener.py b/python/pyspark/streaming/tests/test_listener.py
index 48c5783bf8..3970cf6589 100644
--- a/python/pyspark/streaming/tests/test_listener.py
+++ b/python/pyspark/streaming/tests/test_listener.py
@@ -149,7 +149,7 @@ class StreamingListenerTests(PySparkStreamingTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.streaming.tests.test_listener import *
+    from pyspark.streaming.tests.test_listener import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py
index cda902b6f4..ca5761e8cd 100644
--- a/python/pyspark/testing/utils.py
+++ b/python/pyspark/testing/utils.py
@@ -27,13 +27,13 @@ from pyspark import SparkContext, SparkConf
 have_scipy = False
 have_numpy = False
 try:
-    import scipy.sparse
+    import scipy.sparse  # noqa: F401
     have_scipy = True
 except:
     # No SciPy, but that's okay, we'll skip those tests
     pass
 try:
-    import numpy as np
+    import numpy as np  # noqa: F401
     have_numpy = True
 except:
     # No NumPy, but that's okay, we'll skip those tests
diff --git a/python/pyspark/tests/test_appsubmit.py b/python/pyspark/tests/test_appsubmit.py
index 0eff514829..15170b878e 100644
--- a/python/pyspark/tests/test_appsubmit.py
+++ b/python/pyspark/tests/test_appsubmit.py
@@ -238,7 +238,7 @@ class SparkSubmitTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_appsubmit import *
+    from pyspark.tests.test_appsubmit import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_broadcast.py b/python/pyspark/tests/test_broadcast.py
index 02b0d799bd..543dc98660 100644
--- a/python/pyspark/tests/test_broadcast.py
+++ b/python/pyspark/tests/test_broadcast.py
@@ -145,7 +145,7 @@ class BroadcastFrameProtocolTest(unittest.TestCase):
 
 
 if __name__ == '__main__':
-    from pyspark.tests.test_broadcast import *
+    from pyspark.tests.test_broadcast import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_conf.py b/python/pyspark/tests/test_conf.py
index d51fd3d1f4..3e80c17f49 100644
--- a/python/pyspark/tests/test_conf.py
+++ b/python/pyspark/tests/test_conf.py
@@ -33,7 +33,7 @@ class ConfTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_conf import *
+    from pyspark.tests.test_conf import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py
index f398cec344..9f159f7703 100644
--- a/python/pyspark/tests/test_context.py
+++ b/python/pyspark/tests/test_context.py
@@ -126,7 +126,7 @@ class AddFileTests(PySparkTestCase):
         # To ensure that we're actually testing addPyFile's effects, check that
         # this fails due to `userlibrary` not being on the Python path:
         def func():
-            from userlibrary import UserClass
+            from userlibrary import UserClass  # noqa: F401
         self.assertRaises(ImportError, func)
         path = os.path.join(SPARK_HOME, "python/test_support/userlibrary.py")
         self.sc.addPyFile(path)
@@ -137,7 +137,7 @@ class AddFileTests(PySparkTestCase):
         # To ensure that we're actually testing addPyFile's effects, check that
         # this fails due to `userlibrary` not being on the Python path:
         def func():
-            from userlib import UserClass
+            from userlib import UserClass  # noqa: F401
         self.assertRaises(ImportError, func)
         path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1.zip")
         self.sc.addPyFile(path)
@@ -318,7 +318,7 @@ class ContextTestsWithResources(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_context import *
+    from pyspark.tests.test_context import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_daemon.py b/python/pyspark/tests/test_daemon.py
index 898fb39d9e..b1f8c71c77 100644
--- a/python/pyspark/tests/test_daemon.py
+++ b/python/pyspark/tests/test_daemon.py
@@ -73,7 +73,7 @@ class DaemonTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_daemon import *
+    from pyspark.tests.test_daemon import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_join.py b/python/pyspark/tests/test_join.py
index 138d062e72..815c78ef9a 100644
--- a/python/pyspark/tests/test_join.py
+++ b/python/pyspark/tests/test_join.py
@@ -59,7 +59,7 @@ class JoinTests(ReusedPySparkTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_join import *
+    from pyspark.tests.test_join import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_pin_thread.py b/python/pyspark/tests/test_pin_thread.py
index 50eb8e0ec8..efe7d7f663 100644
--- a/python/pyspark/tests/test_pin_thread.py
+++ b/python/pyspark/tests/test_pin_thread.py
@@ -16,7 +16,6 @@
 #
 import os
 import time
-import random
 import threading
 import unittest
@@ -167,7 +166,7 @@ class PinThreadTests(unittest.TestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_pin_thread import *
+    from pyspark.tests.test_pin_thread import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py
index dbce72a0d3..ca144cc6e1 100644
--- a/python/pyspark/tests/test_profiler.py
+++ b/python/pyspark/tests/test_profiler.py
@@ -98,7 +98,7 @@ class ProfilerTests2(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_profiler import *
+    from pyspark.tests.test_profiler import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py
index 1a580e27ea..c154bda00d 100644
--- a/python/pyspark/tests/test_rdd.py
+++ b/python/pyspark/tests/test_rdd.py
@@ -18,7 +18,6 @@ from datetime import datetime, timedelta
 import hashlib
 import os
 import random
-import sys
 import tempfile
 import time
 from glob import glob
@@ -26,7 +25,7 @@ from glob import glob
 from py4j.protocol import Py4JJavaError
 
 from pyspark import shuffle, RDD
-from pyspark.resource import ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder,\
+from pyspark.resource import ExecutorResourceRequests, ResourceProfileBuilder,\
     TaskResourceRequests
 from pyspark.serializers import CloudPickleSerializer, BatchedSerializer, PickleSerializer,\
     MarshalSerializer, UTF8Deserializer, NoOpSerializer
@@ -882,7 +881,7 @@ class RDDTests(ReusedPySparkTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_rdd import *
+    from pyspark.tests.test_rdd import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_rddbarrier.py b/python/pyspark/tests/test_rddbarrier.py
index 8534fb4abb..f0a05a23cc 100644
--- a/python/pyspark/tests/test_rddbarrier.py
+++ b/python/pyspark/tests/test_rddbarrier.py
@@ -40,7 +40,7 @@ class RDDBarrierTests(ReusedPySparkTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_rddbarrier import *
+    from pyspark.tests.test_rddbarrier import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_readwrite.py b/python/pyspark/tests/test_readwrite.py
index faa006c7d8..adbc343c65 100644
--- a/python/pyspark/tests/test_readwrite.py
+++ b/python/pyspark/tests/test_readwrite.py
@@ -16,10 +16,8 @@
 #
 import os
 import shutil
-import sys
 import tempfile
 import unittest
-from array import array
 
 from pyspark.testing.utils import ReusedPySparkTestCase, SPARK_HOME
@@ -306,7 +304,7 @@ class OutputFormatTests(ReusedPySparkTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_readwrite import *
+    from pyspark.tests.test_readwrite import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py
index 8caf9da85a..bffd78a501 100644
--- a/python/pyspark/tests/test_serializers.py
+++ b/python/pyspark/tests/test_serializers.py
@@ -87,7 +87,7 @@ class SerializationTestCase(unittest.TestCase):
     def test_pickling_file_handles(self):
         # to be corrected with SPARK-11160
         try:
-            import xmlrunner
+            import xmlrunner  # noqa: F401
         except ImportError:
             ser = CloudPickleSerializer()
             out1 = sys.stderr
diff --git a/python/pyspark/tests/test_shuffle.py b/python/pyspark/tests/test_shuffle.py
index 434414618e..061b93f32c 100644
--- a/python/pyspark/tests/test_shuffle.py
+++ b/python/pyspark/tests/test_shuffle.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
 import random
-import sys
 import unittest
 
 from py4j.protocol import Py4JJavaError
@@ -168,7 +167,7 @@ class SorterTests(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_shuffle import *
+    from pyspark.tests.test_shuffle import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py
index 8c2bedbe4e..f5be685643 100644
--- a/python/pyspark/tests/test_taskcontext.py
+++ b/python/pyspark/tests/test_taskcontext.py
@@ -16,7 +16,6 @@
 #
 import os
 import random
-import shutil
 import stat
 import sys
 import tempfile
@@ -322,7 +321,7 @@ class TaskContextTestsWithResources(unittest.TestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_taskcontext import *
+    from pyspark.tests.test_taskcontext import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_util.py b/python/pyspark/tests/test_util.py
index 511d62a51f..e853bc322c 100644
--- a/python/pyspark/tests/test_util.py
+++ b/python/pyspark/tests/test_util.py
@@ -74,7 +74,7 @@ class UtilTests(PySparkTestCase):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_util import *
+    from pyspark.tests.test_util import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py
index 3b1848dcfd..a855eaafc1 100644
--- a/python/pyspark/tests/test_worker.py
+++ b/python/pyspark/tests/test_worker.py
@@ -16,14 +16,13 @@
 # limitations under the License.
 #
 import os
-import sys
 import tempfile
 import threading
 import time
 import unittest
 
 has_resource_module = True
 try:
-    import resource
+    import resource  # noqa: F401
 except ImportError:
     has_resource_module = False
@@ -200,7 +199,7 @@ class WorkerMemoryTest(unittest.TestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.tests.test_worker import *
+    from pyspark.tests.test_worker import *  # noqa: F401
 
     try:
         import xmlrunner