;
; Licensed to the Apache Software Foundation (ASF) under one or more
; contributor license agreements. See the NOTICE file distributed with
; this work for additional information regarding copyright ownership.
; The ASF licenses this file to You under the Apache License, Version 2.0
; (the "License"); you may not use this file except in compliance with
; the License. You may obtain a copy of the License at
;
;    http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

[mypy]
strict_optional = True
no_implicit_optional = True
disallow_untyped_defs = True

; Allow untyped def in internal modules and tests

[mypy-pyspark.daemon]
disallow_untyped_defs = False

[mypy-pyspark.find_spark_home]
disallow_untyped_defs = False

[mypy-pyspark._globals]
disallow_untyped_defs = False

[mypy-pyspark.install]
disallow_untyped_defs = False

[mypy-pyspark.java_gateway]
disallow_untyped_defs = False

[mypy-pyspark.join]
disallow_untyped_defs = False

[mypy-pyspark.ml.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.mllib.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.rddsampler]
disallow_untyped_defs = False

[mypy-pyspark.resource.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.serializers]
disallow_untyped_defs = False

[mypy-pyspark.shuffle]
disallow_untyped_defs = False

[mypy-pyspark.streaming.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.streaming.util]
disallow_untyped_defs = False

[mypy-pyspark.sql.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.sql.pandas.serializers]
disallow_untyped_defs = False

[mypy-pyspark.sql.pandas.types]
disallow_untyped_defs = False

[mypy-pyspark.sql.pandas.typehints]
disallow_untyped_defs = False

[mypy-pyspark.sql.pandas.utils]
disallow_untyped_defs = False

[mypy-pyspark.sql.pandas._typing.protocols.*]
disallow_untyped_defs = False

[mypy-pyspark.sql.utils]
disallow_untyped_defs = False

[mypy-pyspark.tests.*]
disallow_untyped_defs = False

[mypy-pyspark.testing.*]
disallow_untyped_defs = False

[mypy-pyspark.traceback_utils]
disallow_untyped_defs = False

[mypy-pyspark.util]
disallow_untyped_defs = False

[mypy-pyspark.worker]
disallow_untyped_defs = False

; Ignore errors in embedded third party code

[mypy-pyspark.cloudpickle.*]
ignore_errors = True

; Ignore missing imports for external untyped packages

[mypy-py4j.*]
ignore_missing_imports = True

[mypy-numpy]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-pandas.*]
ignore_missing_imports = True

[mypy-pyarrow.*]
ignore_missing_imports = True

[mypy-psutil.*]
ignore_missing_imports = True