9fcf0ea718
Disallow the use of unused imports: - Unnecessary increases the memory footprint of the application - Removes the imports that are required for the examples in the docstring from the file-scope to the example itself. This keeps the files itself clean, and gives a more complete example as it also includes the imports :) ``` fokkodriesprongFan spark % flake8 python | grep -i "imported but unused" python/pyspark/cloudpickle.py:46:1: F401 'functools.partial' imported but unused python/pyspark/cloudpickle.py:55:1: F401 'traceback' imported but unused python/pyspark/heapq3.py:868:5: F401 '_heapq.*' imported but unused python/pyspark/__init__.py:61:1: F401 'pyspark.version.__version__' imported but unused python/pyspark/__init__.py:62:1: F401 'pyspark._globals._NoValue' imported but unused python/pyspark/__init__.py:115:1: F401 'pyspark.sql.SQLContext' imported but unused python/pyspark/__init__.py:115:1: F401 'pyspark.sql.HiveContext' imported but unused python/pyspark/__init__.py:115:1: F401 'pyspark.sql.Row' imported but unused python/pyspark/rdd.py:21:1: F401 're' imported but unused python/pyspark/rdd.py:29:1: F401 'tempfile.NamedTemporaryFile' imported but unused python/pyspark/mllib/regression.py:26:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused python/pyspark/mllib/clustering.py:28:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused python/pyspark/mllib/clustering.py:28:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused python/pyspark/mllib/classification.py:26:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused python/pyspark/mllib/feature.py:28:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused python/pyspark/mllib/feature.py:28:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused python/pyspark/mllib/feature.py:30:1: F401 'pyspark.mllib.regression.LabeledPoint' imported but unused python/pyspark/mllib/tests/test_linalg.py:18:1: F401 'sys' imported but unused 
python/pyspark/mllib/tests/test_linalg.py:642:5: F401 'pyspark.mllib.tests.test_linalg.*' imported but unused python/pyspark/mllib/tests/test_feature.py:21:1: F401 'numpy.random' imported but unused python/pyspark/mllib/tests/test_feature.py:21:1: F401 'numpy.exp' imported but unused python/pyspark/mllib/tests/test_feature.py:23:1: F401 'pyspark.mllib.linalg.Vector' imported but unused python/pyspark/mllib/tests/test_feature.py:23:1: F401 'pyspark.mllib.linalg.VectorUDT' imported but unused python/pyspark/mllib/tests/test_feature.py:185:5: F401 'pyspark.mllib.tests.test_feature.*' imported but unused python/pyspark/mllib/tests/test_util.py:97:5: F401 'pyspark.mllib.tests.test_util.*' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.Vector' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.SparseVector' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.DenseVector' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.VectorUDT' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg._convert_to_vector' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.DenseMatrix' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.SparseMatrix' imported but unused python/pyspark/mllib/tests/test_stat.py:23:1: F401 'pyspark.mllib.linalg.MatrixUDT' imported but unused python/pyspark/mllib/tests/test_stat.py:181:5: F401 'pyspark.mllib.tests.test_stat.*' imported but unused python/pyspark/mllib/tests/test_streaming_algorithms.py:18:1: F401 'time.time' imported but unused python/pyspark/mllib/tests/test_streaming_algorithms.py:18:1: F401 'time.sleep' imported but unused python/pyspark/mllib/tests/test_streaming_algorithms.py:470:5: F401 'pyspark.mllib.tests.test_streaming_algorithms.*' imported but 
unused python/pyspark/mllib/tests/test_algorithms.py:295:5: F401 'pyspark.mllib.tests.test_algorithms.*' imported but unused python/pyspark/tests/test_serializers.py:90:13: F401 'xmlrunner' imported but unused python/pyspark/tests/test_rdd.py:21:1: F401 'sys' imported but unused python/pyspark/tests/test_rdd.py:29:1: F401 'pyspark.resource.ResourceProfile' imported but unused python/pyspark/tests/test_rdd.py:885:5: F401 'pyspark.tests.test_rdd.*' imported but unused python/pyspark/tests/test_readwrite.py:19:1: F401 'sys' imported but unused python/pyspark/tests/test_readwrite.py:22:1: F401 'array.array' imported but unused python/pyspark/tests/test_readwrite.py:309:5: F401 'pyspark.tests.test_readwrite.*' imported but unused python/pyspark/tests/test_join.py:62:5: F401 'pyspark.tests.test_join.*' imported but unused python/pyspark/tests/test_taskcontext.py:19:1: F401 'shutil' imported but unused python/pyspark/tests/test_taskcontext.py:325:5: F401 'pyspark.tests.test_taskcontext.*' imported but unused python/pyspark/tests/test_conf.py:36:5: F401 'pyspark.tests.test_conf.*' imported but unused python/pyspark/tests/test_broadcast.py:148:5: F401 'pyspark.tests.test_broadcast.*' imported but unused python/pyspark/tests/test_daemon.py:76:5: F401 'pyspark.tests.test_daemon.*' imported but unused python/pyspark/tests/test_util.py:77:5: F401 'pyspark.tests.test_util.*' imported but unused python/pyspark/tests/test_pin_thread.py:19:1: F401 'random' imported but unused python/pyspark/tests/test_pin_thread.py:149:5: F401 'pyspark.tests.test_pin_thread.*' imported but unused python/pyspark/tests/test_worker.py:19:1: F401 'sys' imported but unused python/pyspark/tests/test_worker.py:26:5: F401 'resource' imported but unused python/pyspark/tests/test_worker.py:203:5: F401 'pyspark.tests.test_worker.*' imported but unused python/pyspark/tests/test_profiler.py:101:5: F401 'pyspark.tests.test_profiler.*' imported but unused python/pyspark/tests/test_shuffle.py:18:1: F401 'sys' 
imported but unused python/pyspark/tests/test_shuffle.py:171:5: F401 'pyspark.tests.test_shuffle.*' imported but unused python/pyspark/tests/test_rddbarrier.py:43:5: F401 'pyspark.tests.test_rddbarrier.*' imported but unused python/pyspark/tests/test_context.py:129:13: F401 'userlibrary.UserClass' imported but unused python/pyspark/tests/test_context.py:140:13: F401 'userlib.UserClass' imported but unused python/pyspark/tests/test_context.py:310:5: F401 'pyspark.tests.test_context.*' imported but unused python/pyspark/tests/test_appsubmit.py:241:5: F401 'pyspark.tests.test_appsubmit.*' imported but unused python/pyspark/streaming/dstream.py:18:1: F401 'sys' imported but unused python/pyspark/streaming/tests/test_dstream.py:27:1: F401 'pyspark.RDD' imported but unused python/pyspark/streaming/tests/test_dstream.py:647:5: F401 'pyspark.streaming.tests.test_dstream.*' imported but unused python/pyspark/streaming/tests/test_kinesis.py:83:5: F401 'pyspark.streaming.tests.test_kinesis.*' imported but unused python/pyspark/streaming/tests/test_listener.py:152:5: F401 'pyspark.streaming.tests.test_listener.*' imported but unused python/pyspark/streaming/tests/test_context.py:178:5: F401 'pyspark.streaming.tests.test_context.*' imported but unused python/pyspark/testing/utils.py:30:5: F401 'scipy.sparse' imported but unused python/pyspark/testing/utils.py:36:5: F401 'numpy as np' imported but unused python/pyspark/ml/regression.py:25:1: F401 'pyspark.ml.tree._TreeEnsembleParams' imported but unused python/pyspark/ml/regression.py:25:1: F401 'pyspark.ml.tree._HasVarianceImpurity' imported but unused python/pyspark/ml/regression.py:29:1: F401 'pyspark.ml.wrapper.JavaParams' imported but unused python/pyspark/ml/util.py:19:1: F401 'sys' imported but unused python/pyspark/ml/__init__.py:25:1: F401 'pyspark.ml.pipeline' imported but unused python/pyspark/ml/pipeline.py:18:1: F401 'sys' imported but unused python/pyspark/ml/stat.py:22:1: F401 'pyspark.ml.linalg.DenseMatrix' 
imported but unused python/pyspark/ml/stat.py:22:1: F401 'pyspark.ml.linalg.Vectors' imported but unused python/pyspark/ml/tests/test_training_summary.py:18:1: F401 'sys' imported but unused python/pyspark/ml/tests/test_training_summary.py:364:5: F401 'pyspark.ml.tests.test_training_summary.*' imported but unused python/pyspark/ml/tests/test_linalg.py:381:5: F401 'pyspark.ml.tests.test_linalg.*' imported but unused python/pyspark/ml/tests/test_tuning.py:427:9: F401 'pyspark.sql.functions as F' imported but unused python/pyspark/ml/tests/test_tuning.py:757:5: F401 'pyspark.ml.tests.test_tuning.*' imported but unused python/pyspark/ml/tests/test_wrapper.py:120:5: F401 'pyspark.ml.tests.test_wrapper.*' imported but unused python/pyspark/ml/tests/test_feature.py:19:1: F401 'sys' imported but unused python/pyspark/ml/tests/test_feature.py:304:5: F401 'pyspark.ml.tests.test_feature.*' imported but unused python/pyspark/ml/tests/test_image.py:19:1: F401 'py4j' imported but unused python/pyspark/ml/tests/test_image.py:22:1: F401 'pyspark.testing.mlutils.PySparkTestCase' imported but unused python/pyspark/ml/tests/test_image.py:71:5: F401 'pyspark.ml.tests.test_image.*' imported but unused python/pyspark/ml/tests/test_persistence.py:456:5: F401 'pyspark.ml.tests.test_persistence.*' imported but unused python/pyspark/ml/tests/test_evaluation.py:56:5: F401 'pyspark.ml.tests.test_evaluation.*' imported but unused python/pyspark/ml/tests/test_stat.py:43:5: F401 'pyspark.ml.tests.test_stat.*' imported but unused python/pyspark/ml/tests/test_base.py:70:5: F401 'pyspark.ml.tests.test_base.*' imported but unused python/pyspark/ml/tests/test_param.py:20:1: F401 'sys' imported but unused python/pyspark/ml/tests/test_param.py:375:5: F401 'pyspark.ml.tests.test_param.*' imported but unused python/pyspark/ml/tests/test_pipeline.py:62:5: F401 'pyspark.ml.tests.test_pipeline.*' imported but unused python/pyspark/ml/tests/test_algorithms.py:333:5: F401 'pyspark.ml.tests.test_algorithms.*' 
imported but unused python/pyspark/ml/param/__init__.py:18:1: F401 'sys' imported but unused python/pyspark/resource/tests/test_resources.py:17:1: F401 'random' imported but unused python/pyspark/resource/tests/test_resources.py:20:1: F401 'pyspark.resource.ResourceProfile' imported but unused python/pyspark/resource/tests/test_resources.py:75:5: F401 'pyspark.resource.tests.test_resources.*' imported but unused python/pyspark/sql/functions.py:32:1: F401 'pyspark.sql.udf.UserDefinedFunction' imported but unused python/pyspark/sql/functions.py:34:1: F401 'pyspark.sql.pandas.functions.pandas_udf' imported but unused python/pyspark/sql/session.py:30:1: F401 'pyspark.sql.types.Row' imported but unused python/pyspark/sql/session.py:30:1: F401 'pyspark.sql.types.StringType' imported but unused python/pyspark/sql/readwriter.py:1084:5: F401 'pyspark.sql.Row' imported but unused python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.IntegerType' imported but unused python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.Row' imported but unused python/pyspark/sql/context.py:26:1: F401 'pyspark.sql.types.StringType' imported but unused python/pyspark/sql/context.py:27:1: F401 'pyspark.sql.udf.UDFRegistration' imported but unused python/pyspark/sql/streaming.py:1212:5: F401 'pyspark.sql.Row' imported but unused python/pyspark/sql/tests/test_utils.py:55:5: F401 'pyspark.sql.tests.test_utils.*' imported but unused python/pyspark/sql/tests/test_pandas_map.py:18:1: F401 'sys' imported but unused python/pyspark/sql/tests/test_pandas_map.py:22:1: F401 'pyspark.sql.functions.pandas_udf' imported but unused python/pyspark/sql/tests/test_pandas_map.py:22:1: F401 'pyspark.sql.functions.PandasUDFType' imported but unused python/pyspark/sql/tests/test_pandas_map.py:119:5: F401 'pyspark.sql.tests.test_pandas_map.*' imported but unused python/pyspark/sql/tests/test_catalog.py:193:5: F401 'pyspark.sql.tests.test_catalog.*' imported but unused 
python/pyspark/sql/tests/test_group.py:39:5: F401 'pyspark.sql.tests.test_group.*' imported but unused python/pyspark/sql/tests/test_session.py:361:5: F401 'pyspark.sql.tests.test_session.*' imported but unused python/pyspark/sql/tests/test_conf.py:49:5: F401 'pyspark.sql.tests.test_conf.*' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:19:1: F401 'sys' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:21:1: F401 'pyspark.sql.functions.sum' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:21:1: F401 'pyspark.sql.functions.PandasUDFType' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:29:5: F401 'pandas.util.testing.assert_series_equal' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:32:5: F401 'pyarrow as pa' imported but unused python/pyspark/sql/tests/test_pandas_cogrouped_map.py:248:5: F401 'pyspark.sql.tests.test_pandas_cogrouped_map.*' imported but unused python/pyspark/sql/tests/test_udf.py:24:1: F401 'py4j' imported but unused python/pyspark/sql/tests/test_pandas_udf_typehints.py:246:5: F401 'pyspark.sql.tests.test_pandas_udf_typehints.*' imported but unused python/pyspark/sql/tests/test_functions.py:19:1: F401 'sys' imported but unused python/pyspark/sql/tests/test_functions.py:362:9: F401 'pyspark.sql.functions.exists' imported but unused python/pyspark/sql/tests/test_functions.py:387:5: F401 'pyspark.sql.tests.test_functions.*' imported but unused python/pyspark/sql/tests/test_pandas_udf_scalar.py:21:1: F401 'sys' imported but unused python/pyspark/sql/tests/test_pandas_udf_scalar.py:45:5: F401 'pyarrow as pa' imported but unused python/pyspark/sql/tests/test_pandas_udf_window.py:355:5: F401 'pyspark.sql.tests.test_pandas_udf_window.*' imported but unused python/pyspark/sql/tests/test_arrow.py:38:5: F401 'pyarrow as pa' imported but unused python/pyspark/sql/tests/test_pandas_grouped_map.py:20:1: F401 'sys' imported but unused 
python/pyspark/sql/tests/test_pandas_grouped_map.py:38:5: F401 'pyarrow as pa' imported but unused python/pyspark/sql/tests/test_dataframe.py:382:9: F401 'pyspark.sql.DataFrame' imported but unused python/pyspark/sql/avro/functions.py:125:5: F401 'pyspark.sql.Row' imported but unused python/pyspark/sql/pandas/functions.py:19:1: F401 'sys' imported but unused ``` After: ``` fokkodriesprongFan spark % flake8 python | grep -i "imported but unused" fokkodriesprongFan spark % ``` ### What changes were proposed in this pull request? Removing unused imports from the Python files to keep everything nice and tidy. ### Why are the changes needed? Cleaning up of the imports that aren't used, and suppressing the imports that are used as references to other modules, preserving backward compatibility. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Adding the rule to the existing Flake8 checks. Closes #29121 from Fokko/SPARK-32319. Authored-by: Fokko Driesprong <fokko@apache.org> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
295 lines
9.9 KiB
Python
Executable file
295 lines
9.9 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
# This file contains helper methods used in creating a release.
|
|
|
|
import re
|
|
import sys
|
|
from subprocess import Popen, PIPE
|
|
|
|
try:
|
|
from jira.client import JIRA # noqa: F401
|
|
# Old versions have JIRAError in exceptions package, new (0.5+) in utils.
|
|
try:
|
|
from jira.exceptions import JIRAError
|
|
except ImportError:
|
|
from jira.utils import JIRAError
|
|
except ImportError:
|
|
print("This tool requires the jira-python library")
|
|
print("Install using 'sudo pip install jira'")
|
|
sys.exit(-1)
|
|
|
|
try:
|
|
from github import Github # noqa: F401
|
|
from github import GithubException
|
|
except ImportError:
|
|
print("This tool requires the PyGithub library")
|
|
print("Install using 'sudo pip install PyGithub'")
|
|
sys.exit(-1)
|
|
|
|
try:
|
|
import unidecode
|
|
except ImportError:
|
|
print("This tool requires the unidecode library to decode obscure github usernames")
|
|
print("Install using 'sudo pip install unidecode'")
|
|
sys.exit(-1)
|
|
|
|
|
|
# Name of the file the contributors list is written to (used by the
# release helper scripts; presumably consumed by a companion
# translate-contributors step — confirm against the other release scripts).
contributors_file_name = "contributors.txt"
|
|
|
|
|
|
# Prompt the user to answer yes or no until they do so
|
|
def yesOrNoPrompt(msg):
    """Prompt the user until they answer 'y' or 'n'.

    Returns True for 'y' and False for 'n'.
    """
    # Loop instead of recursing: the original re-invoked itself for every
    # invalid answer, so a stubborn (or piped) input stream could blow the
    # recursion limit.
    response = input("%s [y/n]: " % msg)
    while response != "y" and response != "n":
        response = input("%s [y/n]: " % msg)
    return response == "y"
|
|
|
|
|
|
# Utility functions run git commands (written with Git 1.8.5)
|
|
def run_cmd(cmd):
    """Run *cmd* (a list of arguments) and return its stdout as text.

    universal_newlines=True makes Popen decode the output to str. Without
    it, Python 3 returns bytes, and callers such as get_commits() — which
    split the result with str markers — would raise TypeError.
    """
    return Popen(cmd, stdout=PIPE, universal_newlines=True).communicate()[0]
|
|
|
|
|
|
def run_cmd_error(cmd):
    """Run *cmd* and return its stderr as text (stdout is discarded).

    universal_newlines=True decodes stderr to str; without it, Python 3
    returns bytes and tag_exists()'s `"error" not in stderr` check would
    raise TypeError.
    """
    return Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True).communicate()[1]
|
|
|
|
|
|
def get_date(commit_hash):
    """Return the committer date of *commit_hash*, as git formats %cd."""
    cmd = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(cmd)
|
|
|
|
|
|
def tag_exists(tag):
    """Return True if git can resolve *tag* locally.

    `git show` writes an error to stderr when the tag is unknown, so an
    error-free stderr means the tag exists.
    """
    return "error" not in run_cmd_error(["git", "show", tag])
|
|
|
|
|
|
# A type-safe representation of a commit
|
|
# A type-safe representation of a commit
class Commit:
    """Record of a single git commit.

    Holds the abbreviated hash, author, title, and — when it could be
    parsed from the commit body — the associated pull request number.
    """

    def __init__(self, _hash, author, title, pr_number=None):
        self._hash = _hash
        self.author = author
        self.title = title
        self.pr_number = pr_number

    def get_hash(self):
        """Return the abbreviated commit hash."""
        return self._hash

    def get_author(self):
        """Return the commit author's name."""
        return self.author

    def get_title(self):
        """Return the commit title (subject line)."""
        return self.title

    def get_pr_number(self):
        """Return the pull request number, or None if unknown."""
        return self.pr_number

    def __str__(self):
        suffix = "(Closes #%s)" % self.pr_number if self.pr_number else ""
        return "%s %s %s %s" % (self._hash, self.author, self.title, suffix)
|
|
|
|
|
|
# Return all commits that belong to the specified tag.
|
|
#
|
|
# Under the hood, this runs a `git log` on that tag and parses the fields
|
|
# from the command output to construct a list of Commit objects. Note that
|
|
# because certain fields reside in the commit description and cannot be parsed
|
|
# through the Github API itself, we need to do some intelligent regex parsing
|
|
# to extract those fields.
|
|
#
|
|
# This is written using Git 1.8.5.
|
|
def get_commits(tag):
    """Return a list of Commit objects for all commits reachable from *tag*.

    Runs `git log` with custom sentinel markers embedded in the pretty
    format so the hash, author, subject, and free-form body can be split
    apart reliably even though they may contain arbitrary text. The PR
    number and github username only exist in the commit body (not in any
    structured git field or Github API), so they are regex-parsed out.
    """
    commit_start_marker = "|=== COMMIT START MARKER ===|"
    commit_end_marker = "|=== COMMIT END MARKER ===|"
    field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
    # %h = abbreviated hash, %an = author name, %s = subject, %b = body
    log_format =\
        commit_start_marker + "%h" +\
        field_end_marker + "%an" +\
        field_end_marker + "%s" +\
        commit_end_marker + "%b"
    output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])
    commits = []
    # Drop the empty leading fragment produced by split() before the first marker.
    raw_commits = [c for c in output.split(commit_start_marker) if c]
    for commit in raw_commits:
        # Exactly one end marker must be present; anything else means the
        # output is malformed, which is fatal.
        if commit.count(commit_end_marker) != 1:
            print("Commit end marker not found in commit: ")
            for line in commit.split("\n"):
                print(line)
            sys.exit(1)
        # Separate commit digest from the body.
        # From the digest we extract the hash, author and the title;
        # from the body, we extract the PR number and the github username.
        [commit_digest, commit_body] = commit.split(commit_end_marker)
        if commit_digest.count(field_end_marker) != 2:
            sys.exit("Unexpected format in commit: %s" % commit_digest)
        [_hash, author, title] = commit_digest.split(field_end_marker)
        # The PR number and github username is in the commit message
        # itself and cannot be accessed through any Github API.
        pr_number = None
        match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body)
        if match:
            [pr_number, github_username] = match.groups()
            # If the author name is not valid, use the github
            # username so we can translate it properly later.
            if not is_valid_author(author):
                author = github_username
        # Guard against special characters: transliterate any non-ASCII
        # author name to its closest ASCII representation.
        author = str(author)
        author = unidecode.unidecode(author).strip()
        commit = Commit(_hash, author, title, pr_number)
        commits.append(commit)
    return commits
|
|
|
|
# Maintain a mapping for translating issue types to contributions in the release notes
|
|
# This serves an additional function of warning the user against unknown issue types
|
|
# Note: This list is partially derived from this link:
|
|
# https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/issuetypes
|
|
# Keep these in lower case
|
|
# Maps lower-case JIRA issue type -> phrase used in the release notes.
known_issue_types = {
    "bug": "bug fixes",
    "build": "build fixes",
    "dependency upgrade": "build fixes",
    "improvement": "improvements",
    "new feature": "new features",
    "documentation": "documentation",
    "test": "test",
    # NOTE(review): "task"/"sub-task" map to the singular "improvement"
    # while "improvement" maps to the plural "improvements" — possibly
    # intentional, but worth confirming.
    "task": "improvement",
    "sub-task": "improvement"
}
|
|
|
|
# Maintain a mapping for translating component names when creating the release notes
|
|
# This serves an additional function of warning the user against unknown components
|
|
# Note: This list is largely derived from this link:
|
|
# https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/components
|
|
# Display name shared by all components that roll up into Spark Core.
CORE_COMPONENT = "Core"
# Maps lower-case JIRA component name -> display name for the release notes.
# Keys must stay lower case: lookups go through component.lower().
known_components = {
    "block manager": CORE_COMPONENT,
    "build": CORE_COMPONENT,
    "deploy": CORE_COMPONENT,
    "documentation": CORE_COMPONENT,
    "examples": CORE_COMPONENT,
    "graphx": "GraphX",
    "input/output": CORE_COMPONENT,
    "java api": "Java API",
    "k8s": "Kubernetes",
    "kubernetes": "Kubernetes",
    "mesos": "Mesos",
    "ml": "MLlib",
    "mllib": "MLlib",
    "project infra": "Project Infra",
    "pyspark": "PySpark",
    "shuffle": "Shuffle",
    "spark core": CORE_COMPONENT,
    "spark shell": CORE_COMPONENT,
    "sql": "SQL",
    "streaming": "Streaming",
    "web ui": "Web UI",
    "windows": "Windows",
    "yarn": "YARN"
}
|
|
|
|
|
|
# Translate issue types using a format appropriate for writing contributions
|
|
# If an unknown issue type is encountered, warn the user
|
|
def translate_issue_type(issue_type, issue_id, warnings):
    """Map a JIRA issue type to its release-notes wording.

    Unknown types are returned unchanged (lower-cased) after appending a
    warning referencing *issue_id* to the *warnings* list.
    """
    key = issue_type.lower()
    translated = known_issue_types.get(key)
    if translated is not None:
        return translated
    warnings.append("Unknown issue type \"%s\" (see %s)" % (key, issue_id))
    return key
|
|
|
|
|
|
# Translate component names using a format appropriate for writing contributions
|
|
# If an unknown component is encountered, warn the user
|
|
def translate_component(component, commit_hash, warnings):
    """Map a JIRA component name to its release-notes spelling.

    Unknown components are returned unchanged (lower-cased) after a
    warning referencing *commit_hash* is appended to *warnings*.
    """
    key = component.lower()
    try:
        return known_components[key]
    except KeyError:
        warnings.append("Unknown component \"%s\" (see %s)" % (key, commit_hash))
        return key
|
|
|
|
|
|
# Parse components in the commit message
|
|
# The returned components are already filtered and translated
|
|
def find_components(commit, commit_hash):
    """Extract known component tags (e.g. "[SQL]") from a commit title.

    Returns the translated names of components found in the title,
    filtered to those present in known_components.

    The regex uses a capturing group so findall() yields the tag text
    WITHOUT the surrounding brackets. The previous pattern (r"\\[\\w*\\]")
    kept the brackets, so e.g. "[sql]" could never match the bracket-less
    keys of known_components and this function always returned [].
    """
    tags = re.findall(r"\[(\w+)\]", commit.lower())
    return [translate_component(c, commit_hash, [])
            for c in tags if c in known_components]
|
|
|
|
|
|
# Join a list of strings in a human-readable manner
|
|
# e.g. ["Juice"] -> "Juice"
|
|
# e.g. ["Juice", "baby"] -> "Juice and baby"
|
|
# e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
|
|
def nice_join(str_list):
    """Join strings for prose, Oxford-comma style.

    e.g. [] -> "", ["a"] -> "a", ["a", "b"] -> "a and b",
    ["a", "b", "c"] -> "a, b, and c".
    """
    items = list(str_list)  # the input is sometimes a set
    if len(items) == 0:
        return ""
    if len(items) == 1:
        return items[0]
    if len(items) == 2:
        return "%s and %s" % (items[0], items[1])
    return ", ".join(items[:-1]) + ", and " + items[-1]
|
|
|
|
|
|
# Return the full name of the specified user on Github
|
|
# If the user doesn't exist, return None
|
|
def get_github_name(author, github_client):
    """Look up *author*'s full name on Github.

    Returns None when no client is given or the user does not exist
    (HTTP 404); any other Github error is re-raised.
    """
    if not github_client:
        return None
    try:
        return github_client.get_user(author).name
    except GithubException as e:
        if e.status == 404:  # "not found" -> unknown user
            return None
        raise e
|
|
|
|
|
|
# Return the full name of the specified user on JIRA
|
|
# If the user doesn't exist, return None
|
|
def get_jira_name(author, jira_client):
    """Look up *author*'s display name on JIRA.

    Returns None when no client is given or the user does not exist
    (HTTP 404); any other JIRA error is re-raised.
    """
    if not jira_client:
        return None
    try:
        return jira_client.user(author).displayName
    except JIRAError as e:
        if e.status_code == 404:  # "not found" -> unknown user
            return None
        raise e
|
|
|
|
|
|
# Return whether the given name is in the form <First Name><space><Last Name>
|
|
def is_valid_author(author):
    """Return True when *author* looks like "<First> <Last>" with no digits."""
    if not author:
        return False
    has_space = " " in author
    has_digit = re.search("[0-9]", author) is not None
    return has_space and not has_digit
|
|
|
|
|
|
# Capitalize the first letter of each word in the given author name
|
|
def capitalize_author(author):
    """Capitalize the first letter of each space-separated word in *author*.

    Returns None for an empty or None input. Runs of multiple spaces are
    collapsed to one, because empty fragments from split() are dropped
    before re-joining.
    """
    if not author:
        return None
    capitalized = [word[0].capitalize() + word[1:]
                   for word in author.split(" ") if word]
    return " ".join(capitalized)
|