[SPARK-34551][INFRA] Fix credit related scripts to recover, drop Python 2 and work with Python 3

### What changes were proposed in this pull request? This PR proposes to make the scripts work by: - Recovering credit-related scripts that were broken by https://github.com/apache/spark/pull/29563 (`raw_input` does not exist in `releaseutils`; it is a builtin only in Python 2) - Dropping Python 2 in these scripts because we dropped Python 2 in https://github.com/apache/spark/pull/28957 - Making these scripts work with Python 3 ### Why are the changes needed? To unblock the release. ### Does this PR introduce _any_ user-facing change? No, it's a dev-only change. ### How was this patch tested? I manually tested against Spark 3.1.1 RC3. Closes #31660 from HyukjinKwon/SPARK-34551. Authored-by: HyukjinKwon <gurwls223@apache.org> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
2021-02-26 20:19:33 +09:00 · 2021-02-26 20:19:33 +09:00 · 5b92531937
parent 5c7d019b60
commit 5b92531937
4 changed files with 12 additions and 36 deletions
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@ -22,7 +22,7 @@ import os
 import re
 import sys

-from releaseutils import tag_exists, raw_input, get_commits, yesOrNoPrompt, get_date, \
+from releaseutils import tag_exists, get_commits, yesOrNoPrompt, get_date, \
    is_valid_author, capitalize_author, JIRA, find_components, translate_issue_type, \
    translate_component, CORE_COMPONENT, contributors_file_name, nice_join

@ -33,10 +33,10 @@ PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")

 # If the release tags are not provided, prompt the user to provide them
 while not tag_exists(RELEASE_TAG):
-    RELEASE_TAG = raw_input("Please provide a valid release tag: ")
+    RELEASE_TAG = input("Please provide a valid release tag: ")
 while not tag_exists(PREVIOUS_RELEASE_TAG):
    print("Please specify the previous release tag.")
-    PREVIOUS_RELEASE_TAG = raw_input(
+    PREVIOUS_RELEASE_TAG = input(
        "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")

 # Gather commits found in the new tag but not in the old tag.
@ -236,7 +236,7 @@ print("=========================================================================
 # e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
 # e.g. * Tathagata Das -- Bug fixes and new features in Streaming
 contributors_file = open(contributors_file_name, "w")
-authors = author_info.keys()
+authors = list(author_info.keys())
 authors.sort()
 for author in authors:
    contribution = ""
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@ -42,13 +42,6 @@ except ImportError:
    print("Install using 'sudo pip install PyGithub'")
    sys.exit(-1)

-try:
-    import unidecode
-except ImportError:
-    print("This tool requires the unidecode library to decode obscure github usernames")
-    print("Install using 'sudo pip install unidecode'")
-    sys.exit(-1)
-

 # Contributors list file name
 contributors_file_name = "contributors.txt"
@ -64,11 +57,11 @@ def yesOrNoPrompt(msg):

 # Utility functions run git commands (written with Git 1.8.5)
 def run_cmd(cmd):
-    return Popen(cmd, stdout=PIPE).communicate()[0]
+    return Popen(cmd, stdout=PIPE).communicate()[0].decode("utf8")


 def run_cmd_error(cmd):
-    return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
+    return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1].decode("utf8")


 def get_date(commit_hash):
@ -149,9 +142,7 @@ def get_commits(tag):
            # username so we can translate it properly later
            if not is_valid_author(author):
                author = github_username
-        # Guard against special characters
-        author = str(author)
-        author = unidecode.unidecode(author).strip()
+        author = author.strip()
        commit = Commit(_hash, author, title, pr_number)
        commits.append(commit)
    return commits
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@ -32,14 +32,7 @@ import os
 import sys

 from releaseutils import JIRA, JIRAError, get_jira_name, Github, get_github_name, \
-    contributors_file_name, is_valid_author, raw_input, capitalize_author, yesOrNoPrompt
-
-try:
-    import unidecode
-except ImportError:
-    print("This tool requires the unidecode library to decode obscure github usernames")
-    print("Install using 'sudo pip install unidecode'")
-    sys.exit(-1)
+    contributors_file_name, is_valid_author, capitalize_author, yesOrNoPrompt

 # You must set the following before use!
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
@ -139,15 +132,8 @@ def generate_candidates(author, issues):
                    (NOT_FOUND, "No full name found for %s assignee %s" % (issue, user_name)))
        else:
            candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
-    # Guard against special characters in candidate names
-    # Note that the candidate name may already be in unicode (JIRA returns this)
    for i, (candidate, source) in enumerate(candidates):
-        try:
-            candidate = unicode(candidate, "UTF-8")  # noqa: F821
-        except TypeError:
-            # already in unicode
-            pass
-        candidate = unidecode.unidecode(candidate).strip()
+        candidate = candidate.strip()
        candidates[i] = (candidate, source)
    return candidates

@ -209,13 +195,13 @@ for i, line in enumerate(lines):
        if INTERACTIVE_MODE:
            print("    [%d] %s - Raw GitHub username" % (raw_index, author))
            print("    [%d] Custom" % custom_index)
-            response = raw_input("    Your choice: ")
+            response = input("    Your choice: ")
            last_index = custom_index
            while not response.isdigit() or int(response) > last_index:
-                response = raw_input("    Please enter an integer between 0 and %d: " % last_index)
+                response = input("    Please enter an integer between 0 and %d: " % last_index)
            response = int(response)
            if response == custom_index:
-                new_author = raw_input("    Please type a custom name for this author: ")
+                new_author = input("    Please type a custom name for this author: ")
            elif response != raw_index:
                new_author = candidate_names[response]
        # In non-interactive mode, just pick the first candidate
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@ -1,7 +1,6 @@
 flake8==3.5.0
 jira==1.0.3
 PyGithub==1.26.0
-Unidecode==0.04.19
 sphinx
 pydata_sphinx_theme
 ipython