[Release] Update contributors list format and sort it
Additionally, we now warn the user when a duplicate author name is detected, in which case the user must merge the duplicate entries manually.
This commit is contained in:
parent
60698801eb
commit
4e1112e7b0
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -51,7 +51,7 @@ checkpoint
|
|||
derby.log
|
||||
dist/
|
||||
dev/create-release/*txt
|
||||
dev/create-release/*new
|
||||
dev/create-release/*final
|
||||
spark-*-bin-*.tgz
|
||||
unit-tests.log
|
||||
/lib/
|
||||
|
|
|
@ -64,3 +64,4 @@ dist/*
|
|||
logs
|
||||
.*scalastyle-output.xml
|
||||
.*dependency-reduced-pom.xml
|
||||
dev/create-release/known_translations
|
||||
|
|
|
@ -192,9 +192,9 @@ for commit in filtered_commits:
|
|||
print "==================================================================================\n"
|
||||
|
||||
# Write to contributors file ordered by author names
|
||||
# Each line takes the format "Author name - semi-colon delimited contributions"
|
||||
# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
|
||||
# e.g. Tathagata Das - Bug fixes and new features in Streaming
|
||||
# Each line takes the format " * Author name -- semi-colon delimited contributions"
|
||||
# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
|
||||
# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
|
||||
contributors_file = open(contributors_file_name, "w")
|
||||
authors = author_info.keys()
|
||||
authors.sort()
|
||||
|
@ -223,7 +223,7 @@ for author in authors:
|
|||
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
|
||||
if author in invalid_authors and invalid_authors[author]:
|
||||
author = author + "/" + "/".join(invalid_authors[author])
|
||||
line = "%s - %s" % (author, contribution)
|
||||
line = " * %s -- %s" % (author, contribution)
|
||||
contributors_file.write(line + "\n")
|
||||
contributors_file.close()
|
||||
print "Contributors list is successfully written to %s!" % contributors_file_name
|
||||
|
|
|
@ -43,14 +43,12 @@ if not JIRA_USERNAME or not JIRA_PASSWORD:
|
|||
if not GITHUB_API_TOKEN:
|
||||
sys.exit("GITHUB_API_TOKEN must be set")
|
||||
|
||||
# Write new contributors list to <old_file_name>.new
|
||||
# Write new contributors list to <old_file_name>.final
|
||||
if not os.path.isfile(contributors_file_name):
|
||||
print "Contributors file %s does not exist!" % contributors_file_name
|
||||
print "Have you run ./generate-contributors.py yet?"
|
||||
sys.exit(1)
|
||||
contributors_file = open(contributors_file_name, "r")
|
||||
new_contributors_file_name = contributors_file_name + ".new"
|
||||
new_contributors_file = open(new_contributors_file_name, "w")
|
||||
warnings = []
|
||||
|
||||
# In non-interactive mode, this script will choose the first replacement that is valid
|
||||
|
@ -73,7 +71,7 @@ known_translations_file_name = "known_translations"
|
|||
known_translations_file = open(known_translations_file_name, "r")
|
||||
for line in known_translations_file:
|
||||
if line.startswith("#"): continue
|
||||
[old_name, new_name] = line.split(" - ")
|
||||
[old_name, new_name] = line.strip("\n").split(" - ")
|
||||
known_translations[old_name] = new_name
|
||||
known_translations_file.close()
|
||||
|
||||
|
@ -147,16 +145,16 @@ def generate_candidates(author, issues):
|
|||
# If no such name exists, the original name is used (without the JIRA numbers).
|
||||
print "\n========================== Translating contributor list =========================="
|
||||
lines = contributors_file.readlines()
|
||||
contributions = []
|
||||
for i, line in enumerate(lines):
|
||||
temp_author = line.split(" - ")[0]
|
||||
temp_author = line.strip(" * ").split(" -- ")[0]
|
||||
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
|
||||
if not temp_author:
|
||||
error_msg = " ERROR: Expected the following format <author> - <contributions>\n"
|
||||
error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
|
||||
error_msg += " ERROR: Actual = %s" % line
|
||||
print error_msg
|
||||
warnings.append(error_msg)
|
||||
new_contributors_file.write(line)
|
||||
new_contributors_file.flush()
|
||||
contributions.append(line)
|
||||
continue
|
||||
author = temp_author.split("/")[0]
|
||||
# Use the local copy of known translations where possible
|
||||
|
@ -222,10 +220,26 @@ for i, line in enumerate(lines):
|
|||
known_translations_file.write("%s - %s\n" % (author, new_author))
|
||||
known_translations_file.flush()
|
||||
line = line.replace(temp_author, author)
|
||||
new_contributors_file.write(line)
|
||||
new_contributors_file.flush()
|
||||
contributions.append(line)
|
||||
print "==================================================================================\n"
|
||||
contributors_file.close()
|
||||
known_translations_file.close()
|
||||
|
||||
# Sort the contributions before writing them to the new file.
|
||||
# Additionally, check if there are any duplicate author rows.
|
||||
# This could happen if the same user has both a valid full
|
||||
# name (e.g. Andrew Or) and an invalid one (andrewor14).
|
||||
# If so, warn the user about this at the end.
|
||||
contributions.sort()
|
||||
all_authors = set()
|
||||
new_contributors_file_name = contributors_file_name + ".final"
|
||||
new_contributors_file = open(new_contributors_file_name, "w")
|
||||
for line in contributions:
|
||||
author = line.strip(" * ").split(" -- ")[0]
|
||||
if author in all_authors:
|
||||
warnings.append("Detected duplicate author name %s. Please merge these manually." % author)
|
||||
all_authors.add(author)
|
||||
new_contributors_file.write(line)
|
||||
new_contributors_file.close()
|
||||
|
||||
print "Translated contributors list successfully written to %s!" % new_contributors_file_name
|
||||
|
|
Loading…
Reference in a new issue