[SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts
## What changes were proposed in this pull request? This PR proposes to check pep8 against all other Python scripts and fix the errors as below: ```bash ./dev/create-release/generate-contributors.py ./dev/create-release/releaseutils.py ./dev/create-release/translate-contributors.py ./dev/lint-python ./python/docs/epytext.py ./examples/src/main/python/mllib/decision_tree_classification_example.py ./examples/src/main/python/mllib/decision_tree_regression_example.py ./examples/src/main/python/mllib/gradient_boosting_classification_example.py ./examples/src/main/python/mllib/gradient_boosting_regression_example.py ./examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./examples/src/main/python/mllib/naive_bayes_example.py ./examples/src/main/python/mllib/random_forest_classification_example.py ./examples/src/main/python/mllib/random_forest_regression_example.py ./examples/src/main/python/mllib/svm_with_sgd_example.py ./examples/src/main/python/streaming/network_wordjoinsentiments.py ./sql/hive/src/test/resources/data/scripts/cat.py ./sql/hive/src/test/resources/data/scripts/cat_error.py ./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py ./sql/hive/src/test/resources/data/scripts/dumpdata_script.py ./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py ./sql/hive/src/test/resources/data/scripts/escapednewline.py ./sql/hive/src/test/resources/data/scripts/escapedtab.py ./sql/hive/src/test/resources/data/scripts/input20_script.py ./sql/hive/src/test/resources/data/scripts/newline.py ``` ## How was this patch tested? 
- `./python/docs/epytext.py` ```bash cd ./python/docs && make html ``` - pep8 check (Python 2.7 / Python 3.3.6) ``` ./dev/lint-python ``` - `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python -m doctest -v ./dev/merge_spark_pr.py ``` - `./dev/create-release/releaseutils.py` `./dev/create-release/generate-contributors.py` `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python generate-contributors.py python translate-contributors.py ``` - Examples (Python 2.7 / Python 3.3.6) ```bash ./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.py ./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py ``` - Examples (Python 2.7 only / Python 3.3.6 not working) ``` ./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py ./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py ``` - `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes) Manually tested only changed ones. - `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working) Manually tested this after disabling actually adding comments and links. And also via Jenkins tests. Author: hyukjinkwon <gurwls223@gmail.com> Closes #16405 from HyukjinKwon/minor-pep8.
This commit is contained in:
parent
f1330b1d9e
commit
46b2126024
|
@ -33,14 +33,14 @@ PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")
|
|||
while not tag_exists(RELEASE_TAG):
|
||||
RELEASE_TAG = raw_input("Please provide a valid release tag: ")
|
||||
while not tag_exists(PREVIOUS_RELEASE_TAG):
|
||||
print "Please specify the previous release tag."
|
||||
PREVIOUS_RELEASE_TAG = raw_input(\
|
||||
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
|
||||
print("Please specify the previous release tag.")
|
||||
PREVIOUS_RELEASE_TAG = raw_input(
|
||||
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
|
||||
|
||||
# Gather commits found in the new tag but not in the old tag.
|
||||
# This filters commits based on both the git hash and the PR number.
|
||||
# If either is present in the old tag, then we ignore the commit.
|
||||
print "Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)
|
||||
print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
|
||||
release_commits = get_commits(RELEASE_TAG)
|
||||
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)
|
||||
previous_release_hashes = set()
|
||||
|
@ -62,17 +62,20 @@ if not new_commits:
|
|||
sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
|
||||
|
||||
# Prompt the user for confirmation that the commit range is correct
|
||||
print "\n=================================================================================="
|
||||
print "JIRA server: %s" % JIRA_API_BASE
|
||||
print "Release tag: %s" % RELEASE_TAG
|
||||
print "Previous release tag: %s" % PREVIOUS_RELEASE_TAG
|
||||
print "Number of commits in this range: %s" % len(new_commits)
|
||||
print("\n==================================================================================")
|
||||
print("JIRA server: %s" % JIRA_API_BASE)
|
||||
print("Release tag: %s" % RELEASE_TAG)
|
||||
print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG)
|
||||
print("Number of commits in this range: %s" % len(new_commits))
|
||||
print
|
||||
|
||||
|
||||
def print_indented(_list):
|
||||
for x in _list: print " %s" % x
|
||||
for x in _list:
|
||||
print(" %s" % x)
|
||||
if yesOrNoPrompt("Show all commits?"):
|
||||
print_indented(new_commits)
|
||||
print "==================================================================================\n"
|
||||
print("==================================================================================\n")
|
||||
if not yesOrNoPrompt("Does this look correct?"):
|
||||
sys.exit("Ok, exiting")
|
||||
|
||||
|
@ -82,45 +85,76 @@ maintenance = []
|
|||
reverts = []
|
||||
nojiras = []
|
||||
filtered_commits = []
|
||||
|
||||
|
||||
def is_release(commit_title):
|
||||
return re.findall("\[release\]", commit_title.lower()) or\
|
||||
"preparing spark release" in commit_title.lower() or\
|
||||
"preparing development version" in commit_title.lower() or\
|
||||
"CHANGES.txt" in commit_title
|
||||
return re.findall("\[release\]", commit_title.lower()) or \
|
||||
"preparing spark release" in commit_title.lower() or \
|
||||
"preparing development version" in commit_title.lower() or \
|
||||
"CHANGES.txt" in commit_title
|
||||
|
||||
|
||||
def is_maintenance(commit_title):
|
||||
return "maintenance" in commit_title.lower() or\
|
||||
"manually close" in commit_title.lower()
|
||||
return "maintenance" in commit_title.lower() or \
|
||||
"manually close" in commit_title.lower()
|
||||
|
||||
|
||||
def has_no_jira(commit_title):
|
||||
return not re.findall("SPARK-[0-9]+", commit_title.upper())
|
||||
|
||||
|
||||
def is_revert(commit_title):
|
||||
return "revert" in commit_title.lower()
|
||||
|
||||
|
||||
def is_docs(commit_title):
|
||||
return re.findall("docs*", commit_title.lower()) or\
|
||||
"programming guide" in commit_title.lower()
|
||||
return re.findall("docs*", commit_title.lower()) or \
|
||||
"programming guide" in commit_title.lower()
|
||||
|
||||
|
||||
for c in new_commits:
|
||||
t = c.get_title()
|
||||
if not t: continue
|
||||
elif is_release(t): releases.append(c)
|
||||
elif is_maintenance(t): maintenance.append(c)
|
||||
elif is_revert(t): reverts.append(c)
|
||||
elif is_docs(t): filtered_commits.append(c) # docs may not have JIRA numbers
|
||||
elif has_no_jira(t): nojiras.append(c)
|
||||
else: filtered_commits.append(c)
|
||||
if not t:
|
||||
continue
|
||||
elif is_release(t):
|
||||
releases.append(c)
|
||||
elif is_maintenance(t):
|
||||
maintenance.append(c)
|
||||
elif is_revert(t):
|
||||
reverts.append(c)
|
||||
elif is_docs(t):
|
||||
filtered_commits.append(c) # docs may not have JIRA numbers
|
||||
elif has_no_jira(t):
|
||||
nojiras.append(c)
|
||||
else:
|
||||
filtered_commits.append(c)
|
||||
|
||||
# Warn against ignored commits
|
||||
if releases or maintenance or reverts or nojiras:
|
||||
print "\n=================================================================================="
|
||||
if releases: print "Found %d release commits" % len(releases)
|
||||
if maintenance: print "Found %d maintenance commits" % len(maintenance)
|
||||
if reverts: print "Found %d revert commits" % len(reverts)
|
||||
if nojiras: print "Found %d commits with no JIRA" % len(nojiras)
|
||||
print "* Warning: these commits will be ignored.\n"
|
||||
print("\n==================================================================================")
|
||||
if releases:
|
||||
print("Found %d release commits" % len(releases))
|
||||
if maintenance:
|
||||
print("Found %d maintenance commits" % len(maintenance))
|
||||
if reverts:
|
||||
print("Found %d revert commits" % len(reverts))
|
||||
if nojiras:
|
||||
print("Found %d commits with no JIRA" % len(nojiras))
|
||||
print("* Warning: these commits will be ignored.\n")
|
||||
if yesOrNoPrompt("Show ignored commits?"):
|
||||
if releases: print "Release (%d)" % len(releases); print_indented(releases)
|
||||
if maintenance: print "Maintenance (%d)" % len(maintenance); print_indented(maintenance)
|
||||
if reverts: print "Revert (%d)" % len(reverts); print_indented(reverts)
|
||||
if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
|
||||
print "==================== Warning: the above commits will be ignored ==================\n"
|
||||
if releases:
|
||||
print("Release (%d)" % len(releases))
|
||||
print_indented(releases)
|
||||
if maintenance:
|
||||
print("Maintenance (%d)" % len(maintenance))
|
||||
print_indented(maintenance)
|
||||
if reverts:
|
||||
print("Revert (%d)" % len(reverts))
|
||||
print_indented(reverts)
|
||||
if nojiras:
|
||||
print("No JIRA (%d)" % len(nojiras))
|
||||
print_indented(nojiras)
|
||||
print("==================== Warning: the above commits will be ignored ==================\n")
|
||||
prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
|
||||
if not yesOrNoPrompt(prompt_msg):
|
||||
sys.exit("Ok, exiting.")
|
||||
|
@ -147,9 +181,9 @@ invalid_authors = {}
|
|||
# }
|
||||
#
|
||||
author_info = {}
|
||||
jira_options = { "server": JIRA_API_BASE }
|
||||
jira_client = JIRA(options = jira_options)
|
||||
print "\n=========================== Compiling contributor list ==========================="
|
||||
jira_options = {"server": JIRA_API_BASE}
|
||||
jira_client = JIRA(options=jira_options)
|
||||
print("\n=========================== Compiling contributor list ===========================")
|
||||
for commit in filtered_commits:
|
||||
_hash = commit.get_hash()
|
||||
title = commit.get_title()
|
||||
|
@ -168,8 +202,9 @@ for commit in filtered_commits:
|
|||
# Parse components from the commit title, if any
|
||||
commit_components = find_components(title, _hash)
|
||||
# Populate or merge an issue into author_info[author]
|
||||
|
||||
def populate(issue_type, components):
|
||||
components = components or [CORE_COMPONENT] # assume core if no components provided
|
||||
components = components or [CORE_COMPONENT] # assume core if no components provided
|
||||
if author not in author_info:
|
||||
author_info[author] = {}
|
||||
if issue_type not in author_info[author]:
|
||||
|
@ -182,17 +217,17 @@ for commit in filtered_commits:
|
|||
jira_issue = jira_client.issue(issue)
|
||||
jira_type = jira_issue.fields.issuetype.name
|
||||
jira_type = translate_issue_type(jira_type, issue, warnings)
|
||||
jira_components = [translate_component(c.name, _hash, warnings)\
|
||||
for c in jira_issue.fields.components]
|
||||
jira_components = [translate_component(c.name, _hash, warnings)
|
||||
for c in jira_issue.fields.components]
|
||||
all_components = set(jira_components + commit_components)
|
||||
populate(jira_type, all_components)
|
||||
except Exception as e:
|
||||
print "Unexpected error:", e
|
||||
print("Unexpected error:", e)
|
||||
# For docs without an associated JIRA, manually add it ourselves
|
||||
if is_docs(title) and not issues:
|
||||
populate("documentation", commit_components)
|
||||
print " Processed commit %s authored by %s on %s" % (_hash, author, date)
|
||||
print "==================================================================================\n"
|
||||
print(" Processed commit %s authored by %s on %s" % (_hash, author, date))
|
||||
print("==================================================================================\n")
|
||||
|
||||
# Write to contributors file ordered by author names
|
||||
# Each line takes the format " * Author name -- semi-colon delimited contributions"
|
||||
|
@ -215,8 +250,8 @@ for author in authors:
|
|||
# Otherwise, group contributions by issue types instead of modules
|
||||
# e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
|
||||
else:
|
||||
contributions = ["%s in %s" % (issue_type, nice_join(comps)) \
|
||||
for issue_type, comps in author_info[author].items()]
|
||||
contributions = ["%s in %s" % (issue_type, nice_join(comps))
|
||||
for issue_type, comps in author_info[author].items()]
|
||||
contribution = "; ".join(contributions)
|
||||
# Do not use python's capitalize() on the whole string to preserve case
|
||||
assert contribution
|
||||
|
@ -226,11 +261,11 @@ for author in authors:
|
|||
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
|
||||
if author in invalid_authors and invalid_authors[author]:
|
||||
author = author + "/" + "/".join(invalid_authors[author])
|
||||
#line = " * %s -- %s" % (author, contribution)
|
||||
# line = " * %s -- %s" % (author, contribution)
|
||||
line = author
|
||||
contributors_file.write(line + "\n")
|
||||
contributors_file.close()
|
||||
print "Contributors list is successfully written to %s!" % contributors_file_name
|
||||
print("Contributors list is successfully written to %s!" % contributors_file_name)
|
||||
|
||||
# Prompt the user to translate author names if necessary
|
||||
if invalid_authors:
|
||||
|
@ -241,8 +276,8 @@ if invalid_authors:
|
|||
|
||||
# Log any warnings encountered in the process
|
||||
if warnings:
|
||||
print "\n============ Warnings encountered while creating the contributor list ============"
|
||||
for w in warnings: print w
|
||||
print "Please correct these in the final contributors list at %s." % contributors_file_name
|
||||
print "==================================================================================\n"
|
||||
|
||||
print("\n============ Warnings encountered while creating the contributor list ============")
|
||||
for w in warnings:
|
||||
print(w)
|
||||
print("Please correct these in the final contributors list at %s." % contributors_file_name)
|
||||
print("==================================================================================\n")
|
||||
|
|
|
@ -30,28 +30,29 @@ try:
|
|||
except ImportError:
|
||||
from jira.utils import JIRAError
|
||||
except ImportError:
|
||||
print "This tool requires the jira-python library"
|
||||
print "Install using 'sudo pip install jira'"
|
||||
print("This tool requires the jira-python library")
|
||||
print("Install using 'sudo pip install jira'")
|
||||
sys.exit(-1)
|
||||
|
||||
try:
|
||||
from github import Github
|
||||
from github import GithubException
|
||||
except ImportError:
|
||||
print "This tool requires the PyGithub library"
|
||||
print "Install using 'sudo pip install PyGithub'"
|
||||
print("This tool requires the PyGithub library")
|
||||
print("Install using 'sudo pip install PyGithub'")
|
||||
sys.exit(-1)
|
||||
|
||||
try:
|
||||
import unidecode
|
||||
except ImportError:
|
||||
print "This tool requires the unidecode library to decode obscure github usernames"
|
||||
print "Install using 'sudo pip install unidecode'"
|
||||
print("This tool requires the unidecode library to decode obscure github usernames")
|
||||
print("Install using 'sudo pip install unidecode'")
|
||||
sys.exit(-1)
|
||||
|
||||
# Contributors list file name
|
||||
contributors_file_name = "contributors.txt"
|
||||
|
||||
|
||||
# Prompt the user to answer yes or no until they do so
|
||||
def yesOrNoPrompt(msg):
|
||||
response = raw_input("%s [y/n]: " % msg)
|
||||
|
@ -59,30 +60,50 @@ def yesOrNoPrompt(msg):
|
|||
return yesOrNoPrompt(msg)
|
||||
return response == "y"
|
||||
|
||||
|
||||
# Utility functions run git commands (written with Git 1.8.5)
|
||||
def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
|
||||
def run_cmd_error(cmd): return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
|
||||
def run_cmd(cmd):
|
||||
return Popen(cmd, stdout=PIPE).communicate()[0]
|
||||
|
||||
|
||||
def run_cmd_error(cmd):
|
||||
return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
|
||||
|
||||
|
||||
def get_date(commit_hash):
|
||||
return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", commit_hash])
|
||||
|
||||
|
||||
def tag_exists(tag):
|
||||
stderr = run_cmd_error(["git", "show", tag])
|
||||
return "error" not in stderr
|
||||
|
||||
|
||||
# A type-safe representation of a commit
|
||||
class Commit:
|
||||
def __init__(self, _hash, author, title, pr_number = None):
|
||||
def __init__(self, _hash, author, title, pr_number=None):
|
||||
self._hash = _hash
|
||||
self.author = author
|
||||
self.title = title
|
||||
self.pr_number = pr_number
|
||||
def get_hash(self): return self._hash
|
||||
def get_author(self): return self.author
|
||||
def get_title(self): return self.title
|
||||
def get_pr_number(self): return self.pr_number
|
||||
|
||||
def get_hash(self):
|
||||
return self._hash
|
||||
|
||||
def get_author(self):
|
||||
return self.author
|
||||
|
||||
def get_title(self):
|
||||
return self.title
|
||||
|
||||
def get_pr_number(self):
|
||||
return self.pr_number
|
||||
|
||||
def __str__(self):
|
||||
closes_pr = "(Closes #%s)" % self.pr_number if self.pr_number else ""
|
||||
return "%s %s %s %s" % (self._hash, self.author, self.title, closes_pr)
|
||||
|
||||
|
||||
# Return all commits that belong to the specified tag.
|
||||
#
|
||||
# Under the hood, this runs a `git log` on that tag and parses the fields
|
||||
|
@ -106,8 +127,9 @@ def get_commits(tag):
|
|||
raw_commits = [c for c in output.split(commit_start_marker) if c]
|
||||
for commit in raw_commits:
|
||||
if commit.count(commit_end_marker) != 1:
|
||||
print "Commit end marker not found in commit: "
|
||||
for line in commit.split("\n"): print line
|
||||
print("Commit end marker not found in commit: ")
|
||||
for line in commit.split("\n"):
|
||||
print(line)
|
||||
sys.exit(1)
|
||||
# Separate commit digest from the body
|
||||
# From the digest we extract the hash, author and the title
|
||||
|
@ -178,6 +200,7 @@ known_components = {
|
|||
"yarn": "YARN"
|
||||
}
|
||||
|
||||
|
||||
# Translate issue types using a format appropriate for writing contributions
|
||||
# If an unknown issue type is encountered, warn the user
|
||||
def translate_issue_type(issue_type, issue_id, warnings):
|
||||
|
@ -188,6 +211,7 @@ def translate_issue_type(issue_type, issue_id, warnings):
|
|||
warnings.append("Unknown issue type \"%s\" (see %s)" % (issue_type, issue_id))
|
||||
return issue_type
|
||||
|
||||
|
||||
# Translate component names using a format appropriate for writing contributions
|
||||
# If an unknown component is encountered, warn the user
|
||||
def translate_component(component, commit_hash, warnings):
|
||||
|
@ -198,20 +222,22 @@ def translate_component(component, commit_hash, warnings):
|
|||
warnings.append("Unknown component \"%s\" (see %s)" % (component, commit_hash))
|
||||
return component
|
||||
|
||||
|
||||
# Parse components in the commit message
|
||||
# The returned components are already filtered and translated
|
||||
def find_components(commit, commit_hash):
|
||||
components = re.findall("\[\w*\]", commit.lower())
|
||||
components = [translate_component(c, commit_hash)\
|
||||
for c in components if c in known_components]
|
||||
components = [translate_component(c, commit_hash)
|
||||
for c in components if c in known_components]
|
||||
return components
|
||||
|
||||
|
||||
# Join a list of strings in a human-readable manner
|
||||
# e.g. ["Juice"] -> "Juice"
|
||||
# e.g. ["Juice", "baby"] -> "Juice and baby"
|
||||
# e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
|
||||
def nice_join(str_list):
|
||||
str_list = list(str_list) # sometimes it's a set
|
||||
str_list = list(str_list) # sometimes it's a set
|
||||
if not str_list:
|
||||
return ""
|
||||
elif len(str_list) == 1:
|
||||
|
@ -221,6 +247,7 @@ def nice_join(str_list):
|
|||
else:
|
||||
return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
|
||||
|
||||
|
||||
# Return the full name of the specified user on Github
|
||||
# If the user doesn't exist, return None
|
||||
def get_github_name(author, github_client):
|
||||
|
@ -233,6 +260,7 @@ def get_github_name(author, github_client):
|
|||
raise e
|
||||
return None
|
||||
|
||||
|
||||
# Return the full name of the specified user on JIRA
|
||||
# If the user doesn't exist, return None
|
||||
def get_jira_name(author, jira_client):
|
||||
|
@ -245,15 +273,18 @@ def get_jira_name(author, jira_client):
|
|||
raise e
|
||||
return None
|
||||
|
||||
|
||||
# Return whether the given name is in the form <First Name><space><Last Name>
|
||||
def is_valid_author(author):
|
||||
if not author: return False
|
||||
if not author:
|
||||
return False
|
||||
return " " in author and not re.findall("[0-9]", author)
|
||||
|
||||
|
||||
# Capitalize the first letter of each word in the given author name
|
||||
def capitalize_author(author):
|
||||
if not author: return None
|
||||
if not author:
|
||||
return None
|
||||
words = author.split(" ")
|
||||
words = [w[0].capitalize() + w[1:] for w in words if w]
|
||||
return " ".join(words)
|
||||
|
||||
|
|
|
@ -45,8 +45,8 @@ if not GITHUB_API_TOKEN:
|
|||
|
||||
# Write new contributors list to <old_file_name>.final
|
||||
if not os.path.isfile(contributors_file_name):
|
||||
print "Contributors file %s does not exist!" % contributors_file_name
|
||||
print "Have you run ./generate-contributors.py yet?"
|
||||
print("Contributors file %s does not exist!" % contributors_file_name)
|
||||
print("Have you run ./generate-contributors.py yet?")
|
||||
sys.exit(1)
|
||||
contributors_file = open(contributors_file_name, "r")
|
||||
warnings = []
|
||||
|
@ -58,11 +58,11 @@ if len(sys.argv) > 1:
|
|||
if "--non-interactive" in options:
|
||||
INTERACTIVE_MODE = False
|
||||
if INTERACTIVE_MODE:
|
||||
print "Running in interactive mode. To disable this, provide the --non-interactive flag."
|
||||
print("Running in interactive mode. To disable this, provide the --non-interactive flag.")
|
||||
|
||||
# Setup Github and JIRA clients
|
||||
jira_options = { "server": JIRA_API_BASE }
|
||||
jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
|
||||
jira_options = {"server": JIRA_API_BASE}
|
||||
jira_client = JIRA(options=jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
|
||||
github_client = Github(GITHUB_API_TOKEN)
|
||||
|
||||
# Load known author translations that are cached locally
|
||||
|
@ -70,7 +70,8 @@ known_translations = {}
|
|||
known_translations_file_name = "known_translations"
|
||||
known_translations_file = open(known_translations_file_name, "r")
|
||||
for line in known_translations_file:
|
||||
if line.startswith("#"): continue
|
||||
if line.startswith("#"):
|
||||
continue
|
||||
[old_name, new_name] = line.strip("\n").split(" - ")
|
||||
known_translations[old_name] = new_name
|
||||
known_translations_file.close()
|
||||
|
@ -91,6 +92,8 @@ known_translations_file = open(known_translations_file_name, "a")
|
|||
# (NOT_FOUND, "No assignee found for SPARK-1763")
|
||||
# ]
|
||||
NOT_FOUND = "Not found"
|
||||
|
||||
|
||||
def generate_candidates(author, issues):
|
||||
candidates = []
|
||||
# First check for full name of Github user
|
||||
|
@ -121,9 +124,11 @@ def generate_candidates(author, issues):
|
|||
user_name = jira_assignee.name
|
||||
display_name = jira_assignee.displayName
|
||||
if display_name:
|
||||
candidates.append((display_name, "Full name of %s assignee %s" % (issue, user_name)))
|
||||
candidates.append(
|
||||
(display_name, "Full name of %s assignee %s" % (issue, user_name)))
|
||||
else:
|
||||
candidates.append((NOT_FOUND, "No full name found for %s assignee %" % (issue, user_name)))
|
||||
candidates.append(
|
||||
(NOT_FOUND, "No full name found for %s assignee %s" % (issue, user_name)))
|
||||
else:
|
||||
candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
|
||||
# Guard against special characters in candidate names
|
||||
|
@ -143,18 +148,18 @@ def generate_candidates(author, issues):
|
|||
# select from this list. Additionally, the user may also choose to enter a custom name.
|
||||
# In non-interactive mode, this script picks the first valid author name from the candidates
|
||||
# If no such name exists, the original name is used (without the JIRA numbers).
|
||||
print "\n========================== Translating contributor list =========================="
|
||||
print("\n========================== Translating contributor list ==========================")
|
||||
lines = contributors_file.readlines()
|
||||
contributions = []
|
||||
for i, line in enumerate(lines):
|
||||
# It is possible that a line in the contributor file only has the github name, e.g. yhuai.
|
||||
# So, we need a strip() to remove the newline.
|
||||
temp_author = line.strip(" * ").split(" -- ")[0].strip()
|
||||
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
|
||||
print("Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines)))
|
||||
if not temp_author:
|
||||
error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
|
||||
error_msg += " ERROR: Actual = %s" % line
|
||||
print error_msg
|
||||
print(error_msg)
|
||||
warnings.append(error_msg)
|
||||
contributions.append(line)
|
||||
continue
|
||||
|
@ -175,8 +180,8 @@ for i, line in enumerate(lines):
|
|||
# [3] andrewor14 - Raw Github username
|
||||
# [4] Custom
|
||||
candidate_names = []
|
||||
bad_prompts = [] # Prompts that can't actually be selected; print these first.
|
||||
good_prompts = [] # Prompts that contain valid choices
|
||||
bad_prompts = [] # Prompts that can't actually be selected; print these first.
|
||||
good_prompts = [] # Prompts that contain valid choices
|
||||
for candidate, source in candidates:
|
||||
if candidate == NOT_FOUND:
|
||||
bad_prompts.append(" [X] %s" % source)
|
||||
|
@ -186,13 +191,16 @@ for i, line in enumerate(lines):
|
|||
good_prompts.append(" [%d] %s - %s" % (index, candidate, source))
|
||||
raw_index = len(candidate_names)
|
||||
custom_index = len(candidate_names) + 1
|
||||
for p in bad_prompts: print p
|
||||
if bad_prompts: print " ---"
|
||||
for p in good_prompts: print p
|
||||
for p in bad_prompts:
|
||||
print(p)
|
||||
if bad_prompts:
|
||||
print(" ---")
|
||||
for p in good_prompts:
|
||||
print(p)
|
||||
# In interactive mode, additionally provide "custom" option and await user response
|
||||
if INTERACTIVE_MODE:
|
||||
print " [%d] %s - Raw Github username" % (raw_index, author)
|
||||
print " [%d] Custom" % custom_index
|
||||
print(" [%d] %s - Raw Github username" % (raw_index, author))
|
||||
print(" [%d] Custom" % custom_index)
|
||||
response = raw_input(" Your choice: ")
|
||||
last_index = custom_index
|
||||
while not response.isdigit() or int(response) > last_index:
|
||||
|
@ -204,8 +212,8 @@ for i, line in enumerate(lines):
|
|||
new_author = candidate_names[response]
|
||||
# In non-interactive mode, just pick the first candidate
|
||||
else:
|
||||
valid_candidate_names = [name for name, _ in candidates\
|
||||
if is_valid_author(name) and name != NOT_FOUND]
|
||||
valid_candidate_names = [name for name, _ in candidates
|
||||
if is_valid_author(name) and name != NOT_FOUND]
|
||||
if valid_candidate_names:
|
||||
new_author = valid_candidate_names[0]
|
||||
# Finally, capitalize the author and replace the original one with it
|
||||
|
@ -213,17 +221,20 @@ for i, line in enumerate(lines):
|
|||
if is_valid_author(new_author):
|
||||
new_author = capitalize_author(new_author)
|
||||
else:
|
||||
warnings.append("Unable to find a valid name %s for author %s" % (author, temp_author))
|
||||
print " * Replacing %s with %s" % (author, new_author)
|
||||
# If we are in interactive mode, prompt the user whether we want to remember this new mapping
|
||||
if INTERACTIVE_MODE and\
|
||||
author not in known_translations and\
|
||||
yesOrNoPrompt(" Add mapping %s -> %s to known translations file?" % (author, new_author)):
|
||||
warnings.append(
|
||||
"Unable to find a valid name %s for author %s" % (author, temp_author))
|
||||
print(" * Replacing %s with %s" % (author, new_author))
|
||||
# If we are in interactive mode, prompt the user whether we want to remember this new
|
||||
# mapping
|
||||
if INTERACTIVE_MODE and \
|
||||
author not in known_translations and \
|
||||
yesOrNoPrompt(
|
||||
" Add mapping %s -> %s to known translations file?" % (author, new_author)):
|
||||
known_translations_file.write("%s - %s\n" % (author, new_author))
|
||||
known_translations_file.flush()
|
||||
line = line.replace(temp_author, author)
|
||||
contributions.append(line)
|
||||
print "==================================================================================\n"
|
||||
print("==================================================================================\n")
|
||||
contributors_file.close()
|
||||
known_translations_file.close()
|
||||
|
||||
|
@ -244,12 +255,13 @@ for line in contributions:
|
|||
new_contributors_file.write(line)
|
||||
new_contributors_file.close()
|
||||
|
||||
print "Translated contributors list successfully written to %s!" % new_contributors_file_name
|
||||
print("Translated contributors list successfully written to %s!" % new_contributors_file_name)
|
||||
|
||||
# Log any warnings encountered in the process
|
||||
if warnings:
|
||||
print "\n========== Warnings encountered while translating the contributor list ==========="
|
||||
for w in warnings: print w
|
||||
print "Please manually correct these in the final contributors list at %s." % new_contributors_file_name
|
||||
print "==================================================================================\n"
|
||||
|
||||
print("\n========== Warnings encountered while translating the contributor list ===========")
|
||||
for w in warnings:
|
||||
print(w)
|
||||
print("Please manually correct these in the final contributors list at %s." %
|
||||
new_contributors_file_name)
|
||||
print("==================================================================================\n")
|
||||
|
|
|
@ -27,8 +27,8 @@ import urllib2
|
|||
try:
|
||||
import jira.client
|
||||
except ImportError:
|
||||
print "This tool requires the jira-python library"
|
||||
print "Install using 'sudo pip install jira'"
|
||||
print("This tool requires the jira-python library")
|
||||
print("Install using 'sudo pip install jira'")
|
||||
sys.exit(-1)
|
||||
|
||||
# User facing configs
|
||||
|
@ -48,16 +48,19 @@ MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))
|
|||
# the state of JIRA's that are tied to PR's we've already looked at.
|
||||
MAX_FILE = ".github-jira-max"
|
||||
|
||||
|
||||
def get_url(url):
|
||||
try:
|
||||
return urllib2.urlopen(url)
|
||||
except urllib2.HTTPError as e:
|
||||
print "Unable to fetch URL, exiting: %s" % url
|
||||
except urllib2.HTTPError:
|
||||
print("Unable to fetch URL, exiting: %s" % url)
|
||||
sys.exit(-1)
|
||||
|
||||
|
||||
def get_json(urllib_response):
|
||||
return json.load(urllib_response)
|
||||
|
||||
|
||||
# Return a list of (JIRA id, JSON dict) tuples:
|
||||
# e.g. [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. json ..})}
|
||||
def get_jira_prs():
|
||||
|
@ -65,83 +68,86 @@ def get_jira_prs():
|
|||
has_next_page = True
|
||||
page_num = 0
|
||||
while has_next_page:
|
||||
page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
|
||||
page_json = get_json(page)
|
||||
page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
|
||||
page_json = get_json(page)
|
||||
|
||||
for pull in page_json:
|
||||
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
|
||||
for jira in jiras:
|
||||
result = result + [(jira, pull)]
|
||||
for pull in page_json:
|
||||
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
|
||||
for jira in jiras:
|
||||
result = result + [(jira, pull)]
|
||||
|
||||
# Check if there is another page
|
||||
link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
|
||||
if not "next"in link_header:
|
||||
has_next_page = False
|
||||
else:
|
||||
page_num = page_num + 1
|
||||
# Check if there is another page
|
||||
link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
|
||||
if "next" not in link_header:
|
||||
has_next_page = False
|
||||
else:
|
||||
page_num += 1
|
||||
return result
|
||||
|
||||
|
||||
def set_max_pr(max_val):
|
||||
f = open(MAX_FILE, 'w')
|
||||
f.write("%s" % max_val)
|
||||
f.close()
|
||||
print "Writing largest PR number seen: %s" % max_val
|
||||
print("Writing largest PR number seen: %s" % max_val)
|
||||
|
||||
|
||||
def get_max_pr():
|
||||
if os.path.exists(MAX_FILE):
|
||||
result = int(open(MAX_FILE, 'r').read())
|
||||
print "Read largest PR number previously seen: %s" % result
|
||||
print("Read largest PR number previously seen: %s" % result)
|
||||
return result
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
jira_client = jira.client.JIRA({'server': JIRA_API_BASE},
|
||||
basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
|
||||
basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
|
||||
|
||||
jira_prs = get_jira_prs()
|
||||
|
||||
previous_max = get_max_pr()
|
||||
print "Retrieved %s JIRA PR's from Github" % len(jira_prs)
|
||||
print("Retrieved %s JIRA PR's from Github" % len(jira_prs))
|
||||
jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max]
|
||||
print "%s PR's remain after excluding visted ones" % len(jira_prs)
|
||||
print("%s PR's remain after excluding visted ones" % len(jira_prs))
|
||||
|
||||
num_updates = 0
|
||||
considered = []
|
||||
for issue, pr in sorted(jira_prs, key=lambda (k, v): int(v['number'])):
|
||||
for issue, pr in sorted(jira_prs, key=lambda kv: int(kv[1]['number'])):
|
||||
if num_updates >= MAX_UPDATES:
|
||||
break
|
||||
break
|
||||
pr_num = int(pr['number'])
|
||||
|
||||
print "Checking issue %s" % issue
|
||||
print("Checking issue %s" % issue)
|
||||
considered = considered + [pr_num]
|
||||
|
||||
url = pr['html_url']
|
||||
title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
|
||||
title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
|
||||
try:
|
||||
existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
|
||||
existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
|
||||
except:
|
||||
print "Failure reading JIRA %s (does it exist?)" % issue
|
||||
print sys.exc_info()[0]
|
||||
continue
|
||||
print("Failure reading JIRA %s (does it exist?)" % issue)
|
||||
print(sys.exc_info()[0])
|
||||
continue
|
||||
|
||||
if url in existing_links:
|
||||
continue
|
||||
|
||||
icon = {"title": "Pull request #%s" % pr['number'],
|
||||
"url16x16": "https://assets-cdn.github.com/favicon.ico"}
|
||||
icon = {"title": "Pull request #%s" % pr['number'],
|
||||
"url16x16": "https://assets-cdn.github.com/favicon.ico"}
|
||||
destination = {"title": title, "url": url, "icon": icon}
|
||||
# For all possible fields see:
|
||||
# https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
|
||||
# application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
|
||||
# https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
|
||||
# application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
|
||||
jira_client.add_remote_link(issue, destination)
|
||||
|
||||
|
||||
comment = "User '%s' has created a pull request for this issue:" % pr['user']['login']
|
||||
comment = comment + ("\n%s" % pr['html_url'])
|
||||
comment += "\n%s" % pr['html_url']
|
||||
if pr_num >= MIN_COMMENT_PR:
|
||||
jira_client.add_comment(issue, comment)
|
||||
|
||||
print "Added link %s <-> PR #%s" % (issue, pr['number'])
|
||||
num_updates = num_updates + 1
|
||||
|
||||
print("Added link %s <-> PR #%s" % (issue, pr['number']))
|
||||
num_updates += 1
|
||||
|
||||
if len(considered) > 0:
|
||||
set_max_pr(max(considered))
|
||||
|
|
|
@ -19,10 +19,8 @@
|
|||
|
||||
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
PATHS_TO_CHECK="./python/pyspark/ ./examples/src/main/python/ ./dev/sparktestsupport"
|
||||
# TODO: fix pep8 errors with the rest of the Python scripts under dev
|
||||
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/*.py ./dev/run-tests-jenkins.py"
|
||||
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/pip-sanity-check.py"
|
||||
# Exclude auto-geneated configuration file.
|
||||
PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" -not -path "*python/docs/conf.py" )"
|
||||
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
|
||||
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
|
||||
PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"
|
||||
|
|
|
@ -70,22 +70,22 @@ def get_json(url):
|
|||
return json.load(urllib2.urlopen(request))
|
||||
except urllib2.HTTPError as e:
|
||||
if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
|
||||
print "Exceeded the GitHub API rate limit; see the instructions in " + \
|
||||
"dev/merge_spark_pr.py to configure an OAuth token for making authenticated " + \
|
||||
"GitHub requests."
|
||||
print("Exceeded the GitHub API rate limit; see the instructions in " +
|
||||
"dev/merge_spark_pr.py to configure an OAuth token for making authenticated " +
|
||||
"GitHub requests.")
|
||||
else:
|
||||
print "Unable to fetch URL, exiting: %s" % url
|
||||
print("Unable to fetch URL, exiting: %s" % url)
|
||||
sys.exit(-1)
|
||||
|
||||
|
||||
def fail(msg):
|
||||
print msg
|
||||
print(msg)
|
||||
clean_up()
|
||||
sys.exit(-1)
|
||||
|
||||
|
||||
def run_cmd(cmd):
|
||||
print cmd
|
||||
print(cmd)
|
||||
if isinstance(cmd, list):
|
||||
return subprocess.check_output(cmd)
|
||||
else:
|
||||
|
@ -97,14 +97,15 @@ def continue_maybe(prompt):
|
|||
if result.lower() != "y":
|
||||
fail("Okay, exiting")
|
||||
|
||||
|
||||
def clean_up():
|
||||
print "Restoring head pointer to %s" % original_head
|
||||
print("Restoring head pointer to %s" % original_head)
|
||||
run_cmd("git checkout %s" % original_head)
|
||||
|
||||
branches = run_cmd("git branch").replace(" ", "").split("\n")
|
||||
|
||||
for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
|
||||
print "Deleting local branch %s" % branch
|
||||
print("Deleting local branch %s" % branch)
|
||||
run_cmd("git branch -D %s" % branch)
|
||||
|
||||
|
||||
|
@ -246,9 +247,9 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
|
|||
|
||||
if cur_status == "Resolved" or cur_status == "Closed":
|
||||
fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status))
|
||||
print ("=== JIRA %s ===" % jira_id)
|
||||
print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % (
|
||||
cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
|
||||
print("=== JIRA %s ===" % jira_id)
|
||||
print("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" %
|
||||
(cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
|
||||
|
||||
versions = asf_jira.project_versions("SPARK")
|
||||
versions = sorted(versions, key=lambda x: x.name, reverse=True)
|
||||
|
@ -282,10 +283,10 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
|
|||
resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
|
||||
resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
|
||||
asf_jira.transition_issue(
|
||||
jira_id, resolve["id"], fixVersions = jira_fix_versions,
|
||||
comment = comment, resolution = {'id': resolution.raw['id']})
|
||||
jira_id, resolve["id"], fixVersions=jira_fix_versions,
|
||||
comment=comment, resolution={'id': resolution.raw['id']})
|
||||
|
||||
print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
|
||||
print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
|
||||
|
||||
|
||||
def resolve_jira_issues(title, merge_branches, comment):
|
||||
|
@ -300,23 +301,29 @@ def resolve_jira_issues(title, merge_branches, comment):
|
|||
def standardize_jira_ref(text):
|
||||
"""
|
||||
Standardize the [SPARK-XXXXX] [MODULE] prefix
|
||||
Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX][MLLIB] Issue"
|
||||
Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to
|
||||
"[SPARK-XXX][MLLIB] Issue"
|
||||
|
||||
>>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
|
||||
>>> standardize_jira_ref(
|
||||
... "[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
|
||||
'[SPARK-5821][SQL] ParquetRelation2 CTAS should check if delete is successful'
|
||||
>>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
|
||||
>>> standardize_jira_ref(
|
||||
... "[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
|
||||
'[SPARK-4123][PROJECT INFRA][WIP] Show new dependencies added in pull requests'
|
||||
>>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
|
||||
'[SPARK-5954][MLLIB] Top by key'
|
||||
>>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
|
||||
'[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
|
||||
>>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
|
||||
>>> standardize_jira_ref(
|
||||
... "SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
|
||||
'[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
|
||||
>>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark")
|
||||
'[SPARK-1146][WIP] Vagrant support for Spark'
|
||||
>>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
|
||||
>>> standardize_jira_ref(
|
||||
... "SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
|
||||
'[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
|
||||
>>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
|
||||
>>> standardize_jira_ref(
|
||||
... "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
|
||||
'[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.'
|
||||
>>> standardize_jira_ref("Additional information for users building from source code")
|
||||
'Additional information for users building from source code'
|
||||
|
@ -350,7 +357,8 @@ def standardize_jira_ref(text):
|
|||
# Assemble full text (JIRA ref(s), module(s), remaining text)
|
||||
clean_text = ''.join(jira_refs).strip() + ''.join(components).strip() + " " + text.strip()
|
||||
|
||||
# Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included
|
||||
# Replace multiple spaces with a single space, e.g. if no jira refs and/or components were
|
||||
# included
|
||||
clean_text = re.sub(r'\s+', ' ', clean_text.strip())
|
||||
|
||||
return clean_text
|
||||
|
@ -385,17 +393,17 @@ def main():
|
|||
# Decide whether to use the modified title or not
|
||||
modified_title = standardize_jira_ref(pr["title"])
|
||||
if modified_title != pr["title"]:
|
||||
print "I've re-written the title as follows to match the standard format:"
|
||||
print "Original: %s" % pr["title"]
|
||||
print "Modified: %s" % modified_title
|
||||
print("I've re-written the title as follows to match the standard format:")
|
||||
print("Original: %s" % pr["title"])
|
||||
print("Modified: %s" % modified_title)
|
||||
result = raw_input("Would you like to use the modified title? (y/n): ")
|
||||
if result.lower() == "y":
|
||||
title = modified_title
|
||||
print "Using modified title:"
|
||||
print("Using modified title:")
|
||||
else:
|
||||
title = pr["title"]
|
||||
print "Using original title:"
|
||||
print title
|
||||
print("Using original title:")
|
||||
print(title)
|
||||
else:
|
||||
title = pr["title"]
|
||||
|
||||
|
@ -414,13 +422,13 @@ def main():
|
|||
merge_hash = merge_commits[0]["commit_id"]
|
||||
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
|
||||
|
||||
print "Pull request %s has already been merged, assuming you want to backport" % pr_num
|
||||
print("Pull request %s has already been merged, assuming you want to backport" % pr_num)
|
||||
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
|
||||
"%s^{commit}" % merge_hash]).strip() != ""
|
||||
"%s^{commit}" % merge_hash]).strip() != ""
|
||||
if not commit_is_downloaded:
|
||||
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
|
||||
|
||||
print "Found commit %s:\n%s" % (merge_hash, message)
|
||||
print("Found commit %s:\n%s" % (merge_hash, message))
|
||||
cherry_pick(pr_num, merge_hash, latest_branch)
|
||||
sys.exit(0)
|
||||
|
||||
|
@ -429,9 +437,9 @@ def main():
|
|||
"Continue? (experts only!)"
|
||||
continue_maybe(msg)
|
||||
|
||||
print ("\n=== Pull Request #%s ===" % pr_num)
|
||||
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
|
||||
title, pr_repo_desc, target_ref, url))
|
||||
print("\n=== Pull Request #%s ===" % pr_num)
|
||||
print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" %
|
||||
(title, pr_repo_desc, target_ref, url))
|
||||
continue_maybe("Proceed with merging pull request #%s?" % pr_num)
|
||||
|
||||
merged_refs = [target_ref]
|
||||
|
@ -445,14 +453,15 @@ def main():
|
|||
if JIRA_IMPORTED:
|
||||
if JIRA_USERNAME and JIRA_PASSWORD:
|
||||
continue_maybe("Would you like to update an associated JIRA?")
|
||||
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
|
||||
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % \
|
||||
(pr_num, GITHUB_BASE, pr_num)
|
||||
resolve_jira_issues(title, merged_refs, jira_comment)
|
||||
else:
|
||||
print "JIRA_USERNAME and JIRA_PASSWORD not set"
|
||||
print "Exiting without trying to close the associated JIRA."
|
||||
print("JIRA_USERNAME and JIRA_PASSWORD not set")
|
||||
print("Exiting without trying to close the associated JIRA.")
|
||||
else:
|
||||
print "Could not find jira-python library. Run 'sudo pip install jira' to install."
|
||||
print "Exiting without trying to close the associated JIRA."
|
||||
print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
|
||||
print("Exiting without trying to close the associated JIRA.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
|
|
@ -44,7 +44,8 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
|
||||
testErr = labelsAndPredictions.filter(
|
||||
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
|
||||
print('Test Error = ' + str(testErr))
|
||||
print('Learned classification tree model:')
|
||||
print(model.toDebugString())
|
||||
|
|
|
@ -44,7 +44,7 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
|
||||
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
|
||||
float(testData.count())
|
||||
print('Test Mean Squared Error = ' + str(testMSE))
|
||||
print('Learned regression tree model:')
|
||||
|
|
|
@ -43,7 +43,8 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
|
||||
testErr = labelsAndPredictions.filter(
|
||||
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
|
||||
print('Test Error = ' + str(testErr))
|
||||
print('Learned classification GBT model:')
|
||||
print(model.toDebugString())
|
||||
|
|
|
@ -43,7 +43,7 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
|
||||
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
|
||||
float(testData.count())
|
||||
print('Test Mean Squared Error = ' + str(testMSE))
|
||||
print('Learned regression GBT model:')
|
||||
|
|
|
@ -44,7 +44,7 @@ if __name__ == "__main__":
|
|||
# Evaluate the model on training data
|
||||
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
|
||||
MSE = valuesAndPreds \
|
||||
.map(lambda (v, p): (v - p)**2) \
|
||||
.map(lambda vp: (vp[0] - vp[1])**2) \
|
||||
.reduce(lambda x, y: x + y) / valuesAndPreds.count()
|
||||
print("Mean Squared Error = " + str(MSE))
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Evaluating the model on training data
|
||||
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
|
||||
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
|
||||
trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
|
||||
print("Training Error = " + str(trainErr))
|
||||
|
||||
# Save and load model
|
||||
|
|
|
@ -50,7 +50,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Make prediction and test accuracy.
|
||||
predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
|
||||
accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
|
||||
accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
|
||||
print('model accuracy {}'.format(accuracy))
|
||||
|
||||
# Save and load model
|
||||
|
@ -59,7 +59,7 @@ if __name__ == "__main__":
|
|||
model.save(sc, output_dir)
|
||||
sameModel = NaiveBayesModel.load(sc, output_dir)
|
||||
predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
|
||||
accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
|
||||
accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
|
||||
print('sameModel accuracy {}'.format(accuracy))
|
||||
|
||||
# $example off$
|
||||
|
|
|
@ -45,7 +45,8 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
|
||||
testErr = labelsAndPredictions.filter(
|
||||
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
|
||||
print('Test Error = ' + str(testErr))
|
||||
print('Learned classification forest model:')
|
||||
print(model.toDebugString())
|
||||
|
|
|
@ -45,7 +45,7 @@ if __name__ == "__main__":
|
|||
# Evaluate model on test instances and compute test error
|
||||
predictions = model.predict(testData.map(lambda x: x.features))
|
||||
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
|
||||
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
|
||||
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
|
||||
float(testData.count())
|
||||
print('Test Mean Squared Error = ' + str(testMSE))
|
||||
print('Learned regression forest model:')
|
||||
|
|
|
@ -38,7 +38,7 @@ if __name__ == "__main__":
|
|||
|
||||
# Evaluating the model on training data
|
||||
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
|
||||
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
|
||||
trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
|
||||
print("Training Error = " + str(trainErr))
|
||||
|
||||
# Save and load model
|
||||
|
|
|
@ -67,8 +67,8 @@ if __name__ == "__main__":
|
|||
# with the static RDD inside the transform() method and then multiplying
|
||||
# the frequency of the words by its sentiment value
|
||||
happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \
|
||||
.map(lambda (word, tuple): (word, float(tuple[0]) * tuple[1])) \
|
||||
.map(lambda (word, happiness): (happiness, word)) \
|
||||
.map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \
|
||||
.map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \
|
||||
.transform(lambda rdd: rdd.sortByKey(False))
|
||||
|
||||
happiest_words.foreachRDD(print_happiest_words)
|
||||
|
|
|
@ -9,6 +9,7 @@ RULES = (
|
|||
('pyspark.rdd.RDD', 'RDD'),
|
||||
)
|
||||
|
||||
|
||||
def _convert_epytext(line):
|
||||
"""
|
||||
>>> _convert_epytext("L{A}")
|
||||
|
@ -19,9 +20,11 @@ def _convert_epytext(line):
|
|||
line = re.sub(p, sub, line)
|
||||
return line
|
||||
|
||||
|
||||
def _process_docstring(app, what, name, obj, options, lines):
|
||||
for i in range(len(lines)):
|
||||
lines[i] = _convert_epytext(lines[i])
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.connect("autodoc-process-docstring", _process_docstring)
|
||||
|
|
|
@ -16,14 +16,14 @@
|
|||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
import sys, re
|
||||
import datetime
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
|
||||
table_name=None
|
||||
if os.environ.has_key('hive_streaming_tablename'):
|
||||
table_name=os.environ['hive_streaming_tablename']
|
||||
table_name = None
|
||||
if os.environ in 'hive_streaming_tablename':
|
||||
table_name = os.environ['hive_streaming_tablename']
|
||||
|
||||
for line in sys.stdin:
|
||||
print line
|
||||
print >> sys.stderr, "dummy"
|
||||
print(line)
|
||||
print("dummy", file=sys.stderr)
|
||||
|
|
|
@ -19,6 +19,6 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print line
|
||||
print(line)
|
||||
|
||||
sys.exit(1)
|
||||
|
|
|
@ -19,6 +19,5 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print "1\\\\\\t2"
|
||||
print "1\\\\\\\\t2"
|
||||
|
||||
print("1\\\\\\t2")
|
||||
print("1\\\\\\\\t2")
|
||||
|
|
|
@ -19,9 +19,9 @@
|
|||
import sys
|
||||
|
||||
for i in xrange(50):
|
||||
for j in xrange(5):
|
||||
for k in xrange(20022):
|
||||
print 20000 * i + k
|
||||
for j in xrange(5):
|
||||
for k in xrange(20022):
|
||||
print(20000 * i + k)
|
||||
|
||||
for line in sys.stdin:
|
||||
pass
|
||||
pass
|
||||
|
|
|
@ -19,5 +19,4 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print "1\\\\r2"
|
||||
|
||||
print("1\\\\r2")
|
||||
|
|
|
@ -19,5 +19,4 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print "1\\\\n2"
|
||||
|
||||
print("1\\\\n2")
|
||||
|
|
|
@ -19,5 +19,4 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print "1\\\\t2"
|
||||
|
||||
print("1\\\\t2")
|
||||
|
|
|
@ -21,10 +21,10 @@ import re
|
|||
line = sys.stdin.readline()
|
||||
x = 1
|
||||
while line:
|
||||
tem = sys.stdin.readline()
|
||||
if line == tem:
|
||||
x = x + 1
|
||||
else:
|
||||
print str(x).strip()+'\t'+re.sub('\t','_',line.strip())
|
||||
line = tem
|
||||
x = 1
|
||||
tem = sys.stdin.readline()
|
||||
if line == tem:
|
||||
x += 1
|
||||
else:
|
||||
print(str(x).strip()+'\t'+re.sub('\t', '_', line.strip()))
|
||||
line = tem
|
||||
x = 1
|
||||
|
|
|
@ -19,6 +19,6 @@
|
|||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
print "1\\n2"
|
||||
print "1\\r2"
|
||||
print "1\\t2"
|
||||
print("1\\n2")
|
||||
print("1\\r2")
|
||||
print("1\\t2")
|
||||
|
|
Loading…
Reference in a new issue