[SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts

## What changes were proposed in this pull request?

This PR proposes to check pep8 against all other Python scripts and fix the errors as below:

```bash
./dev/create-release/generate-contributors.py
./dev/create-release/releaseutils.py
./dev/create-release/translate-contributors.py
./dev/lint-python
./python/docs/epytext.py
./examples/src/main/python/mllib/decision_tree_classification_example.py
./examples/src/main/python/mllib/decision_tree_regression_example.py
./examples/src/main/python/mllib/gradient_boosting_classification_example.py
./examples/src/main/python/mllib/gradient_boosting_regression_example.py
./examples/src/main/python/mllib/linear_regression_with_sgd_example.py
./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
./examples/src/main/python/mllib/naive_bayes_example.py
./examples/src/main/python/mllib/random_forest_classification_example.py
./examples/src/main/python/mllib/random_forest_regression_example.py
./examples/src/main/python/mllib/svm_with_sgd_example.py
./examples/src/main/python/streaming/network_wordjoinsentiments.py
./sql/hive/src/test/resources/data/scripts/cat.py
./sql/hive/src/test/resources/data/scripts/cat_error.py
./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py
./sql/hive/src/test/resources/data/scripts/dumpdata_script.py
./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py
./sql/hive/src/test/resources/data/scripts/escapednewline.py
./sql/hive/src/test/resources/data/scripts/escapedtab.py
./sql/hive/src/test/resources/data/scripts/input20_script.py
./sql/hive/src/test/resources/data/scripts/newline.py
```

## How was this patch tested?

- `./python/docs/epytext.py`

  ```bash
  cd ./python/docs && make html
  ```

- pep8 check (Python 2.7 / Python 3.3.6)

  ```
  ./dev/lint-python
  ```

- `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working)

  ```bash
  python -m doctest -v ./dev/merge_spark_pr.py
  ```

- `./dev/create-release/releaseutils.py` `./dev/create-release/generate-contributors.py` `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working)

  ```bash
  python generate-contributors.py
  python translate-contributors.py
  ```

- Examples (Python 2.7 / Python 3.3.6)

  ```bash
  ./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py
  ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.py
  ./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py
  ```

- Examples (Python 2.7 only / Python 3.3.6 not working)
  ```
  ./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py
  ./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
  ./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py
  ./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py
  ```

- `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes)

  Manually tested only changed ones.

- `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working)

  Manually tested this after disabling actually adding comments and links.

And also via Jenkins tests.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #16405 from HyukjinKwon/minor-pep8.
This commit is contained in:
hyukjinkwon 2017-01-02 15:23:19 +00:00 committed by Sean Owen
parent f1330b1d9e
commit 46b2126024
No known key found for this signature in database
GPG key ID: BEB3956D6717BDDC
27 changed files with 326 additions and 233 deletions

View file

@ -33,14 +33,14 @@ PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")
while not tag_exists(RELEASE_TAG):
RELEASE_TAG = raw_input("Please provide a valid release tag: ")
while not tag_exists(PREVIOUS_RELEASE_TAG):
print "Please specify the previous release tag."
PREVIOUS_RELEASE_TAG = raw_input(\
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
print("Please specify the previous release tag.")
PREVIOUS_RELEASE_TAG = raw_input(
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
# Gather commits found in the new tag but not in the old tag.
# This filters commits based on both the git hash and the PR number.
# If either is present in the old tag, then we ignore the commit.
print "Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)
print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
release_commits = get_commits(RELEASE_TAG)
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)
previous_release_hashes = set()
@ -62,17 +62,20 @@ if not new_commits:
sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
# Prompt the user for confirmation that the commit range is correct
print "\n=================================================================================="
print "JIRA server: %s" % JIRA_API_BASE
print "Release tag: %s" % RELEASE_TAG
print "Previous release tag: %s" % PREVIOUS_RELEASE_TAG
print "Number of commits in this range: %s" % len(new_commits)
print("\n==================================================================================")
print("JIRA server: %s" % JIRA_API_BASE)
print("Release tag: %s" % RELEASE_TAG)
print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG)
print("Number of commits in this range: %s" % len(new_commits))
print
def print_indented(_list):
for x in _list: print " %s" % x
for x in _list:
print(" %s" % x)
if yesOrNoPrompt("Show all commits?"):
print_indented(new_commits)
print "==================================================================================\n"
print("==================================================================================\n")
if not yesOrNoPrompt("Does this look correct?"):
sys.exit("Ok, exiting")
@ -82,45 +85,76 @@ maintenance = []
reverts = []
nojiras = []
filtered_commits = []
def is_release(commit_title):
return re.findall("\[release\]", commit_title.lower()) or\
"preparing spark release" in commit_title.lower() or\
"preparing development version" in commit_title.lower() or\
"CHANGES.txt" in commit_title
return re.findall("\[release\]", commit_title.lower()) or \
"preparing spark release" in commit_title.lower() or \
"preparing development version" in commit_title.lower() or \
"CHANGES.txt" in commit_title
def is_maintenance(commit_title):
return "maintenance" in commit_title.lower() or\
"manually close" in commit_title.lower()
return "maintenance" in commit_title.lower() or \
"manually close" in commit_title.lower()
def has_no_jira(commit_title):
return not re.findall("SPARK-[0-9]+", commit_title.upper())
def is_revert(commit_title):
return "revert" in commit_title.lower()
def is_docs(commit_title):
return re.findall("docs*", commit_title.lower()) or\
"programming guide" in commit_title.lower()
return re.findall("docs*", commit_title.lower()) or \
"programming guide" in commit_title.lower()
for c in new_commits:
t = c.get_title()
if not t: continue
elif is_release(t): releases.append(c)
elif is_maintenance(t): maintenance.append(c)
elif is_revert(t): reverts.append(c)
elif is_docs(t): filtered_commits.append(c) # docs may not have JIRA numbers
elif has_no_jira(t): nojiras.append(c)
else: filtered_commits.append(c)
if not t:
continue
elif is_release(t):
releases.append(c)
elif is_maintenance(t):
maintenance.append(c)
elif is_revert(t):
reverts.append(c)
elif is_docs(t):
filtered_commits.append(c) # docs may not have JIRA numbers
elif has_no_jira(t):
nojiras.append(c)
else:
filtered_commits.append(c)
# Warn against ignored commits
if releases or maintenance or reverts or nojiras:
print "\n=================================================================================="
if releases: print "Found %d release commits" % len(releases)
if maintenance: print "Found %d maintenance commits" % len(maintenance)
if reverts: print "Found %d revert commits" % len(reverts)
if nojiras: print "Found %d commits with no JIRA" % len(nojiras)
print "* Warning: these commits will be ignored.\n"
print("\n==================================================================================")
if releases:
print("Found %d release commits" % len(releases))
if maintenance:
print("Found %d maintenance commits" % len(maintenance))
if reverts:
print("Found %d revert commits" % len(reverts))
if nojiras:
print("Found %d commits with no JIRA" % len(nojiras))
print("* Warning: these commits will be ignored.\n")
if yesOrNoPrompt("Show ignored commits?"):
if releases: print "Release (%d)" % len(releases); print_indented(releases)
if maintenance: print "Maintenance (%d)" % len(maintenance); print_indented(maintenance)
if reverts: print "Revert (%d)" % len(reverts); print_indented(reverts)
if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
print "==================== Warning: the above commits will be ignored ==================\n"
if releases:
print("Release (%d)" % len(releases))
print_indented(releases)
if maintenance:
print("Maintenance (%d)" % len(maintenance))
print_indented(maintenance)
if reverts:
print("Revert (%d)" % len(reverts))
print_indented(reverts)
if nojiras:
print("No JIRA (%d)" % len(nojiras))
print_indented(nojiras)
print("==================== Warning: the above commits will be ignored ==================\n")
prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
if not yesOrNoPrompt(prompt_msg):
sys.exit("Ok, exiting.")
@ -147,9 +181,9 @@ invalid_authors = {}
# }
#
author_info = {}
jira_options = { "server": JIRA_API_BASE }
jira_client = JIRA(options = jira_options)
print "\n=========================== Compiling contributor list ==========================="
jira_options = {"server": JIRA_API_BASE}
jira_client = JIRA(options=jira_options)
print("\n=========================== Compiling contributor list ===========================")
for commit in filtered_commits:
_hash = commit.get_hash()
title = commit.get_title()
@ -168,8 +202,9 @@ for commit in filtered_commits:
# Parse components from the commit title, if any
commit_components = find_components(title, _hash)
# Populate or merge an issue into author_info[author]
def populate(issue_type, components):
components = components or [CORE_COMPONENT] # assume core if no components provided
components = components or [CORE_COMPONENT] # assume core if no components provided
if author not in author_info:
author_info[author] = {}
if issue_type not in author_info[author]:
@ -182,17 +217,17 @@ for commit in filtered_commits:
jira_issue = jira_client.issue(issue)
jira_type = jira_issue.fields.issuetype.name
jira_type = translate_issue_type(jira_type, issue, warnings)
jira_components = [translate_component(c.name, _hash, warnings)\
for c in jira_issue.fields.components]
jira_components = [translate_component(c.name, _hash, warnings)
for c in jira_issue.fields.components]
all_components = set(jira_components + commit_components)
populate(jira_type, all_components)
except Exception as e:
print "Unexpected error:", e
print("Unexpected error:", e)
# For docs without an associated JIRA, manually add it ourselves
if is_docs(title) and not issues:
populate("documentation", commit_components)
print " Processed commit %s authored by %s on %s" % (_hash, author, date)
print "==================================================================================\n"
print(" Processed commit %s authored by %s on %s" % (_hash, author, date))
print("==================================================================================\n")
# Write to contributors file ordered by author names
# Each line takes the format " * Author name -- semi-colon delimited contributions"
@ -215,8 +250,8 @@ for author in authors:
# Otherwise, group contributions by issue types instead of modules
# e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
else:
contributions = ["%s in %s" % (issue_type, nice_join(comps)) \
for issue_type, comps in author_info[author].items()]
contributions = ["%s in %s" % (issue_type, nice_join(comps))
for issue_type, comps in author_info[author].items()]
contribution = "; ".join(contributions)
# Do not use python's capitalize() on the whole string to preserve case
assert contribution
@ -226,11 +261,11 @@ for author in authors:
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
if author in invalid_authors and invalid_authors[author]:
author = author + "/" + "/".join(invalid_authors[author])
#line = " * %s -- %s" % (author, contribution)
# line = " * %s -- %s" % (author, contribution)
line = author
contributors_file.write(line + "\n")
contributors_file.close()
print "Contributors list is successfully written to %s!" % contributors_file_name
print("Contributors list is successfully written to %s!" % contributors_file_name)
# Prompt the user to translate author names if necessary
if invalid_authors:
@ -241,8 +276,8 @@ if invalid_authors:
# Log any warnings encountered in the process
if warnings:
print "\n============ Warnings encountered while creating the contributor list ============"
for w in warnings: print w
print "Please correct these in the final contributors list at %s." % contributors_file_name
print "==================================================================================\n"
print("\n============ Warnings encountered while creating the contributor list ============")
for w in warnings:
print(w)
print("Please correct these in the final contributors list at %s." % contributors_file_name)
print("==================================================================================\n")

View file

@ -30,28 +30,29 @@ try:
except ImportError:
from jira.utils import JIRAError
except ImportError:
print "This tool requires the jira-python library"
print "Install using 'sudo pip install jira'"
print("This tool requires the jira-python library")
print("Install using 'sudo pip install jira'")
sys.exit(-1)
try:
from github import Github
from github import GithubException
except ImportError:
print "This tool requires the PyGithub library"
print "Install using 'sudo pip install PyGithub'"
print("This tool requires the PyGithub library")
print("Install using 'sudo pip install PyGithub'")
sys.exit(-1)
try:
import unidecode
except ImportError:
print "This tool requires the unidecode library to decode obscure github usernames"
print "Install using 'sudo pip install unidecode'"
print("This tool requires the unidecode library to decode obscure github usernames")
print("Install using 'sudo pip install unidecode'")
sys.exit(-1)
# Contributors list file name
contributors_file_name = "contributors.txt"
# Prompt the user to answer yes or no until they do so
def yesOrNoPrompt(msg):
response = raw_input("%s [y/n]: " % msg)
@ -59,30 +60,50 @@ def yesOrNoPrompt(msg):
return yesOrNoPrompt(msg)
return response == "y"
# Utility functions run git commands (written with Git 1.8.5)
def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
def run_cmd_error(cmd): return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
def run_cmd(cmd):
return Popen(cmd, stdout=PIPE).communicate()[0]
def run_cmd_error(cmd):
return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
def get_date(commit_hash):
return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", commit_hash])
def tag_exists(tag):
stderr = run_cmd_error(["git", "show", tag])
return "error" not in stderr
# A type-safe representation of a commit
class Commit:
def __init__(self, _hash, author, title, pr_number = None):
def __init__(self, _hash, author, title, pr_number=None):
self._hash = _hash
self.author = author
self.title = title
self.pr_number = pr_number
def get_hash(self): return self._hash
def get_author(self): return self.author
def get_title(self): return self.title
def get_pr_number(self): return self.pr_number
def get_hash(self):
return self._hash
def get_author(self):
return self.author
def get_title(self):
return self.title
def get_pr_number(self):
return self.pr_number
def __str__(self):
closes_pr = "(Closes #%s)" % self.pr_number if self.pr_number else ""
return "%s %s %s %s" % (self._hash, self.author, self.title, closes_pr)
# Return all commits that belong to the specified tag.
#
# Under the hood, this runs a `git log` on that tag and parses the fields
@ -106,8 +127,9 @@ def get_commits(tag):
raw_commits = [c for c in output.split(commit_start_marker) if c]
for commit in raw_commits:
if commit.count(commit_end_marker) != 1:
print "Commit end marker not found in commit: "
for line in commit.split("\n"): print line
print("Commit end marker not found in commit: ")
for line in commit.split("\n"):
print(line)
sys.exit(1)
# Separate commit digest from the body
# From the digest we extract the hash, author and the title
@ -178,6 +200,7 @@ known_components = {
"yarn": "YARN"
}
# Translate issue types using a format appropriate for writing contributions
# If an unknown issue type is encountered, warn the user
def translate_issue_type(issue_type, issue_id, warnings):
@ -188,6 +211,7 @@ def translate_issue_type(issue_type, issue_id, warnings):
warnings.append("Unknown issue type \"%s\" (see %s)" % (issue_type, issue_id))
return issue_type
# Translate component names using a format appropriate for writing contributions
# If an unknown component is encountered, warn the user
def translate_component(component, commit_hash, warnings):
@ -198,20 +222,22 @@ def translate_component(component, commit_hash, warnings):
warnings.append("Unknown component \"%s\" (see %s)" % (component, commit_hash))
return component
# Parse components in the commit message
# The returned components are already filtered and translated
def find_components(commit, commit_hash):
components = re.findall("\[\w*\]", commit.lower())
components = [translate_component(c, commit_hash)\
for c in components if c in known_components]
components = [translate_component(c, commit_hash)
for c in components if c in known_components]
return components
# Join a list of strings in a human-readable manner
# e.g. ["Juice"] -> "Juice"
# e.g. ["Juice", "baby"] -> "Juice and baby"
# e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
def nice_join(str_list):
str_list = list(str_list) # sometimes it's a set
str_list = list(str_list) # sometimes it's a set
if not str_list:
return ""
elif len(str_list) == 1:
@ -221,6 +247,7 @@ def nice_join(str_list):
else:
return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
# Return the full name of the specified user on Github
# If the user doesn't exist, return None
def get_github_name(author, github_client):
@ -233,6 +260,7 @@ def get_github_name(author, github_client):
raise e
return None
# Return the full name of the specified user on JIRA
# If the user doesn't exist, return None
def get_jira_name(author, jira_client):
@ -245,15 +273,18 @@ def get_jira_name(author, jira_client):
raise e
return None
# Return whether the given name is in the form <First Name><space><Last Name>
def is_valid_author(author):
if not author: return False
if not author:
return False
return " " in author and not re.findall("[0-9]", author)
# Capitalize the first letter of each word in the given author name
def capitalize_author(author):
if not author: return None
if not author:
return None
words = author.split(" ")
words = [w[0].capitalize() + w[1:] for w in words if w]
return " ".join(words)

View file

@ -45,8 +45,8 @@ if not GITHUB_API_TOKEN:
# Write new contributors list to <old_file_name>.final
if not os.path.isfile(contributors_file_name):
print "Contributors file %s does not exist!" % contributors_file_name
print "Have you run ./generate-contributors.py yet?"
print("Contributors file %s does not exist!" % contributors_file_name)
print("Have you run ./generate-contributors.py yet?")
sys.exit(1)
contributors_file = open(contributors_file_name, "r")
warnings = []
@ -58,11 +58,11 @@ if len(sys.argv) > 1:
if "--non-interactive" in options:
INTERACTIVE_MODE = False
if INTERACTIVE_MODE:
print "Running in interactive mode. To disable this, provide the --non-interactive flag."
print("Running in interactive mode. To disable this, provide the --non-interactive flag.")
# Setup Github and JIRA clients
jira_options = { "server": JIRA_API_BASE }
jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
jira_options = {"server": JIRA_API_BASE}
jira_client = JIRA(options=jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
github_client = Github(GITHUB_API_TOKEN)
# Load known author translations that are cached locally
@ -70,7 +70,8 @@ known_translations = {}
known_translations_file_name = "known_translations"
known_translations_file = open(known_translations_file_name, "r")
for line in known_translations_file:
if line.startswith("#"): continue
if line.startswith("#"):
continue
[old_name, new_name] = line.strip("\n").split(" - ")
known_translations[old_name] = new_name
known_translations_file.close()
@ -91,6 +92,8 @@ known_translations_file = open(known_translations_file_name, "a")
# (NOT_FOUND, "No assignee found for SPARK-1763")
# ]
NOT_FOUND = "Not found"
def generate_candidates(author, issues):
candidates = []
# First check for full name of Github user
@ -121,9 +124,11 @@ def generate_candidates(author, issues):
user_name = jira_assignee.name
display_name = jira_assignee.displayName
if display_name:
candidates.append((display_name, "Full name of %s assignee %s" % (issue, user_name)))
candidates.append(
(display_name, "Full name of %s assignee %s" % (issue, user_name)))
else:
candidates.append((NOT_FOUND, "No full name found for %s assignee %" % (issue, user_name)))
candidates.append(
(NOT_FOUND, "No full name found for %s assignee %s" % (issue, user_name)))
else:
candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
# Guard against special characters in candidate names
@ -143,18 +148,18 @@ def generate_candidates(author, issues):
# select from this list. Additionally, the user may also choose to enter a custom name.
# In non-interactive mode, this script picks the first valid author name from the candidates
# If no such name exists, the original name is used (without the JIRA numbers).
print "\n========================== Translating contributor list =========================="
print("\n========================== Translating contributor list ==========================")
lines = contributors_file.readlines()
contributions = []
for i, line in enumerate(lines):
# It is possible that a line in the contributor file only has the github name, e.g. yhuai.
# So, we need a strip() to remove the newline.
temp_author = line.strip(" * ").split(" -- ")[0].strip()
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
print("Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines)))
if not temp_author:
error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
error_msg += " ERROR: Actual = %s" % line
print error_msg
print(error_msg)
warnings.append(error_msg)
contributions.append(line)
continue
@ -175,8 +180,8 @@ for i, line in enumerate(lines):
# [3] andrewor14 - Raw Github username
# [4] Custom
candidate_names = []
bad_prompts = [] # Prompts that can't actually be selected; print these first.
good_prompts = [] # Prompts that contain valid choices
bad_prompts = [] # Prompts that can't actually be selected; print these first.
good_prompts = [] # Prompts that contain valid choices
for candidate, source in candidates:
if candidate == NOT_FOUND:
bad_prompts.append(" [X] %s" % source)
@ -186,13 +191,16 @@ for i, line in enumerate(lines):
good_prompts.append(" [%d] %s - %s" % (index, candidate, source))
raw_index = len(candidate_names)
custom_index = len(candidate_names) + 1
for p in bad_prompts: print p
if bad_prompts: print " ---"
for p in good_prompts: print p
for p in bad_prompts:
print(p)
if bad_prompts:
print(" ---")
for p in good_prompts:
print(p)
# In interactive mode, additionally provide "custom" option and await user response
if INTERACTIVE_MODE:
print " [%d] %s - Raw Github username" % (raw_index, author)
print " [%d] Custom" % custom_index
print(" [%d] %s - Raw Github username" % (raw_index, author))
print(" [%d] Custom" % custom_index)
response = raw_input(" Your choice: ")
last_index = custom_index
while not response.isdigit() or int(response) > last_index:
@ -204,8 +212,8 @@ for i, line in enumerate(lines):
new_author = candidate_names[response]
# In non-interactive mode, just pick the first candidate
else:
valid_candidate_names = [name for name, _ in candidates\
if is_valid_author(name) and name != NOT_FOUND]
valid_candidate_names = [name for name, _ in candidates
if is_valid_author(name) and name != NOT_FOUND]
if valid_candidate_names:
new_author = valid_candidate_names[0]
# Finally, capitalize the author and replace the original one with it
@ -213,17 +221,20 @@ for i, line in enumerate(lines):
if is_valid_author(new_author):
new_author = capitalize_author(new_author)
else:
warnings.append("Unable to find a valid name %s for author %s" % (author, temp_author))
print " * Replacing %s with %s" % (author, new_author)
# If we are in interactive mode, prompt the user whether we want to remember this new mapping
if INTERACTIVE_MODE and\
author not in known_translations and\
yesOrNoPrompt(" Add mapping %s -> %s to known translations file?" % (author, new_author)):
warnings.append(
"Unable to find a valid name %s for author %s" % (author, temp_author))
print(" * Replacing %s with %s" % (author, new_author))
# If we are in interactive mode, prompt the user whether we want to remember this new
# mapping
if INTERACTIVE_MODE and \
author not in known_translations and \
yesOrNoPrompt(
" Add mapping %s -> %s to known translations file?" % (author, new_author)):
known_translations_file.write("%s - %s\n" % (author, new_author))
known_translations_file.flush()
line = line.replace(temp_author, author)
contributions.append(line)
print "==================================================================================\n"
print("==================================================================================\n")
contributors_file.close()
known_translations_file.close()
@ -244,12 +255,13 @@ for line in contributions:
new_contributors_file.write(line)
new_contributors_file.close()
print "Translated contributors list successfully written to %s!" % new_contributors_file_name
print("Translated contributors list successfully written to %s!" % new_contributors_file_name)
# Log any warnings encountered in the process
if warnings:
print "\n========== Warnings encountered while translating the contributor list ==========="
for w in warnings: print w
print "Please manually correct these in the final contributors list at %s." % new_contributors_file_name
print "==================================================================================\n"
print("\n========== Warnings encountered while translating the contributor list ===========")
for w in warnings:
print(w)
print("Please manually correct these in the final contributors list at %s." %
new_contributors_file_name)
print("==================================================================================\n")

View file

@ -27,8 +27,8 @@ import urllib2
try:
import jira.client
except ImportError:
print "This tool requires the jira-python library"
print "Install using 'sudo pip install jira'"
print("This tool requires the jira-python library")
print("Install using 'sudo pip install jira'")
sys.exit(-1)
# User facing configs
@ -48,16 +48,19 @@ MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))
# the state of JIRA's that are tied to PR's we've already looked at.
MAX_FILE = ".github-jira-max"
def get_url(url):
try:
return urllib2.urlopen(url)
except urllib2.HTTPError as e:
print "Unable to fetch URL, exiting: %s" % url
except urllib2.HTTPError:
print("Unable to fetch URL, exiting: %s" % url)
sys.exit(-1)
def get_json(urllib_response):
return json.load(urllib_response)
# Return a list of (JIRA id, JSON dict) tuples:
# e.g. [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. json ..})}
def get_jira_prs():
@ -65,83 +68,86 @@ def get_jira_prs():
has_next_page = True
page_num = 0
while has_next_page:
page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
page_json = get_json(page)
page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
page_json = get_json(page)
for pull in page_json:
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
for jira in jiras:
result = result + [(jira, pull)]
for pull in page_json:
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
for jira in jiras:
result = result + [(jira, pull)]
# Check if there is another page
link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
if not "next"in link_header:
has_next_page = False
else:
page_num = page_num + 1
# Check if there is another page
link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
if "next" not in link_header:
has_next_page = False
else:
page_num += 1
return result
def set_max_pr(max_val):
f = open(MAX_FILE, 'w')
f.write("%s" % max_val)
f.close()
print "Writing largest PR number seen: %s" % max_val
print("Writing largest PR number seen: %s" % max_val)
def get_max_pr():
if os.path.exists(MAX_FILE):
result = int(open(MAX_FILE, 'r').read())
print "Read largest PR number previously seen: %s" % result
print("Read largest PR number previously seen: %s" % result)
return result
else:
return 0
jira_client = jira.client.JIRA({'server': JIRA_API_BASE},
basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
jira_prs = get_jira_prs()
previous_max = get_max_pr()
print "Retrieved %s JIRA PR's from Github" % len(jira_prs)
print("Retrieved %s JIRA PR's from Github" % len(jira_prs))
jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max]
print "%s PR's remain after excluding visted ones" % len(jira_prs)
print("%s PR's remain after excluding visted ones" % len(jira_prs))
num_updates = 0
considered = []
for issue, pr in sorted(jira_prs, key=lambda (k, v): int(v['number'])):
for issue, pr in sorted(jira_prs, key=lambda kv: int(kv[1]['number'])):
if num_updates >= MAX_UPDATES:
break
break
pr_num = int(pr['number'])
print "Checking issue %s" % issue
print("Checking issue %s" % issue)
considered = considered + [pr_num]
url = pr['html_url']
title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
try:
existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
except:
print "Failure reading JIRA %s (does it exist?)" % issue
print sys.exc_info()[0]
continue
print("Failure reading JIRA %s (does it exist?)" % issue)
print(sys.exc_info()[0])
continue
if url in existing_links:
continue
icon = {"title": "Pull request #%s" % pr['number'],
"url16x16": "https://assets-cdn.github.com/favicon.ico"}
icon = {"title": "Pull request #%s" % pr['number'],
"url16x16": "https://assets-cdn.github.com/favicon.ico"}
destination = {"title": title, "url": url, "icon": icon}
# For all possible fields see:
# https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
# application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
# https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
# application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
jira_client.add_remote_link(issue, destination)
comment = "User '%s' has created a pull request for this issue:" % pr['user']['login']
comment = comment + ("\n%s" % pr['html_url'])
comment += "\n%s" % pr['html_url']
if pr_num >= MIN_COMMENT_PR:
jira_client.add_comment(issue, comment)
print "Added link %s <-> PR #%s" % (issue, pr['number'])
num_updates = num_updates + 1
print("Added link %s <-> PR #%s" % (issue, pr['number']))
num_updates += 1
if len(considered) > 0:
set_max_pr(max(considered))

View file

@ -19,10 +19,8 @@
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
# Check every Python file in the repository; exclude the auto-generated
# Sphinx configuration file, which does not follow pep8.
PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" -not -path "*python/docs/conf.py" )"
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"

View file

@ -70,22 +70,22 @@ def get_json(url):
return json.load(urllib2.urlopen(request))
except urllib2.HTTPError as e:
if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
print "Exceeded the GitHub API rate limit; see the instructions in " + \
"dev/merge_spark_pr.py to configure an OAuth token for making authenticated " + \
"GitHub requests."
print("Exceeded the GitHub API rate limit; see the instructions in " +
"dev/merge_spark_pr.py to configure an OAuth token for making authenticated " +
"GitHub requests.")
else:
print "Unable to fetch URL, exiting: %s" % url
print("Unable to fetch URL, exiting: %s" % url)
sys.exit(-1)
def fail(msg):
print msg
print(msg)
clean_up()
sys.exit(-1)
def run_cmd(cmd):
print cmd
print(cmd)
if isinstance(cmd, list):
return subprocess.check_output(cmd)
else:
@ -97,14 +97,15 @@ def continue_maybe(prompt):
if result.lower() != "y":
fail("Okay, exiting")
def clean_up():
print "Restoring head pointer to %s" % original_head
print("Restoring head pointer to %s" % original_head)
run_cmd("git checkout %s" % original_head)
branches = run_cmd("git branch").replace(" ", "").split("\n")
for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
print "Deleting local branch %s" % branch
print("Deleting local branch %s" % branch)
run_cmd("git branch -D %s" % branch)
@ -246,9 +247,9 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
if cur_status == "Resolved" or cur_status == "Closed":
fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status))
print ("=== JIRA %s ===" % jira_id)
print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % (
cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
print("=== JIRA %s ===" % jira_id)
print("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" %
(cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
versions = asf_jira.project_versions("SPARK")
versions = sorted(versions, key=lambda x: x.name, reverse=True)
@ -282,10 +283,10 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
asf_jira.transition_issue(
jira_id, resolve["id"], fixVersions = jira_fix_versions,
comment = comment, resolution = {'id': resolution.raw['id']})
jira_id, resolve["id"], fixVersions=jira_fix_versions,
comment=comment, resolution={'id': resolution.raw['id']})
print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
def resolve_jira_issues(title, merge_branches, comment):
@ -300,23 +301,29 @@ def resolve_jira_issues(title, merge_branches, comment):
def standardize_jira_ref(text):
"""
Standardize the [SPARK-XXXXX] [MODULE] prefix
Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX][MLLIB] Issue"
Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to
"[SPARK-XXX][MLLIB] Issue"
>>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
>>> standardize_jira_ref(
... "[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
'[SPARK-5821][SQL] ParquetRelation2 CTAS should check if delete is successful'
>>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
>>> standardize_jira_ref(
... "[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
'[SPARK-4123][PROJECT INFRA][WIP] Show new dependencies added in pull requests'
>>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
'[SPARK-5954][MLLIB] Top by key'
>>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
'[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
>>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
>>> standardize_jira_ref(
... "SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
'[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
>>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark")
'[SPARK-1146][WIP] Vagrant support for Spark'
>>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
>>> standardize_jira_ref(
... "SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
'[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
>>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
>>> standardize_jira_ref(
... "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
'[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.'
>>> standardize_jira_ref("Additional information for users building from source code")
'Additional information for users building from source code'
@ -350,7 +357,8 @@ def standardize_jira_ref(text):
# Assemble full text (JIRA ref(s), module(s), remaining text)
clean_text = ''.join(jira_refs).strip() + ''.join(components).strip() + " " + text.strip()
# Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included
# Replace multiple spaces with a single space, e.g. if no jira refs and/or components were
# included
clean_text = re.sub(r'\s+', ' ', clean_text.strip())
return clean_text
@ -385,17 +393,17 @@ def main():
# Decide whether to use the modified title or not
modified_title = standardize_jira_ref(pr["title"])
if modified_title != pr["title"]:
print "I've re-written the title as follows to match the standard format:"
print "Original: %s" % pr["title"]
print "Modified: %s" % modified_title
print("I've re-written the title as follows to match the standard format:")
print("Original: %s" % pr["title"])
print("Modified: %s" % modified_title)
result = raw_input("Would you like to use the modified title? (y/n): ")
if result.lower() == "y":
title = modified_title
print "Using modified title:"
print("Using modified title:")
else:
title = pr["title"]
print "Using original title:"
print title
print("Using original title:")
print(title)
else:
title = pr["title"]
@ -414,13 +422,13 @@ def main():
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
print "Pull request %s has already been merged, assuming you want to backport" % pr_num
print("Pull request %s has already been merged, assuming you want to backport" % pr_num)
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
"%s^{commit}" % merge_hash]).strip() != ""
"%s^{commit}" % merge_hash]).strip() != ""
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
print "Found commit %s:\n%s" % (merge_hash, message)
print("Found commit %s:\n%s" % (merge_hash, message))
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)
@ -429,9 +437,9 @@ def main():
"Continue? (experts only!)"
continue_maybe(msg)
print ("\n=== Pull Request #%s ===" % pr_num)
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
title, pr_repo_desc, target_ref, url))
print("\n=== Pull Request #%s ===" % pr_num)
print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" %
(title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)
merged_refs = [target_ref]
@ -445,14 +453,15 @@ def main():
if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % \
(pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
else:
print "JIRA_USERNAME and JIRA_PASSWORD not set"
print "Exiting without trying to close the associated JIRA."
print("JIRA_USERNAME and JIRA_PASSWORD not set")
print("Exiting without trying to close the associated JIRA.")
else:
print "Could not find jira-python library. Run 'sudo pip install jira' to install."
print "Exiting without trying to close the associated JIRA."
print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
print("Exiting without trying to close the associated JIRA.")
if __name__ == "__main__":
import doctest

View file

@ -44,7 +44,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
testErr = labelsAndPredictions.filter(
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification tree model:')
print(model.toDebugString())

View file

@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression tree model:')

View file

@ -43,7 +43,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
testErr = labelsAndPredictions.filter(
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification GBT model:')
print(model.toDebugString())

View file

@ -43,7 +43,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression GBT model:')

View file

@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluate the model on training data
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds \
.map(lambda (v, p): (v - p)**2) \
.map(lambda vp: (vp[0] - vp[1])**2) \
.reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))

View file

@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
# Save and load model

View file

@ -50,7 +50,7 @@ if __name__ == "__main__":
# Make prediction and test accuracy.
predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
print('model accuracy {}'.format(accuracy))
# Save and load model
@ -59,7 +59,7 @@ if __name__ == "__main__":
model.save(sc, output_dir)
sameModel = NaiveBayesModel.load(sc, output_dir)
predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
print('sameModel accuracy {}'.format(accuracy))
# $example off$

View file

@ -45,7 +45,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
testErr = labelsAndPredictions.filter(
lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification forest model:')
print(model.toDebugString())

View file

@ -45,7 +45,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression forest model:')

View file

@ -38,7 +38,7 @@ if __name__ == "__main__":
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
# Save and load model

View file

@ -67,8 +67,8 @@ if __name__ == "__main__":
# with the static RDD inside the transform() method and then multiplying
# the frequency of the words by its sentiment value
happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \
.map(lambda (word, tuple): (word, float(tuple[0]) * tuple[1])) \
.map(lambda (word, happiness): (happiness, word)) \
.map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \
.map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \
.transform(lambda rdd: rdd.sortByKey(False))
happiest_words.foreachRDD(print_happiest_words)

View file

@ -9,6 +9,7 @@ RULES = (
('pyspark.rdd.RDD', 'RDD'),
)
def _convert_epytext(line):
"""
>>> _convert_epytext("L{A}")
@ -19,9 +20,11 @@ def _convert_epytext(line):
line = re.sub(p, sub, line)
return line
def _process_docstring(app, what, name, obj, options, lines):
for i in range(len(lines)):
lines[i] = _convert_epytext(lines[i])
def setup(app):
app.connect("autodoc-process-docstring", _process_docstring)

View file

@ -16,14 +16,14 @@
# specific language governing permissions and limitations
# under the License.
#
import sys, re
import datetime
from __future__ import print_function
import sys
import os
table_name=None
if os.environ.has_key('hive_streaming_tablename'):
table_name=os.environ['hive_streaming_tablename']
table_name = None
if os.environ in 'hive_streaming_tablename':
table_name = os.environ['hive_streaming_tablename']
for line in sys.stdin:
print line
print >> sys.stderr, "dummy"
print(line)
print("dummy", file=sys.stderr)

View file

@ -19,6 +19,6 @@
import sys

# Echo every input line, then exit non-zero so the caller observes a
# script failure (this script exists to test Hive's error handling).
for line in sys.stdin:
    print(line)

sys.exit(1)

View file

@ -19,6 +19,5 @@
import sys

# For each input line, emit two rows of escaped backslash/tab sequences
# (used to test Hive's handling of doubly-escaped tab characters).
for line in sys.stdin:
    print("1\\\\\\t2")
    print("1\\\\\\\\t2")

View file

@ -19,9 +19,9 @@
import sys

# Emit a deterministic stream of integers (50 * 5 * 20022 rows).
# NOTE: xrange is Python-2-only; this Hive test script is run under Python 2.
for i in xrange(50):
    for j in xrange(5):
        for k in xrange(20022):
            print(20000 * i + k)

# Drain stdin so the upstream writer does not see a broken pipe.
for line in sys.stdin:
    pass

View file

@ -19,5 +19,4 @@
import sys

# Emit a literal escaped carriage-return sequence for each input line.
for line in sys.stdin:
    print("1\\\\r2")

View file

@ -19,5 +19,4 @@
import sys

# Emit a literal escaped newline sequence for each input line.
for line in sys.stdin:
    print("1\\\\n2")

View file

@ -19,5 +19,4 @@
import sys

# Emit a literal escaped tab sequence for each input line.
for line in sys.stdin:
    print("1\\\\t2")

View file

@ -21,10 +21,10 @@ import re
# Count runs of identical consecutive input lines; for each run emit
# "<count>\t<line>", replacing any tabs inside the line with underscores.
# (`sys` and `re` are imported at the top of this script, above this hunk.)
line = sys.stdin.readline()
x = 1
while line:
    tem = sys.stdin.readline()
    if line == tem:
        x += 1
    else:
        print(str(x).strip() + '\t' + re.sub('\t', '_', line.strip()))
        line = tem
        x = 1

View file

@ -19,6 +19,6 @@
import sys

# Emit escaped newline, carriage-return, and tab sequences per input line.
for line in sys.stdin:
    print("1\\n2")
    print("1\\r2")
    print("1\\t2")