[SPARK-8116][PYSPARK] Allow sc.range() to take a single argument.

Author: Ted Blackman <ted.blackman@gmail.com>

Closes #6656 from belisarius222/branch-1.4 and squashes the following commits:

747cbc2 [Ted Blackman] [SPARK-8116][PYSPARK] Allow sc.range() to take a single argument.

(cherry picked from commit f02af7c8f7)
Signed-off-by: Reynold Xin <rxin@databricks.com>
This commit is contained in:
Ted Blackman 2015-06-04 22:21:11 -07:00 committed by Reynold Xin
parent 8f16b94afb
commit e505460599

View file

@ -324,10 +324,12 @@ class SparkContext(object):
with SparkContext._lock:
SparkContext._active_spark_context = None
def range(self, start, end, step=1, numSlices=None):
def range(self, start, end=None, step=1, numSlices=None):
"""
Create a new RDD of int containing elements from `start` to `end`
(exclusive), increased by `step` every element.
(exclusive), increased by `step` every element. Can be called the same
way as python's built-in range() function. If called with a single argument,
the argument is interpreted as `end`, and `start` is set to 0.
:param start: the start value
:param end: the end value (exclusive)
@ -335,9 +337,17 @@ class SparkContext(object):
:param numSlices: the number of partitions of the new RDD
:return: An RDD of int
>>> sc.range(5).collect()
[0, 1, 2, 3, 4]
>>> sc.range(2, 4).collect()
[2, 3]
>>> sc.range(1, 7, 2).collect()
[1, 3, 5]
"""
if end is None:
end = start
start = 0
return self.parallelize(xrange(start, end, step), numSlices)
def parallelize(self, c, numSlices=None):