[SPARK-8116][PYSPARK] Allow sc.range() to take a single argument.

Author: Ted Blackman <ted.blackman@gmail.com> Closes #6656 from belisarius222/branch-1.4 and squashes the following commits: 747cbc2 [Ted Blackman] [SPARK-8116][PYSPARK] Allow sc.range() to take a single argument. (cherry picked from commit f02af7c8f7) Signed-off-by: Reynold Xin <rxin@databricks.com>
2015-06-04 22:21:11 -07:00 · 2015-06-04 22:21:11 -07:00 · e505460599
parent 8f16b94afb
commit e505460599
1 changed files with 12 additions and 2 deletions
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@ -324,10 +324,12 @@ class SparkContext(object):
        with SparkContext._lock:
            SparkContext._active_spark_context = None

-    def range(self, start, end, step=1, numSlices=None):
+    def range(self, start, end=None, step=1, numSlices=None):
        """
        Create a new RDD of int containing elements from `start` to `end`
-        (exclusive), increased by `step` every element.
+        (exclusive), increased by `step` every element. Can be called the same
+        way as Python's built-in range() function. If called with a single argument,
+        the argument is interpreted as `end`, and `start` is set to 0.

        :param start: the start value
        :param end: the end value (exclusive)
@ -335,9 +337,17 @@ class SparkContext(object):
        :param numSlices: the number of partitions of the new RDD
        :return: An RDD of int

+        >>> sc.range(5).collect()
+        [0, 1, 2, 3, 4]
+        >>> sc.range(2, 4).collect()
+        [2, 3]
        >>> sc.range(1, 7, 2).collect()
        [1, 3, 5]
        """
+        if end is None:
+            end = start
+            start = 0
+
        return self.parallelize(xrange(start, end, step), numSlices)

    def parallelize(self, c, numSlices=None):