[SPARK-2061] Made splits deprecated in JavaRDDLike

The JIRA for the issue can be found at: https://issues.apache.org/jira/browse/SPARK-2061
Most of Spark has moved over to consistently using `partitions` instead of `splits`. We should do likewise: add a `partitions` method to JavaRDDLike and have `splits` just call that. We should also go through all cases where other APIs (e.g. Python) call `splits` and change those to use the newer API.
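
To illustrate the intended end state, a minimal Java sketch of the preferred call (the app name and the two-partition RDD below are illustrative, not part of the change):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.Partition;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class PartitionsExample {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local", "PartitionsExample");
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);

    // Preferred: partitions(), consistent with the rest of Spark.
    List<Partition> parts = rdd.partitions();
    System.out.println(parts.size());  // prints 2

    // Deprecated as of 1.1.0 but still available: rdd.splits()
    sc.stop();
  }
}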

Author: Anant <anant.asty@gmail.com>

Closes #1062 from anantasty/SPARK-2061 and squashes the following commits:

b83ce6b [Anant] Fixed syntax issue
21f9210 [Anant] Fixed version number in deprecation string
9315b76 [Anant] made related changes to use partitions in python api
8c62dd1 [Anant] Made splits deprecated in JavaRDDLike
Authored by Anant on 2014-06-20 18:54:00 -07:00; committed by Patrick Wendell
parent a678642495
commit 010c460d62
4 changed files with 8 additions and 5 deletions

core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

@@ -43,8 +43,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
   def rdd: RDD[T]

   /** Set of partitions in this RDD. */
+  @deprecated("Use partitions() instead.", "1.1.0")
   def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)

+  /** Set of partitions in this RDD. */
+  def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
+
   /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
   def context: SparkContext = rdd.context

core/src/test/java/org/apache/spark/JavaAPISuite.java

@@ -741,7 +741,7 @@ public class JavaAPISuite implements Serializable {
   public void iterator() {
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
     TaskContext context = new TaskContext(0, 0, 0, false, new TaskMetrics());
-    Assert.assertEquals(1, rdd.iterator(rdd.splits().get(0), context).next().intValue());
+    Assert.assertEquals(1, rdd.iterator(rdd.partitions().get(0), context).next().intValue());
   }

   @Test
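
Note that both `splits` and `partitions` wrap `rdd.partitions` in a fresh `java.util.ArrayList`, so callers receive a mutable copy. A hypothetical check in the style of JavaAPISuite (assuming the suite's `sc` fixture and the usual JUnit, `org.apache.spark.Partition`, and `java.util` imports) could pin that down:

@Test
public void partitionsReturnsACopy() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
  List<Partition> parts = rdd.partitions();
  parts.clear();                                   // clears only the local copy
  Assert.assertEquals(2, rdd.partitions().size()); // the RDD itself is unaffected
}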

python/pyspark/context.py

@@ -704,7 +704,7 @@ class SparkContext(object):
         [0, 1, 16, 25]
         """
         if partitions == None:
-            partitions = range(rdd._jrdd.splits().size())
+            partitions = range(rdd._jrdd.partitions().size())
         javaPartitions = ListConverter().convert(partitions, self._gateway._gateway_client)
         # Implementation note: This is implemented as a mapPartitions followed

python/pyspark/rdd.py

@@ -321,7 +321,7 @@ class RDD(object):
         >>> rdd.getNumPartitions()
         2
         """
-        return self._jrdd.splits().size()
+        return self._jrdd.partitions().size()

     def filter(self, f):
         """
@@ -922,7 +922,7 @@ class RDD(object):
         [91, 92, 93]
         """
         items = []
-        totalParts = self._jrdd.splits().size()
+        totalParts = self._jrdd.partitions().size()
         partsScanned = 0
         while len(items) < num and partsScanned < totalParts:
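
The loop above is PySpark's take() scanning partitions one at a time until enough elements are collected. A rough Java sketch of the same pattern, illustrative only and not the actual implementation, assuming JavaRDDLike's collectPartitions(int[]) method from this era of the API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TakeSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local", "TakeSketch");
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6), 3);

    int num = 4;
    List<Integer> items = new ArrayList<Integer>();
    int totalParts = rdd.partitions().size();  // was rdd.splits().size()
    int partsScanned = 0;
    while (items.size() < num && partsScanned < totalParts) {
      // Fetch one partition's contents and append them.
      List<Integer>[] collected = rdd.collectPartitions(new int[]{partsScanned});
      items.addAll(collected[0]);
      partsScanned += 1;
    }
    System.out.println(items.subList(0, Math.min(num, items.size())));  // [1, 2, 3, 4]
    sc.stop();
  }
}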