[SPARK-14873][CORE] Java sampleByKey methods take ju.Map but with Scala Double values; results in type Object

## What changes were proposed in this pull request?

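The Java `sampleByKey` and `sampleByKeyExact` methods should accept a `java.util.Map` with `java.lang.Double` values, rather than Scala `Double` values, which appear to Java callers as type `Object`.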

## How was this patch tested?

Existing Jenkins tests, with `JavaAPISuite` updated to pass `Map<Integer, Double>` fractions.

Author: Sean Owen <sowen@cloudera.com>

Closes #12637 from srowen/SPARK-14873.
This commit is contained in:
Sean Owen 2016-04-23 10:47:50 -07:00 committed by Reynold Xin
parent a55fbe2a16
commit be0d5d3bbe
3 changed files with 23 additions and 21 deletions

View file

@ -139,9 +139,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* math.ceil(numItems * samplingRate) over all key values.
*/
def sampleByKey(withReplacement: Boolean,
fractions: java.util.Map[K, Double],
fractions: java.util.Map[K, jl.Double],
seed: Long): JavaPairRDD[K, V] =
new JavaPairRDD[K, V](rdd.sampleByKey(withReplacement, fractions.asScala, seed))
new JavaPairRDD[K, V](rdd.sampleByKey(
withReplacement,
fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize
seed))
/**
* Return a subset of this RDD sampled by key (via stratified sampling).
@ -154,7 +157,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Use Utils.random.nextLong as the default seed for the random number generator.
*/
def sampleByKey(withReplacement: Boolean,
fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] =
fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] =
sampleByKey(withReplacement, fractions, Utils.random.nextLong)
/**
@ -168,9 +171,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* two additional passes.
*/
def sampleByKeyExact(withReplacement: Boolean,
fractions: java.util.Map[K, Double],
fractions: java.util.Map[K, jl.Double],
seed: Long): JavaPairRDD[K, V] =
new JavaPairRDD[K, V](rdd.sampleByKeyExact(withReplacement, fractions.asScala, seed))
new JavaPairRDD[K, V](rdd.sampleByKeyExact(
withReplacement,
fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize
seed))
/**
* Return a subset of this RDD sampled by key (via stratified sampling) containing exactly
@ -186,7 +192,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
*/
def sampleByKeyExact(
withReplacement: Boolean,
fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] =
fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] =
sampleByKeyExact(withReplacement, fractions, Utils.random.nextLong)
/**

View file

@ -44,7 +44,6 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.base.Throwables;
import com.google.common.io.Files;
import org.apache.hadoop.io.IntWritable;
@ -1644,7 +1643,7 @@ public class JavaAPISuite implements Serializable {
return new Tuple2<>(i % 2, 1);
}
});
Map<Integer, Object> fractions = Maps.newHashMap();
Map<Integer, Double> fractions = new HashMap<>();
fractions.put(0, 0.5);
fractions.put(1, 1.0);
JavaPairRDD<Integer, Integer> wr = rdd2.sampleByKey(true, fractions, 1L);
@ -1670,7 +1669,7 @@ public class JavaAPISuite implements Serializable {
return new Tuple2<>(i % 2, 1);
}
});
Map<Integer, Object> fractions = Maps.newHashMap();
Map<Integer, Double> fractions = new HashMap<>();
fractions.put(0, 0.5);
fractions.put(1, 1.0);
JavaPairRDD<Integer, Integer> wrExact = rdd2.sampleByKeyExact(true, fractions, 1L);

View file

@ -37,22 +37,19 @@ public class JavaStratifiedSamplingExample {
@SuppressWarnings("unchecked")
// $example on$
List<Tuple2<Integer, Character>> list = new ArrayList<>(
Arrays.<Tuple2<Integer, Character>>asList(
new Tuple2(1, 'a'),
new Tuple2(1, 'b'),
new Tuple2(2, 'c'),
new Tuple2(2, 'd'),
new Tuple2(2, 'e'),
new Tuple2(3, 'f')
)
List<Tuple2<Integer, Character>> list = Arrays.asList(
new Tuple2<>(1, 'a'),
new Tuple2<>(1, 'b'),
new Tuple2<>(2, 'c'),
new Tuple2<>(2, 'd'),
new Tuple2<>(2, 'e'),
new Tuple2<>(3, 'f')
);
JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
// specify the exact fraction desired from each key Map<K, Object>
ImmutableMap<Integer, Object> fractions =
ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3);
// specify the exact fraction desired from each key Map<K, Double>
ImmutableMap<Integer, Double> fractions = ImmutableMap.of(1, 0.1, 2, 0.6, 3, 0.3);
// Get an approximate sample from each stratum
JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);