[SPARK-2887] Fix bug in countApproxDistinct() when there is more than one partition
Fix a bug in countApproxDistinct() when the RDD has more than one partition: the merge function called h1.addAll(h2) but then returned h2 instead of h1, discarding the merged result. Author: Davies Liu <davies.liu@gmail.com>. Closes #1812 from davies/approx and squashes the following commits: bf757ce [Davies Liu] fix bug of countApproxDistinct() when have more than one partition
This commit is contained in:
parent
a263a7e9f0
commit
ffd1f59a62
|
@ -1004,7 +1004,7 @@ abstract class RDD[T: ClassTag](
|
||||||
},
|
},
|
||||||
(h1: HyperLogLogPlus, h2: HyperLogLogPlus) => {
|
(h1: HyperLogLogPlus, h2: HyperLogLogPlus) => {
|
||||||
h1.addAll(h2)
|
h1.addAll(h2)
|
||||||
h2
|
h1
|
||||||
}).cardinality()
|
}).cardinality()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -81,11 +81,11 @@ class RDDSuite extends FunSuite with SharedSparkContext {
|
||||||
|
|
||||||
def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
|
def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
|
||||||
|
|
||||||
val size = 100
|
val size = 1000
|
||||||
val uniformDistro = for (i <- 1 to 100000) yield i % size
|
val uniformDistro = for (i <- 1 to 5000) yield i % size
|
||||||
val simpleRdd = sc.makeRDD(uniformDistro)
|
val simpleRdd = sc.makeRDD(uniformDistro, 10)
|
||||||
assert(error(simpleRdd.countApproxDistinct(4, 0), size) < 0.4)
|
assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2)
|
||||||
assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.1)
|
assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1)
|
||||||
}
|
}
|
||||||
|
|
||||||
test("SparkContext.union") {
|
test("SparkContext.union") {
|
||||||
|
|
Loading…
Reference in a new issue