From da303526e54e9a0adfedb49417f383cde7870a69 Mon Sep 17 00:00:00 2001
From: Mridul Muralidharan
Date: Sat, 2 May 2015 23:05:51 +0100
Subject: [PATCH] [SPARK-7323] [SPARK CORE] Use insertAll instead of insert
 while merging combiners in reducer

Author: Mridul Muralidharan

Closes #5862 from mridulm/optimize_aggregator and squashes the following commits:

61cf43a [Mridul Muralidharan] Use insertAll instead of insert - much more expensive to do it per tuple
---
 core/src/main/scala/org/apache/spark/Aggregator.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala
index 3b684bbece..af9765d313 100644
--- a/core/src/main/scala/org/apache/spark/Aggregator.scala
+++ b/core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -88,10 +88,7 @@ case class Aggregator[K, V, C] (
       combiners.iterator
     } else {
       val combiners = new ExternalAppendOnlyMap[K, C, C](identity, mergeCombiners, mergeCombiners)
-      while (iter.hasNext) {
-        val pair = iter.next()
-        combiners.insert(pair._1, pair._2)
-      }
+      combiners.insertAll(iter)
       // Update task metrics if context is not null
       // TODO: Make context non-optional in a future release
       Option(context).foreach { c =>
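
Background on the change: in the ExternalAppendOnlyMap of this era, insert(key, value) was implemented roughly as insertAll(Iterator((key, value))), so the old reducer loop paid an extra single-element Iterator allocation and a full insertAll call per record. Handing the whole iterator to insertAll pays that setup cost once per partition. The sketch below is a minimal, self-contained illustration of that pattern, not Spark's actual class; ToyAppendOnlyMap, bookkeepingCalls, and InsertAllDemo are hypothetical names used only for this example.

    // Sketch only: a toy stand-in for an append-only combiner map, showing
    // why bulk insertAll beats per-tuple insert. The counter stands in for
    // whatever per-call setup work the real class performs.
    import scala.collection.mutable

    class ToyAppendOnlyMap[K, V](mergeValue: (V, V) => V) {
      private val map = mutable.HashMap.empty[K, V]
      var bookkeepingCalls = 0 // incremented once per insertAll invocation

      def insertAll(entries: Iterator[(K, V)]): Unit = {
        bookkeepingCalls += 1 // per-call setup, paid once for the whole batch
        entries.foreach { case (k, v) =>
          map.get(k) match {
            case Some(old) => map(k) = mergeValue(old, v)
            case None      => map(k) = v
          }
        }
      }

      // Per-tuple insert, mirroring how the removed while-loop used the map:
      // every record pays the call setup plus a throwaway one-element Iterator.
      def insert(key: K, value: V): Unit = insertAll(Iterator((key, value)))
    }

    object InsertAllDemo extends App {
      val perTuple = new ToyAppendOnlyMap[Int, Long](_ + _)
      (1 to 100000).foreach(i => perTuple.insert(i % 100, 1L))
      println(s"per-tuple insert: ${perTuple.bookkeepingCalls} bookkeeping calls")

      val bulk = new ToyAppendOnlyMap[Int, Long](_ + _)
      bulk.insertAll((1 to 100000).iterator.map(i => (i % 100, 1L)))
      println(s"bulk insertAll:   ${bulk.bookkeepingCalls} bookkeeping call(s)")
    }

Running the demo prints 100000 bookkeeping calls for the per-tuple path versus 1 for the bulk path, which is the per-record overhead the commit message describes as "much more expensive to do it per tuple".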