Made DFS shuffle's "reduce tasks" fetch inputs in a random order so they
don't all hit the same nodes at the same time.
This commit is contained in:
parent
820dac5afe
commit
44530c310b
|
@ -80,8 +80,8 @@ extends Logging
|
||||||
}
|
}
|
||||||
val fs = DfsShuffle.getFileSystem()
|
val fs = DfsShuffle.getFileSystem()
|
||||||
val outputStreams = (0 until numOutputSplits).map(i => {
|
val outputStreams = (0 until numOutputSplits).map(i => {
|
||||||
val path = new Path(dir, "intermediate-%d-%d".format(myIndex, i))
|
val path = new Path(dir, "%d-to-%d".format(myIndex, i))
|
||||||
new ObjectOutputStream(fs.create(path, 1.toShort))
|
new ObjectOutputStream(fs.create(path, true))
|
||||||
}).toArray
|
}).toArray
|
||||||
for ((k, c) <- combiners) {
|
for ((k, c) <- combiners) {
|
||||||
val bucket = k.hashCode % numOutputSplits
|
val bucket = k.hashCode % numOutputSplits
|
||||||
|
@ -96,8 +96,8 @@ extends Logging
|
||||||
override def default(key: K) = createCombiner()
|
override def default(key: K) = createCombiner()
|
||||||
}
|
}
|
||||||
val fs = DfsShuffle.getFileSystem()
|
val fs = DfsShuffle.getFileSystem()
|
||||||
for (i <- 0 until numInputSplits) {
|
for (i <- Utils.shuffle(0 until numInputSplits)) {
|
||||||
val path = new Path(dir, "intermediate-%d-%d".format(i, myIndex))
|
val path = new Path(dir, "%d-to-%d".format(i, myIndex))
|
||||||
val inputStream = new ObjectInputStream(fs.open(path))
|
val inputStream = new ObjectInputStream(fs.open(path))
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|
|
@ -4,13 +4,14 @@ import java.io._
|
||||||
import java.util.UUID
|
import java.util.UUID
|
||||||
|
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
import scala.util.Random
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Various utility methods used by Spark.
|
* Various utility methods used by Spark.
|
||||||
*/
|
*/
|
||||||
object Utils {
|
object Utils {
|
||||||
def serialize[T](o: T): Array[Byte] = {
|
def serialize[T](o: T): Array[Byte] = {
|
||||||
val bos = new ByteArrayOutputStream
|
val bos = new ByteArrayOutputStream()
|
||||||
val oos = new ObjectOutputStream(bos)
|
val oos = new ObjectOutputStream(bos)
|
||||||
oos.writeObject(o)
|
oos.writeObject(o)
|
||||||
oos.close
|
oos.close
|
||||||
|
@ -95,4 +96,19 @@ object Utils {
|
||||||
out.close()
|
out.close()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shuffle the elements of a collection into a random order, returning the
// result in a new collection. Unlike scala.util.Random.shuffle, this method
// uses a local random number generator, avoiding inter-thread contention.
def shuffle[T](seq: Seq[T]): Seq[T] = {
  val buf = ArrayBuffer(seq: _*)
  val rand = new Random()
  // Fisher-Yates: walk from the end, swapping buf(i) with a uniformly chosen
  // earlier-or-equal position j in [0, i]. The bound must be i + 1 so that
  // j == i (element stays put) is possible; using rand.nextInt(i) instead
  // yields Sattolo's algorithm, which produces only cyclic permutations and
  // is therefore a biased shuffle.
  for (i <- (buf.size - 1) to 1 by -1) {
    val j = rand.nextInt(i + 1)
    val tmp = buf(j)
    buf(j) = buf(i)
    buf(i) = tmp
  }
  buf
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue