Merge pull request #365 from ryanlecompte/rate_limiter_timing_cleanup

Improve sleeping algorithm for rate limiting output streams
Matei Zaharia 2013-01-13 10:47:00 -08:00
commit 530493b0e8
2 changed files with 58 additions and 29 deletions
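
The change in brief: the old waitToWrite spun in a polling loop, sleeping a fixed 5 ms at a time until the observed rate dropped below bytesPerSec; the new version computes in one step how long to sleep to come back down to the target rate (modeled on Kafka's Throttler) and retries via tail recursion. A minimal sketch of that computation, using the patch's seconds-granularity bookkeeping (the helper name and signature here are illustrative, not part of the patch):

    // Illustrative sketch, not part of the commit: the sleep computation behind
    // the new waitToWrite. At the target rate, bytesWritten bytes should take
    // bytesWritten / bytesPerSec seconds; sleep off whatever has not yet elapsed.
    def sleepTimeMillis(bytesWritten: Long, bytesPerSec: Int, elapsedSecs: Long): Long = {
      val targetSecs = bytesWritten / bytesPerSec   // seconds these bytes "cost" at the cap
      math.max(targetSecs - elapsedSecs, 0) * 1000  // Thread.sleep takes milliseconds
    }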

spark/util/RateLimitedOutputStream.scala

@@ -1,9 +1,14 @@
 package spark.util
 
+import scala.annotation.tailrec
+
 import java.io.OutputStream
+import java.util.concurrent.TimeUnit._
 
 class RateLimitedOutputStream(out: OutputStream, bytesPerSec: Int) extends OutputStream {
-  var lastSyncTime = System.nanoTime()
+  val SYNC_INTERVAL = NANOSECONDS.convert(10, SECONDS)
+  val CHUNK_SIZE = 8192
+  var lastSyncTime = System.nanoTime
   var bytesWrittenSinceSync: Long = 0
 
   override def write(b: Int) {
@@ -15,34 +20,13 @@ class RateLimitedOutputStream(out: OutputStream, bytesPerSec: Int) extends OutputStream {
     write(bytes, 0, bytes.length)
   }
 
-  override def write(bytes: Array[Byte], offset: Int, length: Int) {
-    val CHUNK_SIZE = 8192
-    var pos = 0
-    while (pos < length) {
-      val writeSize = math.min(length - pos, CHUNK_SIZE)
+  @tailrec
+  override final def write(bytes: Array[Byte], offset: Int, length: Int) {
+    val writeSize = math.min(length - offset, CHUNK_SIZE)
+    if (writeSize > 0) {
       waitToWrite(writeSize)
-      out.write(bytes, offset + pos, writeSize)
-      pos += writeSize
-    }
-  }
-
-  def waitToWrite(numBytes: Int) {
-    while (true) {
-      val now = System.nanoTime()
-      val elapsed = math.max(now - lastSyncTime, 1)
-      val rate = bytesWrittenSinceSync.toDouble / (elapsed / 1.0e9)
-      if (rate < bytesPerSec) {
-        // It's okay to write; just update some variables and return
-        bytesWrittenSinceSync += numBytes
-        if (now > lastSyncTime + (1e10).toLong) {
-          // Ten seconds have passed since lastSyncTime; let's resync
-          lastSyncTime = now
-          bytesWrittenSinceSync = numBytes
-        }
-        return
-      } else {
-        Thread.sleep(5)
-      }
+      out.write(bytes, offset, writeSize)
+      write(bytes, offset + writeSize, length)
     }
   }
 
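A note on the rewritten write: the recursive call is in tail position, so the @tailrec annotation makes the compiler verify it is optimized into a loop, meaning large buffers cannot grow the stack. The method must also be declared final, because Scala rejects @tailrec on a method that could be overridden.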
@@ -53,4 +37,26 @@ class RateLimitedOutputStream(out: OutputStream, bytesPerSec: Int) extends OutputStream {
   override def close() {
     out.close()
   }
+
+  @tailrec
+  private def waitToWrite(numBytes: Int) {
+    val now = System.nanoTime
+    val elapsedSecs = SECONDS.convert(math.max(now - lastSyncTime, 1), NANOSECONDS)
+    val rate = bytesWrittenSinceSync.toDouble / elapsedSecs
+    if (rate < bytesPerSec) {
+      // It's okay to write; just update some variables and return
+      bytesWrittenSinceSync += numBytes
+      if (now > lastSyncTime + SYNC_INTERVAL) {
+        // Sync interval has passed; let's resync
+        lastSyncTime = now
+        bytesWrittenSinceSync = numBytes
+      }
+    } else {
+      // Calculate how much time we should sleep to bring ourselves to the desired rate.
+      // Based on throttler in Kafka (https://github.com/kafka-dev/kafka/blob/master/core/src/main/scala/kafka/utils/Throttler.scala)
+      val sleepTime = MILLISECONDS.convert((bytesWrittenSinceSync / bytesPerSec - elapsedSecs), SECONDS)
+      if (sleepTime > 0) Thread.sleep(sleepTime)
+      waitToWrite(numBytes)
+    }
+  }
 }
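
For orientation, a hypothetical usage sketch (not part of the PR): wrap any OutputStream and writes are throttled transparently, so a 25000-byte payload at a 10000 bytes/sec cap blocks for a couple of seconds rather than completing immediately.

    import java.io.ByteArrayOutputStream

    import spark.util.RateLimitedOutputStream

    object RateLimitDemo {
      def main(args: Array[String]) {
        val sink = new ByteArrayOutputStream
        // Cap throughput at 10000 bytes per second.
        val throttled = new RateLimitedOutputStream(sink, 10000)
        val payload = ("Y" * 25000).getBytes("UTF-8")
        throttled.write(payload)  // blocks internally to honor the rate cap
        throttled.flush()
        println("wrote " + sink.size + " bytes")
      }
    }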

spark/util/RateLimitedOutputStreamSuite.scala

@@ -0,0 +1,23 @@
+package spark.util
+
+import org.scalatest.FunSuite
+import java.io.ByteArrayOutputStream
+import java.util.concurrent.TimeUnit._
+
+class RateLimitedOutputStreamSuite extends FunSuite {
+
+  private def benchmark[U](f: => U): Long = {
+    val start = System.nanoTime
+    f
+    System.nanoTime - start
+  }
+
+  test("write") {
+    val underlying = new ByteArrayOutputStream
+    val data = "X" * 41000
+    val stream = new RateLimitedOutputStream(underlying, 10000)
+    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }
+    assert(SECONDS.convert(elapsedNs, NANOSECONDS) == 4)
+    assert(underlying.toString("UTF-8") == data)
+  }
+}
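
The timing assertion follows from the rate arithmetic: 41000 bytes at 10000 bytes/sec is 4.1 seconds, and SECONDS.convert truncates toward zero, so the test passes whenever the write takes at least 4 but under 5 seconds.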