[SPARK-34387][CORE][TESTS] Add ZStandardBenchmark
### What changes were proposed in this pull request?

This PR aims to add ZStandardBenchmark as a baseline.

### Why are the changes needed?

This will prevent any regression when we upgrade Zstandard library in the future.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually.

Closes #31498 from dongjoon-hyun/SPARK-ZSTD-BENCH.

Authored-by: Dongjoon Hyun <dhyun@apple.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
178dc50b7a
commit
466c045bfa
27
core/benchmarks/ZStandardBenchmark-jdk11-results.txt
Normal file
27
core/benchmarks/ZStandardBenchmark-jdk11-results.txt
Normal file
|
@ -0,0 +1,27 @@
|
|||
================================================================================================
|
||||
Benchmark ZStandardCompressionCodec
|
||||
================================================================================================
|
||||
|
||||
OpenJDK 64-Bit Server VM 11.0.9.1+1-Ubuntu-0ubuntu1.18.04 on Linux 4.15.0-1044-aws
|
||||
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
|
||||
Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
--------------------------------------------------------------------------------------------------------------------------------------
|
||||
Compression 10000 times at level 1 without buffer pool 1175 1307 187 0.0 117459.2 1.0X
|
||||
Compression 10000 times at level 2 without buffer pool 979 1020 58 0.0 97889.6 1.2X
|
||||
Compression 10000 times at level 3 without buffer pool 1241 1282 58 0.0 124101.1 0.9X
|
||||
Compression 10000 times at level 1 with buffer pool 466 476 6 0.0 46593.2 2.5X
|
||||
Compression 10000 times at level 2 with buffer pool 544 554 6 0.0 54421.3 2.2X
|
||||
Compression 10000 times at level 3 with buffer pool 795 804 8 0.0 79453.9 1.5X
|
||||
|
||||
OpenJDK 64-Bit Server VM 11.0.9.1+1-Ubuntu-0ubuntu1.18.04 on Linux 4.15.0-1044-aws
|
||||
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
|
||||
Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Decompression 10000 times from level 1 without buffer pool 1033 1069 52 0.0 103254.9 1.0X
|
||||
Decompression 10000 times from level 2 without buffer pool 1033 1070 53 0.0 103262.6 1.0X
|
||||
Decompression 10000 times from level 3 without buffer pool 1031 1076 64 0.0 103104.7 1.0X
|
||||
Decompression 10000 times from level 1 with buffer pool 603 609 5 0.0 60285.0 1.7X
|
||||
Decompression 10000 times from level 2 with buffer pool 602 607 4 0.0 60156.6 1.7X
|
||||
Decompression 10000 times from level 3 with buffer pool 608 613 4 0.0 60767.3 1.7X
|
||||
|
||||
|
27
core/benchmarks/ZStandardBenchmark-results.txt
Normal file
27
core/benchmarks/ZStandardBenchmark-results.txt
Normal file
|
@ -0,0 +1,27 @@
|
|||
================================================================================================
|
||||
Benchmark ZStandardCompressionCodec
|
||||
================================================================================================
|
||||
|
||||
OpenJDK 64-Bit Server VM 1.8.0_275-8u275-b01-0ubuntu1~18.04-b01 on Linux 4.15.0-1044-aws
|
||||
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
|
||||
Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
--------------------------------------------------------------------------------------------------------------------------------------
|
||||
Compression 10000 times at level 1 without buffer pool 1414 1435 30 0.0 141387.0 1.0X
|
||||
Compression 10000 times at level 2 without buffer pool 978 1017 55 0.0 97829.9 1.4X
|
||||
Compression 10000 times at level 3 without buffer pool 1229 1272 61 0.0 122918.0 1.2X
|
||||
Compression 10000 times at level 1 with buffer pool 443 453 6 0.0 44315.4 3.2X
|
||||
Compression 10000 times at level 2 with buffer pool 532 543 7 0.0 53229.1 2.7X
|
||||
Compression 10000 times at level 3 with buffer pool 783 790 7 0.0 78263.5 1.8X
|
||||
|
||||
OpenJDK 64-Bit Server VM 1.8.0_275-8u275-b01-0ubuntu1~18.04-b01 on Linux 4.15.0-1044-aws
|
||||
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
|
||||
Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Decompression 10000 times from level 1 without buffer pool 1047 1084 53 0.0 104669.3 1.0X
|
||||
Decompression 10000 times from level 2 without buffer pool 1050 1089 55 0.0 105023.0 1.0X
|
||||
Decompression 10000 times from level 3 without buffer pool 1054 1101 66 0.0 105398.5 1.0X
|
||||
Decompression 10000 times from level 1 with buffer pool 608 613 4 0.0 60752.1 1.7X
|
||||
Decompression 10000 times from level 2 with buffer pool 607 612 4 0.0 60660.4 1.7X
|
||||
Decompression 10000 times from level 3 with buffer pool 607 612 3 0.0 60746.5 1.7X
|
||||
|
||||
|
104
core/src/test/scala/org/apache/spark/io/ZStandardBenchmark.scala
Normal file
104
core/src/test/scala/org/apache/spark/io/ZStandardBenchmark.scala
Normal file
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.io
|
||||
|
||||
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
|
||||
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
|
||||
import org.apache.spark.internal.config.{IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED, IO_COMPRESSION_ZSTD_BUFFERSIZE, IO_COMPRESSION_ZSTD_LEVEL}
|
||||
|
||||
|
||||
/**
|
||||
* Benchmark for ZStandard codec performance.
|
||||
* {{{
|
||||
* To run this benchmark:
|
||||
* 1. without sbt: bin/spark-submit --class <this class> --jars <core test jar>
|
||||
* 2. build/sbt "core/test:runMain <this class>"
|
||||
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/test:runMain <this class>"
|
||||
* Results will be written to "benchmarks/ZStandardBenchmark-results.txt".
|
||||
* }}}
|
||||
*/
|
||||
object ZStandardBenchmark extends BenchmarkBase {

  /** Number of stream round trips executed inside every benchmark case. */
  val N: Int = 10000

  /**
   * Exclusive upper bound of the per-stream value loop: the default value of
   * `IO_COMPRESSION_ZSTD_BUFFERSIZE` divided by 4.
   *
   * NOTE(review): despite the name, each loop step calls the single-argument `write`/`read`
   * of the stream, which (assuming a plain `java.io.OutputStream`/`InputStream` is returned by
   * the codec) transfers one byte, not a 4-byte integer. The loops start at 1, so
   * `numInteger - 1` bytes are processed per stream.
   */
  val numInteger: Int = IO_COMPRESSION_ZSTD_BUFFERSIZE.defaultValue.get.toInt / 4

  /**
   * Runs the compression cases followed by the decompression cases. Both groups are reported
   * under the same benchmark title and write to the `output` provided by `BenchmarkBase`.
   *
   * @param mainArgs command-line arguments; not used by this suite
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val name = "Benchmark ZStandardCompressionCodec"
    runBenchmark(name) {
      val compression = new Benchmark(name, N, output = output)
      compressionBenchmark(compression, N)
      compression.run()

      val decompression = new Benchmark(name, N, output = output)
      decompressionBenchmark(decompression, N)
      decompression.run()
    }
  }

  /** Builds a defaults-free `SparkConf` carrying only the two ZSTD settings under test. */
  private def zstdConf(enablePool: Boolean, level: Int): SparkConf = {
    new SparkConf(false)
      .set(IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED, enablePool)
      .set(IO_COMPRESSION_ZSTD_LEVEL, level)
  }

  /** Word used in the case label to tell whether the buffer pool setting is enabled. */
  private def poolLabel(enablePool: Boolean): String = if (enablePool) "with" else "without"

  /**
   * Registers one compression case per (buffer pool on/off, level 1..3) combination.
   *
   * Each case creates a fresh codec and compressed stream `N` times, writes the loop values
   * into it and closes it. The compressed bytes are discarded.
   *
   * @param benchmark benchmark the cases are added to; the caller is responsible for `run()`
   * @param N         number of streams compressed per case (also embedded in the case label)
   */
  private def compressionBenchmark(benchmark: Benchmark, N: Int): Unit = {
    Seq(false, true).foreach { enablePool =>
      Seq(1, 2, 3).foreach { level =>
        val conf = zstdConf(enablePool, level)
        val condition = poolLabel(enablePool)
        benchmark.addCase(s"Compression $N times at level $level $condition buffer pool") { _ =>
          // `1 to N` (inclusive) so that exactly N streams are compressed, matching both the
          // label and the per-iteration count the Benchmark was constructed with.
          (1 to N).foreach { _ =>
            val os = new ZStdCompressionCodec(conf)
              .compressedOutputStream(new ByteArrayOutputStream())
            try {
              for (i <- 1 until numInteger) {
                os.write(i)
              }
            } finally {
              // Close even when a write fails so the stream is never left open.
              os.close()
            }
          }
        }
      }
    }
  }

  /**
   * Registers one decompression case per (buffer pool on/off, level 1..3) combination.
   *
   * The compressed input for a case is produced once, outside the timed closure, with the same
   * configuration and the same payload loop as the compression benchmark. Each case then opens
   * a fresh decompressing stream over those bytes `N` times and reads the payload back.
   *
   * @param benchmark benchmark the cases are added to; the caller is responsible for `run()`
   * @param N         number of streams decompressed per case (also embedded in the case label)
   */
  private def decompressionBenchmark(benchmark: Benchmark, N: Int): Unit = {
    Seq(false, true).foreach { enablePool =>
      Seq(1, 2, 3).foreach { level =>
        val conf = zstdConf(enablePool, level)

        // Prepare the compressed fixture up front so only decompression is timed.
        val outputStream = new ByteArrayOutputStream()
        val out = new ZStdCompressionCodec(conf).compressedOutputStream(outputStream)
        try {
          for (i <- 1 until numInteger) {
            out.write(i)
          }
        } finally {
          out.close()
        }
        val bytes = outputStream.toByteArray

        val condition = poolLabel(enablePool)
        benchmark.addCase(s"Decompression $N times from level $level $condition buffer pool") { _ =>
          // `1 to N` (inclusive) so that exactly N streams are decompressed, as the label says.
          (1 to N).foreach { _ =>
            val bais = new ByteArrayInputStream(bytes)
            val is = new ZStdCompressionCodec(conf).compressedInputStream(bais)
            try {
              // Issue as many single-byte reads as there were writes in the fixture.
              (1 until numInteger).foreach { _ =>
                is.read()
              }
            } finally {
              is.close()
            }
          }
        }
      }
    }
  }
}
|
Loading…
Reference in a new issue