[SPARK-29533][SQL][TEST] Benchmark casting strings to intervals
### What changes were proposed in this pull request?
Added a new benchmark, `IntervalBenchmark`, to measure the performance of interval-related functions. In this PR, I added benchmarks for casting strings to intervals, in particular for interval strings with and without the `interval` prefix, because there is special code for handling the prefix: da576a737c/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java (L100-L103).
I also added benchmarks for different numbers of units in interval strings; for example, 1 unit with the prefix is `interval 10 years`, 2 units w/o the prefix is `10 years 5 months`, etc.
### Why are the changes needed?
- To identify current performance issues in casting strings to intervals.
- The benchmark can be used while refactoring/re-implementing `CalendarInterval.fromString()` or `CalendarInterval.fromCaseInsensitiveString()`.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
By running the benchmark via the command:
```shell
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.IntervalBenchmark"
```
Closes #26189 from MaxGekk/interval-from-string-benchmark.
Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
31a5dea48f
commit
6ffec5e6a6
25
sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt
Normal file
25
sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15
|
||||
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
|
||||
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
string w/ interval 471 513 57 2.1 470.7 1.0X
|
||||
string w/o interval 437 444 8 2.3 436.9 1.1X
|
||||
1 units w/ interval 726 758 45 1.4 726.3 0.6X
|
||||
1 units w/o interval 712 717 5 1.4 711.7 0.7X
|
||||
2 units w/ interval 926 935 12 1.1 925.9 0.5X
|
||||
2 units w/o interval 943 947 3 1.1 943.4 0.5X
|
||||
3 units w/ interval 1089 1116 31 0.9 1089.0 0.4X
|
||||
3 units w/o interval 1105 1108 3 0.9 1105.1 0.4X
|
||||
4 units w/ interval 1260 1261 1 0.8 1260.4 0.4X
|
||||
4 units w/o interval 1276 1277 1 0.8 1275.9 0.4X
|
||||
5 units w/ interval 1436 1445 11 0.7 1435.6 0.3X
|
||||
5 units w/o interval 1455 1463 6 0.7 1455.5 0.3X
|
||||
6 units w/ interval 1634 1639 4 0.6 1634.4 0.3X
|
||||
6 units w/o interval 1642 1644 3 0.6 1641.7 0.3X
|
||||
7 units w/ interval 1829 1838 8 0.5 1828.6 0.3X
|
||||
7 units w/o interval 1850 1853 4 0.5 1849.5 0.3X
|
||||
8 units w/ interval 2065 2070 5 0.5 2065.4 0.2X
|
||||
8 units w/o interval 2070 2090 21 0.5 2070.0 0.2X
|
||||
9 units w/ interval 2279 2290 10 0.4 2278.7 0.2X
|
||||
9 units w/o interval 2276 2285 8 0.4 2275.7 0.2X
|
||||
|
25
sql/core/benchmarks/IntervalBenchmark-results.txt
Normal file
25
sql/core/benchmarks/IntervalBenchmark-results.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
|
||||
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
|
||||
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
string w/ interval 420 435 18 2.4 419.8 1.0X
|
||||
string w/o interval 359 365 10 2.8 358.7 1.2X
|
||||
1 units w/ interval 752 759 8 1.3 752.0 0.6X
|
||||
1 units w/o interval 762 766 4 1.3 762.0 0.6X
|
||||
2 units w/ interval 961 970 8 1.0 960.7 0.4X
|
||||
2 units w/o interval 970 976 9 1.0 970.2 0.4X
|
||||
3 units w/ interval 1130 1136 7 0.9 1130.4 0.4X
|
||||
3 units w/o interval 1150 1158 9 0.9 1150.3 0.4X
|
||||
4 units w/ interval 1333 1336 3 0.7 1333.5 0.3X
|
||||
4 units w/o interval 1354 1359 4 0.7 1354.5 0.3X
|
||||
5 units w/ interval 1523 1525 2 0.7 1523.3 0.3X
|
||||
5 units w/o interval 1549 1551 3 0.6 1549.4 0.3X
|
||||
6 units w/ interval 1661 1663 2 0.6 1660.8 0.3X
|
||||
6 units w/o interval 1691 1704 13 0.6 1691.2 0.2X
|
||||
7 units w/ interval 1811 1817 8 0.6 1810.6 0.2X
|
||||
7 units w/o interval 1853 1854 1 0.5 1853.2 0.2X
|
||||
8 units w/ interval 2029 2037 8 0.5 2028.7 0.2X
|
||||
8 units w/o interval 2075 2075 1 0.5 2074.5 0.2X
|
||||
9 units w/ interval 2170 2175 5 0.5 2170.0 0.2X
|
||||
9 units w/o interval 2204 2212 8 0.5 2203.6 0.2X
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.execution.benchmark
|
||||
|
||||
import scala.collection.mutable.ListBuffer
|
||||
|
||||
import org.apache.spark.benchmark.Benchmark
|
||||
import org.apache.spark.sql.Column
|
||||
import org.apache.spark.sql.SaveMode.Overwrite
|
||||
import org.apache.spark.sql.functions._
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
|
||||
/**
 * Synthetic benchmark for interval functions.
 * To run this benchmark:
 * {{{
 *   1. without sbt:
 *      bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
 *   2. build/sbt "sql/test:runMain <this class>"
 *   3. generate result:
 *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
 *      Results will be written to "benchmarks/IntervalBenchmark-results.txt".
 * }}}
 */
object IntervalBenchmark extends SqlBasedBenchmark {
  import spark.implicits._

  /**
   * Evaluates `exprs` over the ids `0 until cardinality` (step 1, one partition) and writes
   * the result with the "noop" format, with whole-stage codegen explicitly enabled.
   *
   * @param cardinality number of rows to generate
   * @param exprs       columns to evaluate for every generated row
   */
  private def doBenchmark(cardinality: Long, exprs: Column*): Unit = {
    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
      spark
        .range(0, cardinality, 1, 1)
        .select(exprs: _*)
        .write
        .format("noop")
        .mode(Overwrite)
        .save()
    }
  }

  /**
   * Registers a named case (3 iterations) that evaluates `exprs` over `cardinality` rows.
   *
   * @param benchmark   benchmark to add the case to
   * @param cardinality number of rows processed by the case
   * @param name        label of the case in the benchmark output
   * @param exprs       columns evaluated by the case
   */
  private def addCase(
      benchmark: Benchmark,
      cardinality: Long,
      name: String,
      exprs: Column*): Unit = {
    benchmark.addCase(name, numIters = 3) { _ =>
      doBenchmark(cardinality, exprs: _*)
    }
  }

  /**
   * Builds a string column by joining, with single spaces: the literal "interval" (or an
   * empty literal when `withPrefix` is false), the row id modulo 10000, the literal "years",
   * and every element of `units`.
   *
   * @param withPrefix whether the string starts with the "interval" keyword
   * @param units      additional "<value> <unit>" fragments appended after the years part
   * @return the concatenated string column (not yet cast to an interval)
   */
  private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): Column = {
    val init = lit(if (withPrefix) "interval" else "") ::
      ($"id" % 10000).cast("string") ::
      lit("years") :: Nil

    concat_ws(" ", (init ++ units.map(lit)): _*)
  }

  /**
   * Registers two cases, with and without the "interval" prefix, that cast the string built
   * from `units` to an interval. The reported unit count is `units.length + 1` because
   * [[buildString]] always emits a years part.
   *
   * @param benchmark   benchmark to add the cases to
   * @param cardinality number of rows processed by each case
   * @param units       extra "<value> <unit>" fragments to include after the years part
   */
  private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = {
    Seq(true, false).foreach { withPrefix =>
      val expr = buildString(withPrefix, units).cast("interval")
      val note = if (withPrefix) "w/ interval" else "w/o interval"
      // Delegate to the named overload so the iteration count is defined in one place.
      addCase(benchmark, cardinality, s"${units.length + 1} units $note", expr)
    }
  }

  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val N = 1000000
    val timeUnits = Seq(
      "13 months", "100 weeks", "9 days", "12 hours",
      "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds")

    val benchmark = new Benchmark("cast strings to intervals", N, output = output)
    // These two cases only build the strings, without casting them to intervals.
    addCase(benchmark, N, "string w/ interval", buildString(true, timeUnits))
    addCase(benchmark, N, "string w/o interval", buildString(false, timeUnits))

    // Growing prefixes of `timeUnits`: the empty prefix is the years-only case, then one
    // more unit is included per step. Each case gets its own immutable sequence.
    (0 to timeUnits.length).foreach { n =>
      addCase(benchmark, N, timeUnits.take(n))
    }

    benchmark.run()
  }
}
|
Loading…
Reference in a new issue