[SPARK-29533][SQL][TEST] Benchmark casting strings to intervals

### What changes were proposed in this pull request?
Added a new benchmark, `IntervalBenchmark`, to measure the performance of interval-related functions. In this PR, I added benchmarks for casting strings to intervals. In particular, they cover interval strings both with and without the `interval` prefix, because the prefix is handled by special code in `CalendarInterval.java`: da576a737c/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java (L100-L103). I also added benchmarks for different numbers of units in interval strings; for example, 1 unit is `interval 10 years`, 2 units w/o interval is `10 years 5 months`, etc.

### Why are the changes needed?
- To find out current performance issues in casting to intervals
- The benchmark can be used while refactoring/re-implementing `CalendarInterval.fromString()` or `CalendarInterval.fromCaseInsensitiveString()`.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
By running the benchmark via the command:
```shell
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.IntervalBenchmark"
```

Closes #26189 from MaxGekk/interval-from-string-benchmark.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Maxim Gekk 2019-10-22 10:47:04 +09:00 committed by HyukjinKwon
parent 31a5dea48f
commit 6ffec5e6a6
3 changed files with 152 additions and 0 deletions

View file

@ -0,0 +1,25 @@
OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
string w/ interval 471 513 57 2.1 470.7 1.0X
string w/o interval 437 444 8 2.3 436.9 1.1X
1 units w/ interval 726 758 45 1.4 726.3 0.6X
1 units w/o interval 712 717 5 1.4 711.7 0.7X
2 units w/ interval 926 935 12 1.1 925.9 0.5X
2 units w/o interval 943 947 3 1.1 943.4 0.5X
3 units w/ interval 1089 1116 31 0.9 1089.0 0.4X
3 units w/o interval 1105 1108 3 0.9 1105.1 0.4X
4 units w/ interval 1260 1261 1 0.8 1260.4 0.4X
4 units w/o interval 1276 1277 1 0.8 1275.9 0.4X
5 units w/ interval 1436 1445 11 0.7 1435.6 0.3X
5 units w/o interval 1455 1463 6 0.7 1455.5 0.3X
6 units w/ interval 1634 1639 4 0.6 1634.4 0.3X
6 units w/o interval 1642 1644 3 0.6 1641.7 0.3X
7 units w/ interval 1829 1838 8 0.5 1828.6 0.3X
7 units w/o interval 1850 1853 4 0.5 1849.5 0.3X
8 units w/ interval 2065 2070 5 0.5 2065.4 0.2X
8 units w/o interval 2070 2090 21 0.5 2070.0 0.2X
9 units w/ interval 2279 2290 10 0.4 2278.7 0.2X
9 units w/o interval 2276 2285 8 0.4 2275.7 0.2X

View file

@ -0,0 +1,25 @@
Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
string w/ interval 420 435 18 2.4 419.8 1.0X
string w/o interval 359 365 10 2.8 358.7 1.2X
1 units w/ interval 752 759 8 1.3 752.0 0.6X
1 units w/o interval 762 766 4 1.3 762.0 0.6X
2 units w/ interval 961 970 8 1.0 960.7 0.4X
2 units w/o interval 970 976 9 1.0 970.2 0.4X
3 units w/ interval 1130 1136 7 0.9 1130.4 0.4X
3 units w/o interval 1150 1158 9 0.9 1150.3 0.4X
4 units w/ interval 1333 1336 3 0.7 1333.5 0.3X
4 units w/o interval 1354 1359 4 0.7 1354.5 0.3X
5 units w/ interval 1523 1525 2 0.7 1523.3 0.3X
5 units w/o interval 1549 1551 3 0.6 1549.4 0.3X
6 units w/ interval 1661 1663 2 0.6 1660.8 0.3X
6 units w/o interval 1691 1704 13 0.6 1691.2 0.2X
7 units w/ interval 1811 1817 8 0.6 1810.6 0.2X
7 units w/o interval 1853 1854 1 0.5 1853.2 0.2X
8 units w/ interval 2029 2037 8 0.5 2028.7 0.2X
8 units w/o interval 2075 2075 1 0.5 2074.5 0.2X
9 units w/ interval 2170 2175 5 0.5 2170.0 0.2X
9 units w/o interval 2204 2212 8 0.5 2203.6 0.2X

View file

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark
import scala.collection.mutable.ListBuffer
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.Column
import org.apache.spark.sql.SaveMode.Overwrite
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
/**
 * Synthetic benchmark for interval functions.
 *
 * Each case evaluates an expression over a range of ids. The interval strings are built
 * on the fly from the row id (see `buildString`) and, for the "N units" cases, cast to
 * the interval type.
 *
 * To run this benchmark:
 * {{{
 *   1. without sbt:
 *      bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
 *   2. build/sbt "sql/test:runMain <this class>"
 *   3. generate result:
 *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
 *      Results will be written to "benchmarks/IntervalBenchmark-results.txt".
 * }}}
 */
object IntervalBenchmark extends SqlBasedBenchmark {
  import spark.implicits._

  /**
   * Evaluates `exprs` for every id in `[0, cardinality)` and writes the result with the
   * "noop" format, with whole-stage codegen enabled.
   *
   * @param cardinality number of rows to generate
   * @param exprs columns to compute for each row
   */
  private def doBenchmark(cardinality: Long, exprs: Column*): Unit = {
    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
      spark
        .range(0, cardinality, 1, 1)
        .select(exprs: _*)
        .write
        .format("noop")
        .mode(Overwrite)
        .save()
    }
  }

  /**
   * Registers a benchmark case called `name` that computes `exprs` over `cardinality` rows.
   * Every case runs for 3 iterations.
   *
   * @param benchmark benchmark to add the case to
   * @param cardinality number of rows processed per iteration
   * @param name label of the case in the benchmark output
   * @param exprs columns to compute for each row
   */
  private def addCase(
      benchmark: Benchmark,
      cardinality: Long,
      name: String,
      exprs: Column*): Unit = {
    benchmark.addCase(name, numIters = 3) { _ =>
      doBenchmark(cardinality, exprs: _*)
    }
  }

  /**
   * Builds a string column of the form `[interval] <id % 10000> years [<unit> ...]`,
   * with all parts joined by a single space.
   *
   * @param withPrefix whether the string starts with the `interval` keyword; when false,
   *                   an empty literal is used in its place
   * @param units additional literal parts (e.g. "13 months") appended after the years
   * @return the concatenated string column (not yet cast to an interval)
   */
  private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): Column = {
    val prefix = lit(if (withPrefix) "interval" else "")
    val years = Seq(($"id" % 10000).cast("string"), lit("years"))
    val parts: Seq[Column] = (prefix +: years) ++ units.map(lit)
    concat_ws(" ", parts: _*)
  }

  /**
   * Registers two cases, with and without the `interval` prefix, that cast the string
   * built from `units` to an interval. The case name reports `units.length + 1` units
   * because the years part is always present.
   *
   * @param benchmark benchmark to add the cases to
   * @param cardinality number of rows processed per iteration
   * @param units extra units appended after the years part
   */
  private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = {
    Seq(true, false).foreach { withPrefix =>
      val expr = buildString(withPrefix, units).cast("interval")
      val note = if (withPrefix) "w/ interval" else "w/o interval"
      // Delegate to the named overload so the case registration lives in one place.
      addCase(benchmark, cardinality, s"${units.length + 1} units $note", expr)
    }
  }

  /**
   * Registers all cases and runs the benchmark: first the two cases that only build the
   * full interval strings (no cast), then the cast cases for 1 to 9 units.
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val N = 1000000
    val timeUnits = Seq(
      "13 months", "100 weeks", "9 days", "12 hours",
      "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds")
    val benchmark = new Benchmark("cast strings to intervals", N, output = output)

    // String construction only, without the cast to interval.
    addCase(benchmark, N, "string w/ interval", buildString(withPrefix = true, units = timeUnits))
    addCase(benchmark, N, "string w/o interval", buildString(withPrefix = false, units = timeUnits))

    // Cast cases for every prefix of timeUnits, starting with the empty one (only years).
    // Immutable prefixes are passed instead of a shared, growing buffer.
    (0 to timeUnits.length).foreach { numUnits =>
      addCase(benchmark, N, timeUnits.take(numUnits))
    }

    benchmark.run()
  }
}