[SPARK-29115][SQL][TEST] Add benchmarks for make_date() and make_timestamp()

### What changes were proposed in this pull request?

Added new benchmarks for `make_date()` and `make_timestamp()` to detect performance issues and to measure the functions' speed on foldable arguments.
- `make_date()` is benchmarked on fully foldable arguments.
- `make_timestamp()` is benchmarked on the corner-case seconds value `60.0`, on foldable time fields, and on a foldable date.

### Why are the changes needed?

To find inputs on which `make_date()` and `make_timestamp()` have performance problems. This should be useful for future optimizations of the functions and for users' applications.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
By running the benchmark and manually checking generated dates/timestamps.

Closes #25813 from MaxGekk/make_datetime-benchmark.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Maxim Gekk 2019-09-17 15:09:16 -07:00 committed by Dongjoon Hyun
parent dd32476a82
commit 02db706090
2 changed files with 142 additions and 0 deletions

View file

@ -0,0 +1,22 @@
Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
prepare make_date() 2149 2289 196 46.5 21.5 1.0X
make_date(2019, 9, 16) 1829 1868 58 54.7 18.3 1.2X
make_date(*, *, *) 3180 3339 139 31.4 31.8 0.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
prepare make_timestamp() 2950 3025 96 0.3 2950.3 1.0X
make_timestamp(2019, 1, 2, 3, 4, 50.123456) 45 47 1 22.1 45.2 65.3X
make_timestamp(2019, 1, 2, 3, 4, 60.000000) 42 42 1 24.0 41.7 70.8X
make_timestamp(2019, 12, 31, 23, 59, 60.00) 41 42 1 24.2 41.3 71.4X
make_timestamp(*, *, *, 3, 4, 50.123456) 252 256 7 4.0 251.5 11.7X
make_timestamp(*, *, *, *, *, 0) 225 227 3 4.5 224.6 13.1X
make_timestamp(*, *, *, *, *, 60.0) 230 233 2 4.3 230.4 12.8X
make_timestamp(2019, 1, 2, *, *, *) 3078 3118 35 0.3 3078.5 1.0X
make_timestamp(*, *, *, *, *, *) 3092 3109 17 0.3 3092.4 1.0X

View file

@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.internal.SQLConf
/**
* Synthetic benchmark for the make_date() and make_timestamp() functions.
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* 2. build/sbt "sql/test:runMain <this class>"
* 3. generate result:
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
* Results will be written to "benchmarks/MakeDateTimeBenchmark-results.txt".
* }}}
*/
object MakeDateTimeBenchmark extends SqlBasedBenchmark {

  /**
   * SQL expressions over the `id` column that are passed as the year, month and day
   * arguments of the benchmarked functions.
   */
  private val ymdExprs = Seq("(2000 + (id % 30))", "((id % 12) + 1)", "((id % 27) + 1)")

  /**
   * Evaluates `exprs` over the ids `0 until cardinality` and writes the result with the
   * "noop" format, with the whole-stage codegen config set to "true".
   *
   * @param cardinality exclusive upper bound of the generated `id` range
   * @param exprs SQL expressions to select from the range
   */
  private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
      // start = 0, end = cardinality, step = 1, numPartitions = 1
      val projected = spark.range(0, cardinality, 1, 1).selectExpr(exprs: _*)
      projected.write.format("noop").save()
    }
  }

  /**
   * Registers a benchmark case named `name` that evaluates `exprs` over `cardinality` rows.
   * Each case is added with `numIters = 3`.
   *
   * @param benchmark the benchmark the case is added to
   * @param cardinality number of rows handed to [[doBenchmark]]
   * @param name label of the case in the benchmark output
   * @param exprs SQL expressions evaluated by the case
   */
  private def run(
      benchmark: Benchmark,
      cardinality: Long,
      name: String,
      exprs: String*): Unit = {
    benchmark.addCase(name, numIters = 3) { _ =>
      doBenchmark(cardinality, exprs: _*)
    }
  }

  /**
   * Benchmarks `make_date()`. The first case only evaluates the argument expressions; it is
   * followed by a call with literal arguments and a call with column-derived arguments.
   *
   * @param cardinality number of rows processed by every case
   */
  private def benchmarkMakeDate(cardinality: Long): Unit = {
    val benchmark = new Benchmark("make_date()", cardinality, output = output)

    // Cost of computing the arguments alone, without calling make_date().
    run(benchmark, cardinality, "prepare make_date()", ymdExprs: _*)

    val literalCall = "make_date(2019, 9, 16)"
    run(benchmark, cardinality, literalCall, literalCall)

    val ymdCompact = ymdExprs.mkString(",")
    run(benchmark, cardinality, "make_date(*, *, *)", s"make_date($ymdCompact)")

    benchmark.run()
  }

  /**
   * Benchmarks `make_timestamp()`. The first case only evaluates the argument expressions;
   * it is followed by calls with all-literal arguments and calls that mix literal and
   * column-derived arguments.
   *
   * @param cardinality number of rows processed by every case
   */
  private def benchmarkMakeTimestamp(cardinality: Long): Unit = {
    val benchmark = new Benchmark("make_timestamp()", cardinality, output = output)

    val hmExprs = Seq("id % 24", "id % 60")
    val secExpr = "cast((id % 60000000) / 1000000.0 as decimal(8, 6))"
    val hmsExprs = hmExprs :+ secExpr
    val allExprs = ymdExprs ++ hmsExprs

    // Cost of computing the arguments alone, without calling make_timestamp().
    run(benchmark, cardinality, "prepare make_timestamp()", allExprs: _*)

    // All-literal calls: the expression text doubles as the case name.
    Seq(
      "make_timestamp(2019, 1, 2, 3, 4, 50.123456)",
      "make_timestamp(2019, 1, 2, 3, 4, 60.000000)",
      "make_timestamp(2019, 12, 31, 23, 59, 60.00)"
    ).foreach { literalCall =>
      run(benchmark, cardinality, literalCall, literalCall)
    }

    // Pre-joined argument lists. The separators intentionally differ between cases so that
    // the generated SQL text stays exactly as it has always been.
    val ymdCompact = ymdExprs.mkString(",")
    val hmsCompact = hmsExprs.mkString(",")
    val ymdhmSpaced = (ymdExprs ++ hmExprs).mkString(", ")
    val allSpaced = allExprs.mkString(", ")

    // Calls mixing literal and column-derived arguments, as (case name, SQL expression).
    Seq(
      "make_timestamp(*, *, *, 3, 4, 50.123456)" ->
        s"make_timestamp($ymdCompact, 3, 4, 50.123456)",
      "make_timestamp(*, *, *, *, *, 0)" ->
        s"make_timestamp($ymdhmSpaced, 0)",
      "make_timestamp(*, *, *, *, *, 60.0)" ->
        s"make_timestamp($ymdhmSpaced, 60.0)",
      "make_timestamp(2019, 1, 2, *, *, *)" ->
        s"make_timestamp(2019, 1, 2, $hmsCompact)",
      "make_timestamp(*, *, *, *, *, *)" ->
        s"make_timestamp($allSpaced)"
    ).foreach { case (caseName, sqlExpr) =>
      run(benchmark, cardinality, caseName, sqlExpr)
    }

    benchmark.run()
  }

  /**
   * Entry point of the suite: runs the `make_date()` benchmark followed by the
   * `make_timestamp()` benchmark.
   *
   * @param mainArgs command line arguments; not used by this suite
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val makeDateRows = 100000000L
    val makeTimestampRows = 1000000L
    benchmarkMakeDate(makeDateRows)
    benchmarkMakeTimestamp(makeTimestampRows)
  }
}