From 8c44d744631516a5cdaf63406e69a9dd11e5b878 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 27 Jun 2020 17:54:06 -0700 Subject: [PATCH] [SPARK-32071][SQL][TESTS] Add `make_interval` benchmark ### What changes were proposed in this pull request? Add benchmarks for interval constructor `make_interval` and measure perf of 4 cases: 1. Constant (year, month) 2. Constant (week, day) 3. Constant (hour, minute, second, second fraction) 4. All fields are NOT constant. The benchmark results are generated in the following environment: | Item | Description | | ---- | ----| | Region | us-west-2 (Oregon) | | Instance | r3.xlarge | | AMI | ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190722.1 (ami-06f2f779464715dc5) | | Java | OpenJDK 64-Bit Server VM 1.8.0_252 and OpenJDK 64-Bit Server VM 11.0.7+10 | ### Why are the changes needed? To have a baseline for future perf improvements of `make_interval`, and to prevent perf regressions in the future. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `IntervalBenchmark` via: ``` $ SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.IntervalBenchmark" ``` Closes #28905 from MaxGekk/benchmark-make_interval. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../IntervalBenchmark-jdk11-results.txt | 63 +++++++++------ .../benchmarks/IntervalBenchmark-results.txt | 63 +++++++++------ .../benchmark/IntervalBenchmark.scala | 81 ++++++++++++++++--- 3 files changed, 146 insertions(+), 61 deletions(-) diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt index 8958d7c534..70a6493104 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -1,29 +1,40 @@ -Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.3 -Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz +OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 448 469 20 2.2 447.6 1.0X -prepare string w/o interval 405 409 4 2.5 404.6 1.1X -1 units w/ interval 321 328 6 3.1 321.4 1.4X -1 units w/o interval 303 307 4 3.3 303.1 1.5X -2 units w/ interval 445 458 12 2.2 444.6 1.0X -2 units w/o interval 416 424 10 2.4 416.2 1.1X -3 units w/ interval 1006 1012 8 1.0 1006.4 0.4X -3 units w/o interval 1240 1249 8 0.8 1239.6 0.4X -4 units w/ interval 1295 1418 106 0.8 1295.4 0.3X -4 units w/o interval 1172 1188 15 0.9 1171.6 0.4X -5 units w/ interval 1326 1335 11 0.8 1325.6 0.3X -5 units w/o interval 1309 1336 44 0.8 1308.7 0.3X -6 units w/ interval 1441 1464 29 0.7 1441.0 0.3X -6 units w/o interval 1350 1369 17 0.7 1350.1 0.3X -7 units w/ interval 1606 1669 99 0.6 1605.6 0.3X -7 units w/o interval 1546 1557 12 0.6 1546.3 0.3X -8 units w/ interval 1771 1875 120 0.6 1770.6 0.3X -8 units w/o interval 1775 1789 13 0.6 1775.2 0.3X -9 units w/ interval 2126 2757 849 
0.5 2126.4 0.2X -9 units w/o interval 2053 2070 21 0.5 2053.3 0.2X -10 units w/ interval 2209 2243 30 0.5 2209.1 0.2X -10 units w/o interval 2400 2702 365 0.4 2400.2 0.2X -11 units w/ interval 2616 2699 72 0.4 2616.5 0.2X -11 units w/o interval 3218 3380 195 0.3 3218.4 0.1X +prepare string w/ interval 708 829 110 1.4 708.0 1.0X +prepare string w/o interval 660 672 14 1.5 660.3 1.1X +1 units w/ interval 514 543 33 1.9 514.2 1.4X +1 units w/o interval 476 492 20 2.1 475.9 1.5X +2 units w/ interval 751 767 14 1.3 751.0 0.9X +2 units w/o interval 709 716 11 1.4 709.0 1.0X +3 units w/ interval 1541 1551 15 0.6 1540.9 0.5X +3 units w/o interval 1531 1532 1 0.7 1531.5 0.5X +4 units w/ interval 1764 1768 5 0.6 1763.5 0.4X +4 units w/o interval 1737 1745 8 0.6 1736.6 0.4X +5 units w/ interval 1920 1930 10 0.5 1919.7 0.4X +5 units w/o interval 1928 1936 11 0.5 1927.9 0.4X +6 units w/ interval 2124 2127 4 0.5 2124.2 0.3X +6 units w/o interval 2124 2125 1 0.5 2123.7 0.3X +7 units w/ interval 2525 2541 15 0.4 2525.5 0.3X +7 units w/o interval 2512 2518 11 0.4 2511.5 0.3X +8 units w/ interval 2578 2597 19 0.4 2578.1 0.3X +8 units w/o interval 2558 2562 6 0.4 2558.1 0.3X +9 units w/ interval 2742 2750 9 0.4 2741.8 0.3X +9 units w/o interval 2752 2762 11 0.4 2751.8 0.3X +10 units w/ interval 3112 3123 10 0.3 3111.9 0.2X +10 units w/o interval 3116 3130 14 0.3 3115.7 0.2X +11 units w/ interval 3255 3273 20 0.3 3255.3 0.2X +11 units w/o interval 3294 3305 14 0.3 3293.6 0.2X + +OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------- +prepare make_interval() 3395 3410 16 0.3 3395.0 1.0X +make_interval(0, 1, 2, 3, 4, 5, 50.123456) 94 102 9 10.7 93.8 36.2X +make_interval(*, *, 2, 3, 4, 5, 50.123456) 136 139 4 
7.3 136.5 24.9X +make_interval(0, 1, *, *, 4, 5, 50.123456) 115 119 4 8.7 114.8 29.6X +make_interval(0, 1, 2, 3, *, *, *) 3359 3382 37 0.3 3358.7 1.0X +make_interval(*, *, *, *, *, *, *) 3382 3388 9 0.3 3382.3 1.0X diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index 48af333b78..98b9f55c2e 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -1,29 +1,40 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.3 -Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 389 410 21 2.6 388.7 1.0X -prepare string w/o interval 340 360 18 2.9 340.5 1.1X -1 units w/ interval 378 389 16 2.6 377.8 1.0X -1 units w/o interval 346 350 5 2.9 346.2 1.1X -2 units w/ interval 444 457 11 2.3 444.2 0.9X -2 units w/o interval 455 464 12 2.2 455.1 0.9X -3 units w/ interval 942 964 20 1.1 941.5 0.4X -3 units w/o interval 927 1020 93 1.1 927.3 0.4X -4 units w/ interval 1114 1127 17 0.9 1113.9 0.3X -4 units w/o interval 1100 1105 4 0.9 1100.3 0.4X -5 units w/ interval 1180 1244 57 0.8 1180.1 0.3X -5 units w/o interval 1135 1141 6 0.9 1135.2 0.3X -6 units w/ interval 1284 1316 48 0.8 1284.0 0.3X -6 units w/o interval 1276 1357 122 0.8 1276.1 0.3X -7 units w/ interval 1609 1636 32 0.6 1609.1 0.2X -7 units w/o interval 1551 1578 36 0.6 1550.9 0.3X -8 units w/ interval 1787 1874 129 0.6 1787.1 0.2X -8 units w/o interval 1751 1767 15 0.6 1750.6 0.2X -9 units w/ interval 1960 2065 141 0.5 1959.7 0.2X -9 units w/o interval 1885 1908 39 0.5 1885.1 0.2X -10 units w/ interval 2178 2185 11 0.5 2177.9 
0.2X -10 units w/o interval 2150 2255 164 0.5 2150.1 0.2X -11 units w/ interval 2457 2542 139 0.4 2456.7 0.2X -11 units w/o interval 2557 2770 188 0.4 2556.7 0.2X +prepare string w/ interval 677 718 40 1.5 677.2 1.0X +prepare string w/o interval 602 624 19 1.7 602.2 1.1X +1 units w/ interval 582 598 20 1.7 581.8 1.2X +1 units w/o interval 549 591 64 1.8 549.1 1.2X +2 units w/ interval 758 773 14 1.3 758.2 0.9X +2 units w/o interval 723 738 14 1.4 722.6 0.9X +3 units w/ interval 1442 1450 11 0.7 1441.8 0.5X +3 units w/o interval 1426 1429 3 0.7 1426.4 0.5X +4 units w/ interval 1645 1652 11 0.6 1645.1 0.4X +4 units w/o interval 1618 1626 10 0.6 1617.6 0.4X +5 units w/ interval 1794 1803 13 0.6 1794.4 0.4X +5 units w/o interval 1783 1793 9 0.6 1783.2 0.4X +6 units w/ interval 1976 1984 11 0.5 1976.2 0.3X +6 units w/o interval 1948 1959 10 0.5 1947.9 0.3X +7 units w/ interval 2394 2408 18 0.4 2393.7 0.3X +7 units w/o interval 2387 2392 8 0.4 2386.8 0.3X +8 units w/ interval 2578 2588 15 0.4 2577.5 0.3X +8 units w/o interval 2572 2578 5 0.4 2571.8 0.3X +9 units w/ interval 2812 2829 19 0.4 2811.7 0.2X +9 units w/o interval 2811 2816 4 0.4 2810.7 0.2X +10 units w/ interval 3108 3116 10 0.3 3107.8 0.2X +10 units w/o interval 3107 3109 3 0.3 3106.8 0.2X +11 units w/ interval 3386 3392 8 0.3 3386.3 0.2X +11 units w/o interval 3374 3377 4 0.3 3374.0 0.2X + +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------- +prepare make_interval() 3634 3684 47 0.3 3634.1 1.0X +make_interval(0, 1, 2, 3, 4, 5, 50.123456) 90 100 12 11.1 90.0 40.4X +make_interval(*, *, 2, 3, 4, 5, 50.123456) 114 119 5 8.8 114.3 31.8X +make_interval(0, 1, *, *, 4, 5, 50.123456) 121 138 21 8.3 120.7 30.1X +make_interval(0, 1, 2, 3, *, 
*, *) 3615 3621 9 0.3 3614.7 1.0X +make_interval(*, *, *, *, *, *, *) 3638 3657 21 0.3 3637.7 1.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index 907e3f40c1..96ad453aeb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -39,11 +39,11 @@ import org.apache.spark.sql.internal.SQLConf object IntervalBenchmark extends SqlBasedBenchmark { import spark.implicits._ - private def doBenchmark(cardinality: Long, exprs: Column*): Unit = { + private def doBenchmark(cardinality: Long, columns: Column*): Unit = { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { spark .range(0, cardinality, 1, 1) - .select(exprs: _*) + .select(columns: _*) .queryExecution .toRdd .foreach(_ => ()) @@ -60,6 +60,26 @@ object IntervalBenchmark extends SqlBasedBenchmark { } } + private def doBenchmarkExpr(cardinality: Long, exprs: String*): Unit = { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(0, cardinality, 1, 1) + .selectExpr(exprs: _*) + .queryExecution + .toRdd + .foreach(_ => ()) + } + } + + private def addCaseExpr( + benchmark: Benchmark, + cardinality: Long, + name: String, + exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => doBenchmarkExpr(cardinality, exprs: _*) } + } + + private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): Column = { val init = lit(if (withPrefix) "interval" else "") :: ($"id" % 10000).cast("string") :: @@ -78,25 +98,68 @@ object IntervalBenchmark extends SqlBasedBenchmark { } } - override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - val N = 1000000 + private def benchmarkIntervalStringParsing(cardinality: Long): Unit = { val timeUnits = Seq( "13 months", " 1 
months", "100 weeks", "9 days", "12 hours", "- 3 hours", "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") val intervalToTest = ListBuffer[String]() - val benchmark = new Benchmark("cast strings to intervals", N, output = output) + val benchmark = new Benchmark("cast strings to intervals", cardinality, output = output) // The first 2 cases are used to show the overhead of preparing the interval string. - addCase(benchmark, N, "prepare string w/ interval", buildString(true, timeUnits)) - addCase(benchmark, N, "prepare string w/o interval", buildString(false, timeUnits)) - addCase(benchmark, N, intervalToTest) // Only years + addCase(benchmark, cardinality, "prepare string w/ interval", buildString(true, timeUnits)) + addCase(benchmark, cardinality, "prepare string w/o interval", buildString(false, timeUnits)) + addCase(benchmark, cardinality, intervalToTest) // Only years for (unit <- timeUnits) { intervalToTest.append(unit) - addCase(benchmark, N, intervalToTest) + addCase(benchmark, cardinality, intervalToTest) } benchmark.run() } + + private def benchmarkMakeInterval(cardinality: Long): Unit = { + val benchmark = new Benchmark("make_interval()", cardinality, output = output) + val hmExprs = Seq("id % 24", "id % 60") + val hmsExprs = hmExprs ++ Seq("cast((id % 500000000) / 1000000.0 as decimal(18, 6))") + val ymExprs = Seq("(2000 + (id % 30))", "((id % 12) + 1)") + val wdExpr = Seq("((id % 54) + 1)", "((id % 1000) + 1)") + val args = ymExprs ++ wdExpr ++ hmsExprs + + addCaseExpr( + benchmark, + cardinality, + "prepare make_interval()", + args: _*) + val foldableExpr = "make_interval(0, 1, 2, 3, 4, 5, 50.123456)" + addCaseExpr(benchmark, cardinality, foldableExpr, foldableExpr) + addCaseExpr( + benchmark, + cardinality, + "make_interval(*, *, 2, 3, 4, 5, 50.123456)", + s"make_interval(${ymExprs.mkString(",")}, 2, 3, 4, 5, 50.123456)") + addCaseExpr( + benchmark, + cardinality, + "make_interval(0, 1, *, *, 4, 5, 50.123456)", + s"make_interval(0, 1, 
${wdExpr.mkString(",")}, 4, 5, 50.123456)") + addCaseExpr( + benchmark, + cardinality, + "make_interval(0, 1, 2, 3, *, *, *)", + s"make_interval(0, 1, 2, 3, ${hmsExprs.mkString(",")})") + addCaseExpr( + benchmark, + cardinality, + "make_interval(*, *, *, *, *, *, *)", + s"make_interval(${args.mkString(",")})") + + benchmark.run() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + benchmarkIntervalStringParsing(1000000) + benchmarkMakeInterval(1000000) + } }