[SPARK-29084][SQL][TESTS] Check method bytecode size in BenchmarkQueryTest

### What changes were proposed in this pull request? This pr proposes to check method bytecode size in `BenchmarkQueryTest`. This metric is critical for performance numbers. ### Why are the changes needed? For performance checks ### Does this PR introduce any user-facing change? No ### How was this patch tested? N/A Closes #25788 from maropu/CheckMethodSize. Authored-by: Takeshi Yamamuro <yamamuro@apache.org> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
2019-09-22 14:47:42 -07:00 · 2019-09-22 14:47:42 -07:00 · 7a2ea58e78
parent 51d3509428
commit 7a2ea58e78
3 changed files with 19 additions and 7 deletions
--- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
@ -18,7 +18,7 @@
 package org.apache.spark.sql

 import org.apache.spark.internal.config.Tests.IS_TESTING
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator}
+import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
 import org.apache.spark.sql.test.SharedSparkSession
@ -48,7 +48,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
    RuleExecutor.resetMetrics()
  }

-  protected def checkGeneratedCode(plan: SparkPlan): Unit = {
+  protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
    val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
    plan foreach {
      case s: WholeStageCodegenExec =>
@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
    }
    codegenSubtrees.toSeq.foreach { subtree =>
      val code = subtree.doCodeGen()._2
-      try {
+      val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try {
        // Just check the generated code can be properly compiled
        CodeGenerator.compile(code)
      } catch {
@ -72,6 +72,11 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
             """.stripMargin
          throw new Exception(msg, e)
      }
+
+      assert(!checkMethodCodeSize ||
+          maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT,
+        s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " +
+          s"and JIT optimization might not work:\n${subtree.treeString}")
    }
  }
 }
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
@ -82,13 +82,19 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema {
    "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59",
    "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max")

+  // List up the known queries having too large code in a generated function.
+  // A JIRA file for `modified-q3` is as follows;
+  // [SPARK-29128] Split predicate code in OR expressions
+  val blackListForMethodCodeSizeCheck = Set("modified-q3")
+
  modifiedTPCDSQueries.foreach { name =>
    val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql",
      classLoader = Thread.currentThread().getContextClassLoader)
-    test(s"modified-$name") {
+    val testName = s"modified-$name"
+    test(testName) {
      // check the plans can be properly generated
      val plan = sql(queryString).queryExecution.executedPlan
-      checkGeneratedCode(plan)
+      checkGeneratedCode(plan, !blackListForMethodCodeSizeCheck.contains(testName))
    }
  }
 }
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala
@ -32,8 +32,9 @@ import org.apache.spark.sql.execution.window.WindowExec

 class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite {

-  override protected def checkGeneratedCode(plan: SparkPlan): Unit = {
-    super.checkGeneratedCode(plan)
+  override protected def checkGeneratedCode(
+      plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
+    super.checkGeneratedCode(plan, checkMethodCodeSize)
    checkLogicalPlanTag(plan)
  }