From b9494206a50d39973b46f32f2d44cc8099c078d4 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 5 May 2020 10:04:16 +0900 Subject: [PATCH] [SPARK-31372][SQL][TEST][FOLLOW-UP] Improve ExpressionsSchemaSuite so that easy to track the diff ### What changes were proposed in this pull request? This PR follows up https://github.com/apache/spark/pull/28194, as discussed at https://github.com/apache/spark/pull/28194/files#r418418796. This PR improves `ExpressionsSchemaSuite` so that the diff is easy to track. Although the assertion in `ExpressionsSchemaSuite` at line https://github.com/apache/spark/blob/b7cde42b04b21c9bfee6535199cf385855c15853/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala#L165 only needs to compare the size of the expected output with the size of the newest output, the scalatest framework also prints extra information that contains the entire content of both the expected output and the newest output. This PR avoids that issue by comparing sizes that are computed before the assertion. After this PR, the exception looks like below: ``` [info] - Check schemas for expression examples *** FAILED *** (7 seconds, 336 milliseconds) [info] 340 did not equal 341 Expected 332 blocks in result file but got 333. Try regenerate the result files. (ExpressionsSchemaSuite.scala:167) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) [info] at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560) [info] at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503) [info] at org.apache.spark.sql.ExpressionsSchemaSuite.$anonfun$new$1(ExpressionsSchemaSuite.scala:167) ``` ### Why are the changes needed? To make the exception message more concise and clear. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #28430 from beliefer/improve-expressions-schema-suite. 
Authored-by: beliefer Signed-off-by: HyukjinKwon --- .../spark/sql/ExpressionsSchemaSuite.scala | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index dd72473f0e..6d6cbf7508 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -136,19 +136,19 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } } + val header = Seq( + s"", + "## Summary", + s" - Number of queries: ${outputs.size}", + s" - Number of expressions that missing example: ${missingExamples.size}", + s" - Expressions missing examples: ${missingExamples.mkString(",")}", + "## Schema of Built-in Functions", + "| Class name | Function name or alias | Query example | Output schema |", + "| ---------- | ---------------------- | ------------- | ------------- |" + ) + if (regenerateGoldenFiles) { - val missingExampleStr = missingExamples.mkString(",") - val goldenOutput = { - s"\n" + - "## Summary\n" + - s" - Number of queries: ${outputs.size}\n" + - s" - Number of expressions that missing example: ${missingExamples.size}\n" + - s" - Expressions missing examples: $missingExampleStr\n" + - "## Schema of Built-in Functions\n" + - "| Class name | Function name or alias | Query example | Output schema |\n" + - "| ---------- | ---------------------- | ------------- | ------------- |\n" + - outputBuffer.mkString("\n") - } + val goldenOutput = (header ++ outputBuffer).mkString("\n") val parent = resultFile.getParentFile if (!parent.exists()) { assert(parent.mkdirs(), "Could not create directory: " + parent) @@ -156,18 +156,19 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { stringToFile(resultFile, goldenOutput) } + val outputSize = outputs.size + val headerSize = 
header.size val expectedOutputs: Seq[QueryOutput] = { - val goldenOutput = fileToString(resultFile) - val lines = goldenOutput.split("\n") + val expectedGoldenOutput = fileToString(resultFile) + val lines = expectedGoldenOutput.split("\n") + val expectedSize = lines.size - // The header of golden file has one line, plus four lines of the summary and three - // lines of the header of schema table. - assert(lines.size == outputs.size + 8, - s"Expected ${outputs.size + 8} blocks in result file but got ${lines.size}. " + - s"Try regenerate the result files.") + assert(expectedSize == outputSize + headerSize, + s"Expected $expectedSize blocks in result file but got " + + s"${outputSize + headerSize}. Try regenerate the result files.") - Seq.tabulate(outputs.size) { i => - val segments = lines(i + 8).split('|') + Seq.tabulate(outputSize) { i => + val segments = lines(i + headerSize).split('|') QueryOutput( className = segments(1).trim, funcName = segments(2).trim, @@ -177,7 +178,8 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } // Compare results. - assert(expectedOutputs.size == outputs.size, s"Number of queries not equals") + assert(expectedOutputs.size == outputSize, + "The number of queries not equals the number of expected queries.") outputs.zip(expectedOutputs).foreach { case (output, expected) => assert(expected.sql == output.sql, "SQL query did not match")