[SPARK-31372][SQL][TEST][FOLLOW-UP] Improve ExpressionsSchemaSuite so that the diff is easy to track

### What changes were proposed in this pull request?
This PR follows up https://github.com/apache/spark/pull/28194.
As discussed at https://github.com/apache/spark/pull/28194/files#r418418796,
this PR improves `ExpressionsSchemaSuite` so that the diff is easy to track.
Although the assertion in `ExpressionsSchemaSuite` at line
b7cde42b04/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala (L165)
only needs to compare the expected output size with the newest output size, the ScalaTest framework also prints extra information that contains the entire content of both the expected output and the newest output.
This PR will try to avoid this issue.
After this PR, the exception looks like below:
```
[info] - Check schemas for expression examples *** FAILED *** (7 seconds, 336 milliseconds)
[info]   340 did not equal 341 Expected 332 blocks in result file but got 333. Try regenerate the result files. (ExpressionsSchemaSuite.scala:167)
[info]   org.scalatest.exceptions.TestFailedException:
[info]   at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530)
[info]   at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529)
[info]   at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560)
[info]   at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503)
[info]   at org.apache.spark.sql.ExpressionsSchemaSuite.$anonfun$new$1(ExpressionsSchemaSuite.scala:167)
```

### Why are the changes needed?
Make the exception more concise and clear.

### Does this PR introduce _any_ user-facing change?
'No'.

### How was this patch tested?
Jenkins test.

Closes #28430 from beliefer/improve-expressions-schema-suite.

Authored-by: beliefer <beliefer@163.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
beliefer 2020-05-05 10:04:16 +09:00 committed by HyukjinKwon
parent 372ccba063
commit b9494206a5

View file

@ -136,19 +136,19 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
}
}
val header = Seq(
s"<!-- Automatically generated by${getClass.getSimpleName} -->",
"## Summary",
s" - Number of queries: ${outputs.size}",
s" - Number of expressions that missing example: ${missingExamples.size}",
s" - Expressions missing examples: ${missingExamples.mkString(",")}",
"## Schema of Built-in Functions",
"| Class name | Function name or alias | Query example | Output schema |",
"| ---------- | ---------------------- | ------------- | ------------- |"
)
if (regenerateGoldenFiles) {
val missingExampleStr = missingExamples.mkString(",")
val goldenOutput = {
s"<!-- Automatically generated by${getClass.getSimpleName} -->\n" +
"## Summary\n" +
s" - Number of queries: ${outputs.size}\n" +
s" - Number of expressions that missing example: ${missingExamples.size}\n" +
s" - Expressions missing examples: $missingExampleStr\n" +
"## Schema of Built-in Functions\n" +
"| Class name | Function name or alias | Query example | Output schema |\n" +
"| ---------- | ---------------------- | ------------- | ------------- |\n" +
outputBuffer.mkString("\n")
}
val goldenOutput = (header ++ outputBuffer).mkString("\n")
val parent = resultFile.getParentFile
if (!parent.exists()) {
assert(parent.mkdirs(), "Could not create directory: " + parent)
@ -156,18 +156,19 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
stringToFile(resultFile, goldenOutput)
}
val outputSize = outputs.size
val headerSize = header.size
val expectedOutputs: Seq[QueryOutput] = {
val goldenOutput = fileToString(resultFile)
val lines = goldenOutput.split("\n")
val expectedGoldenOutput = fileToString(resultFile)
val lines = expectedGoldenOutput.split("\n")
val expectedSize = lines.size
// The header of golden file has one line, plus four lines of the summary and three
// lines of the header of schema table.
assert(lines.size == outputs.size + 8,
s"Expected ${outputs.size + 8} blocks in result file but got ${lines.size}. " +
s"Try regenerate the result files.")
assert(expectedSize == outputSize + headerSize,
s"Expected $expectedSize blocks in result file but got " +
s"${outputSize + headerSize}. Try regenerate the result files.")
Seq.tabulate(outputs.size) { i =>
val segments = lines(i + 8).split('|')
Seq.tabulate(outputSize) { i =>
val segments = lines(i + headerSize).split('|')
QueryOutput(
className = segments(1).trim,
funcName = segments(2).trim,
@ -177,7 +178,8 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
}
// Compare results.
assert(expectedOutputs.size == outputs.size, s"Number of queries not equals")
assert(expectedOutputs.size == outputSize,
"The number of queries not equals the number of expected queries.")
outputs.zip(expectedOutputs).foreach { case (output, expected) =>
assert(expected.sql == output.sql, "SQL query did not match")