[SPARK-32251][SQL][TESTS][FOLLOWUP] improve SQL keyword test

### What changes were proposed in this pull request?

Improve the `SQLKeywordSuite` so that:
1. it checks keywords under default mode as well
2. it checks the doc for typos in the keyword descriptions (one was found and is fixed in this PR)

### Why are the changes needed?

better test coverage

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

N/A

Closes #29200 from cloud-fan/test.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Wenchen Fan 2020-07-23 14:02:38 +00:00
parent aed8dbab1d
commit aa54dcf193
3 changed files with 43 additions and 7 deletions

View file

@ -264,7 +264,7 @@ Below is a list of all the keywords in Spark SQL.
|MAP|non-reserved|non-reserved|non-reserved|
|MATCHED|non-reserved|non-reserved|non-reserved|
|MERGE|non-reserved|non-reserved|non-reserved|
|MINUS|not-reserved|strict-non-reserved|non-reserved|
|MINUS|non-reserved|strict-non-reserved|non-reserved|
|MINUTE|reserved|non-reserved|reserved|
|MONTH|reserved|non-reserved|reserved|
|MSCK|non-reserved|non-reserved|non-reserved|

View file

@ -1226,6 +1226,7 @@ strictNonReserved
;
nonReserved
//--DEFAULT-NON-RESERVED-START
: ADD
| AFTER
| ALL
@ -1466,6 +1467,7 @@ nonReserved
| WITH
| YEAR
| ZONE
//--DEFAULT-NON-RESERVED-END
;
// NOTE: If you add a new token in the list below, you should update the list of keywords

View file

@ -38,7 +38,7 @@ trait SQLKeywordUtils extends SQLHelper {
}
// each element is an array of 4 strings: the keyword name, reserved or not in Spark ANSI mode,
// Spark non-ANSI mode, and the SQL standard.
// Spark default mode, and the SQL standard.
val keywordsInDoc: Array[Array[String]] = {
val docPath = {
java.nio.file.Paths.get(sparkHome, "docs", "sql-ref-ansi-compliance.md").toFile
@ -135,6 +135,19 @@ trait SQLKeywordUtils extends SQLHelper {
}
val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
val nonReservedKeywordsInDefaultMode: Set[String] = {
  // Matches one alternative of a grammar rule, e.g., ` : ADD` or ` | AFTER`, and captures the
  // symbol name that follows the `:` or `|` separator.
  val ruleAlternative = """\s*[\|:]\s*([A-Z_]+)\s*""".r
  // Only the grammar lines between the two marker comments are fed to the partial function.
  parseAntlrGrammars("//--DEFAULT-NON-RESERVED-START", "//--DEFAULT-NON-RESERVED-END") {
    case ruleAlternative(name) =>
      // A symbol registered in `symbolsToExpandIntoDifferentLiterals` is replaced by the literals
      // registered for it; any other symbol stands for itself.
      symbolsToExpandIntoDifferentLiterals.getOrElse(name, name :: Nil)
  }
}
}
class SQLKeywordSuite extends SparkFunSuite with SQLKeywordUtils {
@ -146,11 +159,32 @@ class SQLKeywordSuite extends SparkFunSuite with SQLKeywordUtils {
}
}
test("Spark keywords are documented correctly") {
val reservedKeywordsInDoc = keywordsInDoc.filter(_.apply(1) == "reserved").map(_.head).toSet
if (reservedKeywordsInAnsiMode != reservedKeywordsInDoc) {
val misImplemented = (reservedKeywordsInDoc -- reservedKeywordsInAnsiMode).toSeq.sorted
fail("Some keywords are documented as reserved but actually not: " +
test("Spark keywords are documented correctly under ANSI mode") {
  // Column 1 of each documented row describes the keyword under ANSI mode and must be either
  // `reserved` or `non-reserved`. The clue names the offending keyword so that a typo in the doc
  // can be located directly from the failure message.
  keywordsInDoc.foreach { row =>
    val desc = row(1)
    assert(desc == "reserved" || desc == "non-reserved",
      s"Unexpected ANSI mode description '$desc' for keyword ${row.head}")
  }
  val nonReservedInDoc = keywordsInDoc.filter(_.apply(1) == "non-reserved").map(_.head).toSet
  if (nonReservedKeywordsInAnsiMode != nonReservedInDoc) {
    // Report the symmetric difference: keywords that are non-reserved only in the doc, plus
    // keywords that are non-reserved only in the parsed grammar.
    val onlyInDoc = nonReservedInDoc -- nonReservedKeywordsInAnsiMode
    val onlyInGrammar = nonReservedKeywordsInAnsiMode -- nonReservedInDoc
    val misImplemented = (onlyInDoc ++ onlyInGrammar).toSeq.sorted
    fail("Some keywords are documented and implemented inconsistently: " +
      misImplemented.mkString(", "))
  }
}
test("Spark keywords are documented correctly under default mode") {
  // Column 2 of each documented row describes the keyword under default mode and must be either
  // `strict-non-reserved` or `non-reserved`. The clue names the offending keyword so that a typo
  // in the doc can be located directly from the failure message.
  keywordsInDoc.foreach { row =>
    val desc = row(2)
    assert(desc == "strict-non-reserved" || desc == "non-reserved",
      s"Unexpected default mode description '$desc' for keyword ${row.head}")
  }
  val nonReservedInDoc = keywordsInDoc.filter(_.apply(2) == "non-reserved").map(_.head).toSet
  if (nonReservedKeywordsInDefaultMode != nonReservedInDoc) {
    // Report the symmetric difference: keywords that are non-reserved only in the doc, plus
    // keywords that are non-reserved only in the parsed grammar.
    val onlyInDoc = nonReservedInDoc -- nonReservedKeywordsInDefaultMode
    val onlyInGrammar = nonReservedKeywordsInDefaultMode -- nonReservedInDoc
    val misImplemented = (onlyInDoc ++ onlyInGrammar).toSeq.sorted
    fail("Some keywords are documented and implemented inconsistently: " +
      misImplemented.mkString(", "))
  }
}