[SPARK-26905][SQL] Follow the SQL:2016 reserved keywords

### What changes were proposed in this pull request?

This PR intends to move keywords `ANTI`, `SEMI`, and `MINUS` from reserved to non-reserved.

### Why are the changes needed?

To comply with the ANSI/SQL standard.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added tests.

Closes #28807 from maropu/SPARK-26905-2.

Authored-by: Takeshi Yamamuro <yamamuro@apache.org>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
Takeshi Yamamuro 2020-06-16 00:27:45 +09:00
parent eae1747b66
commit 3698a14204
4 changed files with 429 additions and 5 deletions

View file

@@ -135,7 +135,7 @@ Below is a list of all the keywords in Spark SQL.
|ALTER|non-reserved|non-reserved|reserved|
|ANALYZE|non-reserved|non-reserved|non-reserved|
|AND|reserved|non-reserved|reserved|
|ANTI|reserved|strict-non-reserved|non-reserved|
|ANTI|non-reserved|strict-non-reserved|non-reserved|
|ANY|reserved|non-reserved|reserved|
|ARCHIVE|non-reserved|non-reserved|non-reserved|
|ARRAY|non-reserved|non-reserved|reserved|
@@ -264,7 +264,7 @@ Below is a list of all the keywords in Spark SQL.
|MAP|non-reserved|non-reserved|non-reserved|
|MATCHED|non-reserved|non-reserved|non-reserved|
|MERGE|non-reserved|non-reserved|non-reserved|
|MINUS|reserved|strict-non-reserved|non-reserved|
|MINUS|non-reserved|strict-non-reserved|non-reserved|
|MINUTE|reserved|non-reserved|reserved|
|MONTH|reserved|non-reserved|reserved|
|MSCK|non-reserved|non-reserved|non-reserved|
@@ -325,7 +325,7 @@ Below is a list of all the keywords in Spark SQL.
|SCHEMA|non-reserved|non-reserved|non-reserved|
|SECOND|reserved|non-reserved|reserved|
|SELECT|reserved|non-reserved|reserved|
|SEMI|reserved|strict-non-reserved|non-reserved|
|SEMI|non-reserved|strict-non-reserved|non-reserved|
|SEPARATED|non-reserved|non-reserved|non-reserved|
|SERDE|non-reserved|non-reserved|non-reserved|
|SERDEPROPERTIES|non-reserved|non-reserved|non-reserved|

View file

@@ -1014,6 +1014,7 @@ ansiNonReserved
| AFTER
| ALTER
| ANALYZE
| ANTI
| ARCHIVE
| ARRAY
| ASC
@@ -1146,10 +1147,12 @@ ansiNonReserved
| ROW
| ROWS
| SCHEMA
| SEMI
| SEPARATED
| SERDE
| SERDEPROPERTIES
| SET
| SETMINUS
| SETS
| SHOW
| SKEWED

View file

@@ -0,0 +1,401 @@
-- This file comes from: https://github.com/postgres/postgres/tree/master/doc/src/sgml/keywords
ABS
ACOS
ALL
ALLOCATE
ALTER
AND
ANY
ARE
ARRAY
ARRAY_AGG
ARRAY_MAX_CARDINALITY
AS
ASENSITIVE
ASIN
ASYMMETRIC
AT
ATAN
ATOMIC
AUTHORIZATION
AVG
BEGIN
BEGIN_FRAME
BEGIN_PARTITION
BETWEEN
BIGINT
BINARY
BLOB
BOOLEAN
BOTH
BY
CALL
CALLED
CARDINALITY
CASCADED
CASE
CAST
CEIL
CEILING
CHAR
CHAR_LENGTH
CHARACTER
CHARACTER_LENGTH
CHECK
CLASSIFIER
CLOB
CLOSE
COALESCE
COLLATE
COLLECT
COLUMN
COMMIT
CONDITION
CONNECT
CONSTRAINT
CONTAINS
CONVERT
COPY
CORR
CORRESPONDING
COS
COSH
COUNT
COVAR_POP
COVAR_SAMP
CREATE
CROSS
CUBE
CUME_DIST
CURRENT
CURRENT_CATALOG
CURRENT_DATE
CURRENT_DEFAULT_TRANSFORM_GROUP
CURRENT_PATH
CURRENT_ROLE
CURRENT_ROW
CURRENT_SCHEMA
CURRENT_TIME
CURRENT_TIMESTAMP
CURRENT_TRANSFORM_GROUP_FOR_TYPE
CURRENT_USER
CURSOR
CYCLE
DATE
DAY
DEALLOCATE
DEC
DECIMAL
DECFLOAT
DECLARE
DEFAULT
DEFINE
DELETE
DENSE_RANK
DEREF
DESCRIBE
DETERMINISTIC
DISCONNECT
DISTINCT
DOUBLE
DROP
DYNAMIC
EACH
ELEMENT
ELSE
EMPTY
END
END_FRAME
END_PARTITION
END-EXEC
EQUALS
ESCAPE
EVERY
EXCEPT
EXEC
EXECUTE
EXISTS
EXP
EXTERNAL
EXTRACT
FALSE
FETCH
FILTER
FIRST_VALUE
FLOAT
FLOOR
FOR
FOREIGN
FRAME_ROW
FREE
FROM
FULL
FUNCTION
FUSION
GET
GLOBAL
GRANT
GROUP
GROUPING
GROUPS
HAVING
HOLD
HOUR
IDENTITY
IN
INDICATOR
INITIAL
INNER
INOUT
INSENSITIVE
INSERT
INT
INTEGER
INTERSECT
INTERSECTION
INTERVAL
INTO
IS
JOIN
JSON_ARRAY
JSON_ARRAYAGG
JSON_EXISTS
JSON_OBJECT
JSON_OBJECTAGG
JSON_QUERY
JSON_TABLE
JSON_TABLE_PRIMITIVE
JSON_VALUE
LAG
LANGUAGE
LARGE
LAST_VALUE
LATERAL
LEAD
LEADING
LEFT
LIKE
LIKE_REGEX
LISTAGG
LN
LOCAL
LOCALTIME
LOCALTIMESTAMP
LOG
LOG10
LOWER
MATCH
MATCH_NUMBER
MATCH_RECOGNIZE
MATCHES
MAX
MEASURES
MEMBER
MERGE
METHOD
MIN
MINUTE
MOD
MODIFIES
MODULE
MONTH
MULTISET
NATIONAL
NATURAL
NCHAR
NCLOB
NEW
NO
NONE
NORMALIZE
NOT
NTH_VALUE
NTILE
NULL
NULLIF
NUMERIC
OCTET_LENGTH
OCCURRENCES_REGEX
OF
OFFSET
OLD
OMIT
ON
ONE
ONLY
OPEN
OR
ORDER
OUT
OUTER
OVER
OVERLAPS
OVERLAY
PARAMETER
PARTITION
PATTERN
PER
PERCENT
PERCENT_RANK
PERCENTILE_CONT
PERCENTILE_DISC
PERIOD
PERMUTE
PORTION
POSITION
POSITION_REGEX
POWER
PRECEDES
PRECISION
PREPARE
PRIMARY
PROCEDURE
PTF
RANGE
RANK
READS
REAL
RECURSIVE
REF
REFERENCES
REFERENCING
REGR_AVGX
REGR_AVGY
REGR_COUNT
REGR_INTERCEPT
REGR_R2
REGR_SLOPE
REGR_SXX
REGR_SXY
REGR_SYY
RELEASE
RESULT
RETURN
RETURNS
REVOKE
RIGHT
ROLLBACK
ROLLUP
ROW
ROW_NUMBER
ROWS
RUNNING
SAVEPOINT
SCOPE
SCROLL
SEARCH
SECOND
SEEK
SELECT
SENSITIVE
SESSION_USER
SET
SHOW
SIMILAR
SIN
SINH
SKIP
SMALLINT
SOME
SPECIFIC
SPECIFICTYPE
SQL
SQLEXCEPTION
SQLSTATE
SQLWARNING
SQRT
START
STATIC
STDDEV_POP
STDDEV_SAMP
SUBMULTISET
SUBSET
SUBSTRING
SUBSTRING_REGEX
SUCCEEDS
SUM
SYMMETRIC
SYSTEM
SYSTEM_TIME
SYSTEM_USER
TABLE
TABLESAMPLE
TAN
TANH
THEN
TIME
TIMESTAMP
TIMEZONE_HOUR
TIMEZONE_MINUTE
TO
TRAILING
TRANSLATE
TRANSLATE_REGEX
TRANSLATION
TREAT
TRIGGER
TRIM
TRIM_ARRAY
TRUE
TRUNCATE
UESCAPE
UNION
UNIQUE
UNKNOWN
UNMATCHED
UNNEST
UPDATE
UPPER
USER
USING
VALUE
VALUES
VALUE_OF
VAR_POP
VAR_SAMP
VARBINARY
VARCHAR
VARYING
VERSIONING
WHEN
WHENEVER
WHERE
WIDTH_BUCKET
WINDOW
WITH
WITHIN
WITHOUT
YEAR
DATALINK
DLNEWCOPY
DLPREVIOUSCOPY
DLURLCOMPLETE
DLURLCOMPLETEWRITE
DLURLCOMPLETEONLY
DLURLPATH
DLURLPATHWRITE
DLURLPATHONLY
DLURLSCHEME
DLURLSERVER
DLVALUE
IMPORT
XML
XMLAGG
XMLATTRIBUTES
XMLBINARY
XMLCAST
XMLCOMMENT
XMLCONCAT
XMLDOCUMENT
XMLELEMENT
XMLEXISTS
XMLFOREST
XMLITERATE
XMLNAMESPACES
XMLPARSE
XMLPI
XMLQUERY
XMLSERIALIZE
XMLTABLE
XMLTEXT
XMLVALIDATE

View file

@@ -16,8 +16,11 @@
*/
package org.apache.spark.sql.catalyst.parser
import java.io.File
import java.nio.file.Files
import java.util.Locale
import scala.collection.JavaConverters._
import scala.collection.mutable
import org.apache.spark.SparkFunSuite
@@ -340,7 +343,12 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
// The case where a symbol has multiple literal definitions,
// e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
if (hasMultipleLiterals) {
val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq
// Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
val litDef = """([A-Z_]+)""".r
val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
case litDef(lit) => Some(lit)
case _ => None
}
(symbol, literals) :: Nil
} else {
val literal = literalDef.replaceAll("'", "").trim
@@ -388,12 +396,24 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
test("check # of reserved keywords") {
val numReservedKeywords = 78
val numReservedKeywords = 74
assert(reservedKeywordsInAnsiMode.size == numReservedKeywords,
s"The expected number of reserved keywords is $numReservedKeywords, but " +
s"${reservedKeywordsInAnsiMode.size} found.")
}
test("reserved keywords in Spark are also reserved in SQL 2016") {
withTempDir { dir =>
val tmpFile = new File(dir, "tmp")
val is = Thread.currentThread().getContextClassLoader
.getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
Files.copy(is, tmpFile.toPath)
val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
.asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty)
}
}
test("table identifier") {
// Regular names.
assert(TableIdentifier("q") === parseTableIdentifier("q"))