[SPARK-26905][SQL] Follow the SQL:2016 reserved keywords
### What changes were proposed in this pull request? This PR intends to move keywords `ANTI`, `SEMI`, and `MINUS` from reserved to non-reserved. ### Why are the changes needed? To comply with the ANSI/SQL standard. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #28807 from maropu/SPARK-26905-2. Authored-by: Takeshi Yamamuro <yamamuro@apache.org> Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
parent
eae1747b66
commit
3698a14204
|
@ -135,7 +135,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
|ALTER|non-reserved|non-reserved|reserved|
|
||||
|ANALYZE|non-reserved|non-reserved|non-reserved|
|
||||
|AND|reserved|non-reserved|reserved|
|
||||
|ANTI|reserved|strict-non-reserved|non-reserved|
|
||||
|ANTI|non-reserved|strict-non-reserved|non-reserved|
|
||||
|ANY|reserved|non-reserved|reserved|
|
||||
|ARCHIVE|non-reserved|non-reserved|non-reserved|
|
||||
|ARRAY|non-reserved|non-reserved|reserved|
|
||||
|
@ -264,7 +264,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
|MAP|non-reserved|non-reserved|non-reserved|
|
||||
|MATCHED|non-reserved|non-reserved|non-reserved|
|
||||
|MERGE|non-reserved|non-reserved|non-reserved|
|
||||
|MINUS|reserved|strict-non-reserved|non-reserved|
|
||||
|MINUS|non-reserved|strict-non-reserved|non-reserved|
|
||||
|MINUTE|reserved|non-reserved|reserved|
|
||||
|MONTH|reserved|non-reserved|reserved|
|
||||
|MSCK|non-reserved|non-reserved|non-reserved|
|
||||
|
@ -325,7 +325,7 @@ Below is a list of all the keywords in Spark SQL.
|
|||
|SCHEMA|non-reserved|non-reserved|non-reserved|
|
||||
|SECOND|reserved|non-reserved|reserved|
|
||||
|SELECT|reserved|non-reserved|reserved|
|
||||
|SEMI|reserved|strict-non-reserved|non-reserved|
|
||||
|SEMI|non-reserved|strict-non-reserved|non-reserved|
|
||||
|SEPARATED|non-reserved|non-reserved|non-reserved|
|
||||
|SERDE|non-reserved|non-reserved|non-reserved|
|
||||
|SERDEPROPERTIES|non-reserved|non-reserved|non-reserved|
|
||||
|
|
|
@ -1014,6 +1014,7 @@ ansiNonReserved
|
|||
| AFTER
|
||||
| ALTER
|
||||
| ANALYZE
|
||||
| ANTI
|
||||
| ARCHIVE
|
||||
| ARRAY
|
||||
| ASC
|
||||
|
@ -1146,10 +1147,12 @@ ansiNonReserved
|
|||
| ROW
|
||||
| ROWS
|
||||
| SCHEMA
|
||||
| SEMI
|
||||
| SEPARATED
|
||||
| SERDE
|
||||
| SERDEPROPERTIES
|
||||
| SET
|
||||
| SETMINUS
|
||||
| SETS
|
||||
| SHOW
|
||||
| SKEWED
|
||||
|
|
|
@ -0,0 +1,401 @@
|
|||
-- This file comes from: https://github.com/postgres/postgres/tree/master/doc/src/sgml/keywords
|
||||
ABS
|
||||
ACOS
|
||||
ALL
|
||||
ALLOCATE
|
||||
ALTER
|
||||
AND
|
||||
ANY
|
||||
ARE
|
||||
ARRAY
|
||||
ARRAY_AGG
|
||||
ARRAY_MAX_CARDINALITY
|
||||
AS
|
||||
ASENSITIVE
|
||||
ASIN
|
||||
ASYMMETRIC
|
||||
AT
|
||||
ATAN
|
||||
ATOMIC
|
||||
AUTHORIZATION
|
||||
AVG
|
||||
BEGIN
|
||||
BEGIN_FRAME
|
||||
BEGIN_PARTITION
|
||||
BETWEEN
|
||||
BIGINT
|
||||
BINARY
|
||||
BLOB
|
||||
BOOLEAN
|
||||
BOTH
|
||||
BY
|
||||
CALL
|
||||
CALLED
|
||||
CARDINALITY
|
||||
CASCADED
|
||||
CASE
|
||||
CAST
|
||||
CEIL
|
||||
CEILING
|
||||
CHAR
|
||||
CHAR_LENGTH
|
||||
CHARACTER
|
||||
CHARACTER_LENGTH
|
||||
CHECK
|
||||
CLASSIFIER
|
||||
CLOB
|
||||
CLOSE
|
||||
COALESCE
|
||||
COLLATE
|
||||
COLLECT
|
||||
COLUMN
|
||||
COMMIT
|
||||
CONDITION
|
||||
CONNECT
|
||||
CONSTRAINT
|
||||
CONTAINS
|
||||
CONVERT
|
||||
COPY
|
||||
CORR
|
||||
CORRESPONDING
|
||||
COS
|
||||
COSH
|
||||
COUNT
|
||||
COVAR_POP
|
||||
COVAR_SAMP
|
||||
CREATE
|
||||
CROSS
|
||||
CUBE
|
||||
CUME_DIST
|
||||
CURRENT
|
||||
CURRENT_CATALOG
|
||||
CURRENT_DATE
|
||||
CURRENT_DEFAULT_TRANSFORM_GROUP
|
||||
CURRENT_PATH
|
||||
CURRENT_ROLE
|
||||
CURRENT_ROW
|
||||
CURRENT_SCHEMA
|
||||
CURRENT_TIME
|
||||
CURRENT_TIMESTAMP
|
||||
CURRENT_TRANSFORM_GROUP_FOR_TYPE
|
||||
CURRENT_USER
|
||||
CURSOR
|
||||
CYCLE
|
||||
DATE
|
||||
DAY
|
||||
DEALLOCATE
|
||||
DEC
|
||||
DECIMAL
|
||||
DECFLOAT
|
||||
DECLARE
|
||||
DEFAULT
|
||||
DEFINE
|
||||
DELETE
|
||||
DENSE_RANK
|
||||
DEREF
|
||||
DESCRIBE
|
||||
DETERMINISTIC
|
||||
DISCONNECT
|
||||
DISTINCT
|
||||
DOUBLE
|
||||
DROP
|
||||
DYNAMIC
|
||||
EACH
|
||||
ELEMENT
|
||||
ELSE
|
||||
EMPTY
|
||||
END
|
||||
END_FRAME
|
||||
END_PARTITION
|
||||
END-EXEC
|
||||
EQUALS
|
||||
ESCAPE
|
||||
EVERY
|
||||
EXCEPT
|
||||
EXEC
|
||||
EXECUTE
|
||||
EXISTS
|
||||
EXP
|
||||
EXTERNAL
|
||||
EXTRACT
|
||||
FALSE
|
||||
FETCH
|
||||
FILTER
|
||||
FIRST_VALUE
|
||||
FLOAT
|
||||
FLOOR
|
||||
FOR
|
||||
FOREIGN
|
||||
FRAME_ROW
|
||||
FREE
|
||||
FROM
|
||||
FULL
|
||||
FUNCTION
|
||||
FUSION
|
||||
GET
|
||||
GLOBAL
|
||||
GRANT
|
||||
GROUP
|
||||
GROUPING
|
||||
GROUPS
|
||||
HAVING
|
||||
HOLD
|
||||
HOUR
|
||||
IDENTITY
|
||||
IN
|
||||
INDICATOR
|
||||
INITIAL
|
||||
INNER
|
||||
INOUT
|
||||
INSENSITIVE
|
||||
INSERT
|
||||
INT
|
||||
INTEGER
|
||||
INTERSECT
|
||||
INTERSECTION
|
||||
INTERVAL
|
||||
INTO
|
||||
IS
|
||||
JOIN
|
||||
JSON_ARRAY
|
||||
JSON_ARRAYAGG
|
||||
JSON_EXISTS
|
||||
JSON_OBJECT
|
||||
JSON_OBJECTAGG
|
||||
JSON_QUERY
|
||||
JSON_TABLE
|
||||
JSON_TABLE_PRIMITIVE
|
||||
JSON_VALUE
|
||||
LAG
|
||||
LANGUAGE
|
||||
LARGE
|
||||
LAST_VALUE
|
||||
LATERAL
|
||||
LEAD
|
||||
LEADING
|
||||
LEFT
|
||||
LIKE
|
||||
LIKE_REGEX
|
||||
LISTAGG
|
||||
LN
|
||||
LOCAL
|
||||
LOCALTIME
|
||||
LOCALTIMESTAMP
|
||||
LOG
|
||||
LOG10
|
||||
LOWER
|
||||
MATCH
|
||||
MATCH_NUMBER
|
||||
MATCH_RECOGNIZE
|
||||
MATCHES
|
||||
MAX
|
||||
MEASURES
|
||||
MEMBER
|
||||
MERGE
|
||||
METHOD
|
||||
MIN
|
||||
MINUTE
|
||||
MOD
|
||||
MODIFIES
|
||||
MODULE
|
||||
MONTH
|
||||
MULTISET
|
||||
NATIONAL
|
||||
NATURAL
|
||||
NCHAR
|
||||
NCLOB
|
||||
NEW
|
||||
NO
|
||||
NONE
|
||||
NORMALIZE
|
||||
NOT
|
||||
NTH_VALUE
|
||||
NTILE
|
||||
NULL
|
||||
NULLIF
|
||||
NUMERIC
|
||||
OCTET_LENGTH
|
||||
OCCURRENCES_REGEX
|
||||
OF
|
||||
OFFSET
|
||||
OLD
|
||||
OMIT
|
||||
ON
|
||||
ONE
|
||||
ONLY
|
||||
OPEN
|
||||
OR
|
||||
ORDER
|
||||
OUT
|
||||
OUTER
|
||||
OVER
|
||||
OVERLAPS
|
||||
OVERLAY
|
||||
PARAMETER
|
||||
PARTITION
|
||||
PATTERN
|
||||
PER
|
||||
PERCENT
|
||||
PERCENT_RANK
|
||||
PERCENTILE_CONT
|
||||
PERCENTILE_DISC
|
||||
PERIOD
|
||||
PERMUTE
|
||||
PORTION
|
||||
POSITION
|
||||
POSITION_REGEX
|
||||
POWER
|
||||
PRECEDES
|
||||
PRECISION
|
||||
PREPARE
|
||||
PRIMARY
|
||||
PROCEDURE
|
||||
PTF
|
||||
RANGE
|
||||
RANK
|
||||
READS
|
||||
REAL
|
||||
RECURSIVE
|
||||
REF
|
||||
REFERENCES
|
||||
REFERENCING
|
||||
REGR_AVGX
|
||||
REGR_AVGY
|
||||
REGR_COUNT
|
||||
REGR_INTERCEPT
|
||||
REGR_R2
|
||||
REGR_SLOPE
|
||||
REGR_SXX
|
||||
REGR_SXY
|
||||
REGR_SYY
|
||||
RELEASE
|
||||
RESULT
|
||||
RETURN
|
||||
RETURNS
|
||||
REVOKE
|
||||
RIGHT
|
||||
ROLLBACK
|
||||
ROLLUP
|
||||
ROW
|
||||
ROW_NUMBER
|
||||
ROWS
|
||||
RUNNING
|
||||
SAVEPOINT
|
||||
SCOPE
|
||||
SCROLL
|
||||
SEARCH
|
||||
SECOND
|
||||
SEEK
|
||||
SELECT
|
||||
SENSITIVE
|
||||
SESSION_USER
|
||||
SET
|
||||
SHOW
|
||||
SIMILAR
|
||||
SIN
|
||||
SINH
|
||||
SKIP
|
||||
SMALLINT
|
||||
SOME
|
||||
SPECIFIC
|
||||
SPECIFICTYPE
|
||||
SQL
|
||||
SQLEXCEPTION
|
||||
SQLSTATE
|
||||
SQLWARNING
|
||||
SQRT
|
||||
START
|
||||
STATIC
|
||||
STDDEV_POP
|
||||
STDDEV_SAMP
|
||||
SUBMULTISET
|
||||
SUBSET
|
||||
SUBSTRING
|
||||
SUBSTRING_REGEX
|
||||
SUCCEEDS
|
||||
SUM
|
||||
SYMMETRIC
|
||||
SYSTEM
|
||||
SYSTEM_TIME
|
||||
SYSTEM_USER
|
||||
TABLE
|
||||
TABLESAMPLE
|
||||
TAN
|
||||
TANH
|
||||
THEN
|
||||
TIME
|
||||
TIMESTAMP
|
||||
TIMEZONE_HOUR
|
||||
TIMEZONE_MINUTE
|
||||
TO
|
||||
TRAILING
|
||||
TRANSLATE
|
||||
TRANSLATE_REGEX
|
||||
TRANSLATION
|
||||
TREAT
|
||||
TRIGGER
|
||||
TRIM
|
||||
TRIM_ARRAY
|
||||
TRUE
|
||||
TRUNCATE
|
||||
UESCAPE
|
||||
UNION
|
||||
UNIQUE
|
||||
UNKNOWN
|
||||
UNMATCHED
|
||||
UNNEST
|
||||
UPDATE
|
||||
UPPER
|
||||
USER
|
||||
USING
|
||||
VALUE
|
||||
VALUES
|
||||
VALUE_OF
|
||||
VAR_POP
|
||||
VAR_SAMP
|
||||
VARBINARY
|
||||
VARCHAR
|
||||
VARYING
|
||||
VERSIONING
|
||||
WHEN
|
||||
WHENEVER
|
||||
WHERE
|
||||
WIDTH_BUCKET
|
||||
WINDOW
|
||||
WITH
|
||||
WITHIN
|
||||
WITHOUT
|
||||
YEAR
|
||||
DATALINK
|
||||
DLNEWCOPY
|
||||
DLPREVIOUSCOPY
|
||||
DLURLCOMPLETE
|
||||
DLURLCOMPLETEWRITE
|
||||
DLURLCOMPLETEONLY
|
||||
DLURLPATH
|
||||
DLURLPATHWRITE
|
||||
DLURLPATHONLY
|
||||
DLURLSCHEME
|
||||
DLURLSERVER
|
||||
DLVALUE
|
||||
IMPORT
|
||||
XML
|
||||
XMLAGG
|
||||
XMLATTRIBUTES
|
||||
XMLBINARY
|
||||
XMLCAST
|
||||
XMLCOMMENT
|
||||
XMLCONCAT
|
||||
XMLDOCUMENT
|
||||
XMLELEMENT
|
||||
XMLEXISTS
|
||||
XMLFOREST
|
||||
XMLITERATE
|
||||
XMLNAMESPACES
|
||||
XMLPARSE
|
||||
XMLPI
|
||||
XMLQUERY
|
||||
XMLSERIALIZE
|
||||
XMLTABLE
|
||||
XMLTEXT
|
||||
XMLVALIDATE
|
|
@ -16,8 +16,11 @@
|
|||
*/
|
||||
package org.apache.spark.sql.catalyst.parser
|
||||
|
||||
import java.io.File
|
||||
import java.nio.file.Files
|
||||
import java.util.Locale
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
|
||||
import org.apache.spark.SparkFunSuite
|
||||
|
@ -340,7 +343,12 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
// The case where a symbol has multiple literal definitions,
|
||||
// e.g., `DATABASES: 'DATABASES' | 'SCHEMAS';`.
|
||||
if (hasMultipleLiterals) {
|
||||
val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq
|
||||
// Filters out inappropriate entries, e.g., `!` in `NOT: 'NOT' | '!';`
|
||||
val litDef = """([A-Z_]+)""".r
|
||||
val literals = splitDefs.map(_.replaceAll("'", "").trim).toSeq.flatMap {
|
||||
case litDef(lit) => Some(lit)
|
||||
case _ => None
|
||||
}
|
||||
(symbol, literals) :: Nil
|
||||
} else {
|
||||
val literal = literalDef.replaceAll("'", "").trim
|
||||
|
@ -388,12 +396,24 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper {
|
|||
val reservedKeywordsInAnsiMode = allCandidateKeywords -- nonReservedKeywordsInAnsiMode
|
||||
|
||||
test("check # of reserved keywords") {
|
||||
val numReservedKeywords = 78
|
||||
val numReservedKeywords = 74
|
||||
assert(reservedKeywordsInAnsiMode.size == numReservedKeywords,
|
||||
s"The expected number of reserved keywords is $numReservedKeywords, but " +
|
||||
s"${reservedKeywordsInAnsiMode.size} found.")
|
||||
}
|
||||
|
||||
test("reserved keywords in Spark are also reserved in SQL 2016") {
|
||||
withTempDir { dir =>
|
||||
val tmpFile = new File(dir, "tmp")
|
||||
val is = Thread.currentThread().getContextClassLoader
|
||||
.getResourceAsStream("ansi-sql-2016-reserved-keywords.txt")
|
||||
Files.copy(is, tmpFile.toPath)
|
||||
val reservedKeywordsInSql2016 = Files.readAllLines(tmpFile.toPath)
|
||||
.asScala.filterNot(_.startsWith("--")).map(_.trim).toSet
|
||||
assert((reservedKeywordsInAnsiMode -- reservedKeywordsInSql2016).isEmpty)
|
||||
}
|
||||
}
|
||||
|
||||
test("table identifier") {
|
||||
// Regular names.
|
||||
assert(TableIdentifier("q") === parseTableIdentifier("q"))
|
||||
|
|
Loading…
Reference in a new issue