[SPARK-15789][SQL] Allow reserved keywords in most places

## What changes were proposed in this pull request?
The parser currently does not allow the use of some SQL keywords as table or field names. This PR adds supports for all keywords as identifier. The exception to this are table aliases, in this case most keywords are allowed except for join keywords (```anti, full, inner, left, semi, right, natural, on, join, cross```) and set-operator keywords (```union, intersect, except```).

## How was this patch tested?
I have added/moved/renamed tests in the catalyst `*ParserSuite`s.

Author: Herman van Hovell <hvanhovell@databricks.com>

Closes #13534 from hvanhovell/SPARK-15789.
This commit is contained in:
Herman van Hovell 2016-06-07 17:01:11 -07:00 committed by Wenchen Fan
parent 0cfd6192f3
commit 91fbc880b6
6 changed files with 35 additions and 28 deletions

View file

@ -109,9 +109,9 @@ statement
| SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))? #showFunctions
| SHOW CREATE TABLE tableIdentifier #showCreateTable
| (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
| (DESC | DESCRIBE) DATABASE EXTENDED? identifier #describeDatabase
| (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)?
tableIdentifier partitionSpec? describeColName? #describeTable
| (DESC | DESCRIBE) DATABASE EXTENDED? identifier #describeDatabase
| REFRESH TABLE tableIdentifier #refreshTable
| CACHE LAZY? TABLE identifier (AS? query)? #cacheTable
| UNCACHE TABLE identifier #uncacheTable
@ -251,7 +251,7 @@ tableProperty
;
tablePropertyKey
: looseIdentifier ('.' looseIdentifier)*
: identifier ('.' identifier)*
| STRING
;
@ -419,9 +419,9 @@ identifierComment
;
relationPrimary
: tableIdentifier sample? (AS? identifier)? #tableName
| '(' queryNoWith ')' sample? (AS? identifier)? #aliasedQuery
| '(' relation ')' sample? (AS? identifier)? #aliasedRelation
: tableIdentifier sample? (AS? strictIdentifier)? #tableName
| '(' queryNoWith ')' sample? (AS? strictIdentifier)? #aliasedQuery
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
| inlineTable #inlineTableDefault2
;
@ -456,8 +456,8 @@ expression
;
booleanExpression
: predicated #booleanDefault
| NOT booleanExpression #logicalNot
: NOT booleanExpression #logicalNot
| predicated #booleanDefault
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
| EXISTS '(' query ')' #exists
@ -597,16 +597,13 @@ qualifiedName
: identifier ('.' identifier)*
;
// Identifier that also allows the use of a number of SQL keywords (mainly for backwards compatibility).
looseIdentifier
: identifier
| FROM
| TO
| TABLE
| WITH
identifier
: strictIdentifier
| ANTI | FULL | INNER | LEFT | SEMI | RIGHT | NATURAL | JOIN | CROSS | ON
| UNION | INTERSECT | EXCEPT
;
identifier
strictIdentifier
: IDENTIFIER #unquotedIdentifier
| quotedIdentifier #quotedIdentifierAlternative
| nonReserved #unquotedIdentifier
@ -652,6 +649,9 @@ nonReserved
| AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | DELETE
| DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE
| NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE
| AND | CASE | CAST | DISTINCT | DIV | ELSE | END | FUNCTION | INTERVAL | MACRO | OR | STRATIFY | THEN
| UNBOUNDED | WHEN
| DATABASE | SELECT | FROM | WHERE | HAVING | TO | TABLE | WITH | NOT
;
SELECT: 'SELECT';

View file

@ -642,7 +642,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) {
val table = UnresolvedRelation(
visitTableIdentifier(ctx.tableIdentifier),
Option(ctx.identifier).map(_.getText))
Option(ctx.strictIdentifier).map(_.getText))
table.optionalMap(ctx.sample)(withSample)
}
@ -692,7 +692,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* hooks.
*/
override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.relation).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
plan(ctx.relation)
.optionalMap(ctx.sample)(withSample)
.optionalMap(ctx.strictIdentifier)(aliasPlan)
}
/**
@ -701,13 +703,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* hooks.
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
plan(ctx.queryNoWith)
.optionalMap(ctx.sample)(withSample)
.optionalMap(ctx.strictIdentifier)(aliasPlan)
}
/**
* Create an alias (SubqueryAlias) for a LogicalPlan.
*/
private def aliasPlan(alias: IdentifierContext, plan: LogicalPlan): LogicalPlan = {
private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = {
SubqueryAlias(alias.getText, plan)
}

View file

@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._
class CatalystQlDataTypeParserSuite extends SparkFunSuite {
class DataTypeParserSuite extends SparkFunSuite {
def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)
@ -133,4 +133,8 @@ class CatalystQlDataTypeParserSuite extends SparkFunSuite {
checkDataType(
"struct<`x``y` int>",
(new StructType).add("x`y", IntegerType))
// Use SQL keywords.
checkDataType("struct<end: long, select: int, from: string>",
(new StructType).add("end", LongType).add("select", IntegerType).add("from", StringType))
}

View file

@ -39,8 +39,6 @@ class ErrorParserSuite extends SparkFunSuite {
}
test("no viable input") {
intercept("select from tbl", 1, 7, "no viable alternative at input", "-------^^^")
intercept("select\nfrom tbl", 2, 0, "no viable alternative at input", "^^^")
intercept("select ((r + 1) ", 1, 16, "no viable alternative at input", "----------------^^^")
}

View file

@ -107,6 +107,7 @@ class PlanParserSuite extends PlanTest {
table("db", "c").select('a, 'b).where('x < 1))
assertEqual("select distinct a, b from db.c", Distinct(table("db", "c").select('a, 'b)))
assertEqual("select all a, b from db.c", table("db", "c").select('a, 'b))
assertEqual("select from tbl", OneRowRelation.select('from.as("tbl")))
}
test("reverse select query") {

View file

@ -53,8 +53,9 @@ class TableIdentifierParserSuite extends SparkFunSuite {
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
"int", "smallint", "timestamp", "at")
val hiveNonReservedRegression = Seq("left", "right", "left", "right", "full", "inner", "semi",
"union", "except", "intersect", "schema", "database")
val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
"natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
"where", "having", "from", "to", "table", "with", "not")
test("table identifier") {
// Regular names.
@ -67,11 +68,10 @@ class TableIdentifierParserSuite extends SparkFunSuite {
}
}
test("table identifier - keywords") {
test("table identifier - strict keywords") {
// SQL Keywords.
val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression
keywords.foreach { keyword =>
intercept[ParseException](parseTableIdentifier(keyword))
hiveStrictNonReservedKeyword.foreach { keyword =>
assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
}