[SPARK-27809][SQL] Make optional clauses order insensitive for CREATE DATABASE/VIEW SQL statement

## What changes were proposed in this pull request?

Each time, when I write a complex CREATE DATABASE/VIEW statements, I have to open the .g4 file to find the EXACT order of clauses in CREATE TABLE statement. When the order is not right, I will get A strange confusing error message generated from ANTLR4.

The original g4 grammar for CREATE VIEW is
```
CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [db_name.]view_name
  [(col_name1 [COMMENT col_comment1], ...)]
  [COMMENT table_comment]
  [TBLPROPERTIES (key1=val1, key2=val2, ...)]
AS select_statement
```
The proposal is to make the following clauses order insensitive.
```
  [COMMENT table_comment]
  [TBLPROPERTIES (key1=val1, key2=val2, ...)]
```
–
The original g4 grammar for CREATE DATABASE is
```
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] db_name
  [COMMENT comment_text]
  [LOCATION path]
  [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
```
The proposal is to make the following clauses order insensitive.
```
  [COMMENT comment_text]
  [LOCATION path]
  [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
```
## How was this patch tested?

By adding new unit tests to test duplicate clauses and modifying some existing unit tests to test whether those clauses are actually order insensitive

Closes #24681 from yeshengm/create-view-parser.

Authored-by: Yesheng Ma <kimi.ysma@gmail.com>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
Yesheng Ma 2019-05-24 15:19:14 -07:00 committed by gatorsmile
parent a12de29c1a
commit 5e3520f7f4
4 changed files with 97 additions and 43 deletions

View file

@ -84,8 +84,9 @@ statement
| ctes? dmlStatementNoWith #dmlStatement
| USE db=identifier #use
| CREATE database (IF NOT EXISTS)? identifier
(COMMENT comment=STRING)? locationSpec?
(WITH DBPROPERTIES tablePropertyList)? #createDatabase
((COMMENT comment=STRING) |
locationSpec |
(WITH DBPROPERTIES tablePropertyList))* #createDatabase
| ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties
| DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
@ -142,9 +143,11 @@ statement
| DROP VIEW (IF EXISTS)? tableIdentifier #dropTable
| CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
VIEW (IF NOT EXISTS)? tableIdentifier
identifierCommentList? (COMMENT STRING)?
(PARTITIONED ON identifierList)?
(TBLPROPERTIES tablePropertyList)? AS query #createView
identifierCommentList?
((COMMENT STRING) |
(PARTITIONED ON identifierList) |
(TBLPROPERTIES tablePropertyList))*
AS query #createView
| CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
tableIdentifier ('(' colTypeList ')')? tableProvider
(OPTIONS tablePropertyList)? #createTempViewUsing

View file

@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.parser
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
import scala.collection.JavaConverters._
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
@ -152,14 +153,14 @@ class ParserUtilsSuite extends SparkFunSuite {
assert(string(showDbsContext.pattern) == "identifier_with_wildcards")
assert(string(createDbContext.comment) == "database_comment")
assert(string(createDbContext.locationSpec.STRING) == "/home/user/db")
assert(string(createDbContext.locationSpec.asScala.head.STRING) == "/home/user/db")
}
test("position") {
assert(position(setConfContext.start) == Origin(Some(1), Some(0)))
assert(position(showFuncContext.stop) == Origin(Some(1), Some(19)))
assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27)))
assert(position(createDbContext.locationSpec.start) == Origin(Some(3), Some(27)))
assert(position(createDbContext.locationSpec.asScala.head.start) == Origin(Some(3), Some(27)))
assert(position(emptyContext.stop) == Origin(None, None))
}
@ -177,7 +178,7 @@ class ParserUtilsSuite extends SparkFunSuite {
}
test("withOrigin") {
val ctx = createDbContext.locationSpec
val ctx = createDbContext.locationSpec.asScala.head
val current = CurrentOrigin.get
val (location, origin) = withOrigin(ctx) {
(string(ctx.STRING), CurrentOrigin.get)

View file

@ -495,17 +495,26 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
*
* For example:
* {{{
* CREATE DATABASE [IF NOT EXISTS] database_name [COMMENT database_comment]
* [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
* CREATE DATABASE [IF NOT EXISTS] database_name
* create_database_clauses;
*
* create_database_clauses (order insensitive):
* [COMMENT database_comment]
* [LOCATION path]
* [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
* }}}
*/
override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
checkDuplicateClauses(ctx.DBPROPERTIES, "WITH DBPROPERTIES", ctx)
CreateDatabaseCommand(
ctx.identifier.getText,
ctx.EXISTS != null,
Option(ctx.locationSpec).map(visitLocationSpec),
ctx.locationSpec.asScala.headOption.map(visitLocationSpec),
Option(ctx.comment).map(string),
Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty))
ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues).getOrElse(Map.empty))
}
/**
@ -1260,40 +1269,49 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
* {{{
* CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name
* [(column_name [COMMENT column_comment], ...) ]
* [COMMENT view_comment]
* [TBLPROPERTIES (property_name = property_value, ...)]
* create_view_clauses
*
* AS SELECT ...;
*
* create_view_clauses (order insensitive):
* [COMMENT view_comment]
* [TBLPROPERTIES (property_name = property_value, ...)]
* }}}
*/
override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) {
if (ctx.identifierList != null) {
if (!ctx.identifierList.isEmpty) {
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
} else {
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
}
}
val viewType = if (ctx.TEMPORARY == null) {
PersistedView
} else if (ctx.GLOBAL != null) {
GlobalTempView
} else {
LocalTempView
}
CreateViewCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
userSpecifiedColumns = userSpecifiedColumns,
comment = Option(ctx.STRING).map(string),
properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty),
originalText = Option(source(ctx.query)),
child = plan(ctx.query),
allowExisting = ctx.EXISTS != null,
replace = ctx.REPLACE != null,
viewType = viewType)
}
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx)
checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
}
}
val viewType = if (ctx.TEMPORARY == null) {
PersistedView
} else if (ctx.GLOBAL != null) {
GlobalTempView
} else {
LocalTempView
}
CreateViewCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
userSpecifiedColumns = userSpecifiedColumns,
comment = ctx.STRING.asScala.headOption.map(string),
properties = ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues)
.getOrElse(Map.empty),
originalText = Option(source(ctx.query)),
child = plan(ctx.query),
allowExisting = ctx.EXISTS != null,
replace = ctx.REPLACE != null,
viewType = viewType)
}
/**

View file

@ -40,7 +40,6 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
class DDLParserSuite extends PlanTest with SharedSQLContext {
private lazy val parser = new SparkSqlParser(new SQLConf)
@ -85,8 +84,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
val sql =
"""
|CREATE DATABASE IF NOT EXISTS database_name
|COMMENT 'database_comment' LOCATION '/home/user/db'
|WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')
|COMMENT 'database_comment' LOCATION '/home/user/db'
""".stripMargin
val parsed = parser.parsePlan(sql)
val expected = CreateDatabaseCommand(
@ -98,6 +97,23 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
comparePlans(parsed, expected)
}
test("create database -- check duplicates") {
def createDatabase(duplicateClause: String): String = {
s"""
|CREATE DATABASE IF NOT EXISTS database_name
|$duplicateClause
|$duplicateClause
""".stripMargin
}
val sql1 = createDatabase("COMMENT 'database_comment'")
val sql2 = createDatabase("LOCATION '/home/user/db'")
val sql3 = createDatabase("WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')")
intercept(sql1, "Found duplicate clauses: COMMENT")
intercept(sql2, "Found duplicate clauses: LOCATION")
intercept(sql3, "Found duplicate clauses: WITH DBPROPERTIES")
}
test("create database - property values must be set") {
assertUnsupported(
sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')",
@ -1517,8 +1533,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
"""
|CREATE OR REPLACE VIEW view1
|(col1, col3 COMMENT 'hello')
|COMMENT 'BLABLA'
|TBLPROPERTIES('prop1Key'="prop1Val")
|COMMENT 'BLABLA'
|AS SELECT * FROM tab1
""".stripMargin
val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand]
@ -1537,6 +1553,22 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
}
}
test("create view - duplicate clauses") {
def createViewStatement(duplicateClause: String): String = {
s"""
|CREATE OR REPLACE VIEW view1
|(col1, col3 COMMENT 'hello')
|$duplicateClause
|$duplicateClause
|AS SELECT * FROM tab1
""".stripMargin
}
val sql1 = createViewStatement("COMMENT 'BLABLA'")
val sql2 = createViewStatement("TBLPROPERTIES('prop1Key'=\"prop1Val\")")
intercept(sql1, "Found duplicate clauses: COMMENT")
intercept(sql2, "Found duplicate clauses: TBLPROPERTIES")
}
test("MSCK REPAIR table") {
val sql = "MSCK REPAIR TABLE tab1"
val parsed = parser.parsePlan(sql)