[SPARK-27809][SQL] Make optional clauses order insensitive for CREATE DATABASE/VIEW SQL statement

## What changes were proposed in this pull request?

Each time I write a complex CREATE DATABASE/VIEW statement, I have to open the .g4 file to find the EXACT order of clauses in the CREATE DATABASE/VIEW statement. When the order is not right, I get a strange, confusing error message generated from ANTLR4.

The original g4 grammar for CREATE VIEW is
```
CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [db_name.]view_name
  [(col_name1 [COMMENT col_comment1], ...)]
  [COMMENT table_comment]
  [TBLPROPERTIES (key1=val1, key2=val2, ...)]
AS select_statement
```
The proposal is to make the following clauses order insensitive.
```
  [COMMENT table_comment]
  [TBLPROPERTIES (key1=val1, key2=val2, ...)]
```
---
The original g4 grammar for CREATE DATABASE is
```
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] db_name
  [COMMENT comment_text]
  [LOCATION path]
  [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
```
The proposal is to make the following clauses order insensitive.
```
  [COMMENT comment_text]
  [LOCATION path]
  [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
```
## How was this patch tested?

By adding new unit tests to test duplicate clauses and modifying some existing unit tests to test whether those clauses are actually order insensitive

Closes #24681 from yeshengm/create-view-parser.

Authored-by: Yesheng Ma <kimi.ysma@gmail.com>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
Yesheng Ma 2019-05-24 15:19:14 -07:00 committed by gatorsmile
parent a12de29c1a
commit 5e3520f7f4
4 changed files with 97 additions and 43 deletions

View file

@@ -84,8 +84,9 @@ statement
| ctes? dmlStatementNoWith #dmlStatement | ctes? dmlStatementNoWith #dmlStatement
| USE db=identifier #use | USE db=identifier #use
| CREATE database (IF NOT EXISTS)? identifier | CREATE database (IF NOT EXISTS)? identifier
(COMMENT comment=STRING)? locationSpec? ((COMMENT comment=STRING) |
(WITH DBPROPERTIES tablePropertyList)? #createDatabase locationSpec |
(WITH DBPROPERTIES tablePropertyList))* #createDatabase
| ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties | ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties
| DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase | DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases | SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
@@ -142,9 +143,11 @@ statement
| DROP VIEW (IF EXISTS)? tableIdentifier #dropTable | DROP VIEW (IF EXISTS)? tableIdentifier #dropTable
| CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
VIEW (IF NOT EXISTS)? tableIdentifier VIEW (IF NOT EXISTS)? tableIdentifier
identifierCommentList? (COMMENT STRING)? identifierCommentList?
(PARTITIONED ON identifierList)? ((COMMENT STRING) |
(TBLPROPERTIES tablePropertyList)? AS query #createView (PARTITIONED ON identifierList) |
(TBLPROPERTIES tablePropertyList))*
AS query #createView
| CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
tableIdentifier ('(' colTypeList ')')? tableProvider tableIdentifier ('(' colTypeList ')')? tableProvider
(OPTIONS tablePropertyList)? #createTempViewUsing (OPTIONS tablePropertyList)? #createTempViewUsing

View file

@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.parser package org.apache.spark.sql.catalyst.parser
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext} import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
import scala.collection.JavaConverters._
import org.apache.spark.SparkFunSuite import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
@@ -152,14 +153,14 @@ class ParserUtilsSuite extends SparkFunSuite {
assert(string(showDbsContext.pattern) == "identifier_with_wildcards") assert(string(showDbsContext.pattern) == "identifier_with_wildcards")
assert(string(createDbContext.comment) == "database_comment") assert(string(createDbContext.comment) == "database_comment")
assert(string(createDbContext.locationSpec.STRING) == "/home/user/db") assert(string(createDbContext.locationSpec.asScala.head.STRING) == "/home/user/db")
} }
test("position") { test("position") {
assert(position(setConfContext.start) == Origin(Some(1), Some(0))) assert(position(setConfContext.start) == Origin(Some(1), Some(0)))
assert(position(showFuncContext.stop) == Origin(Some(1), Some(19))) assert(position(showFuncContext.stop) == Origin(Some(1), Some(19)))
assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27))) assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27)))
assert(position(createDbContext.locationSpec.start) == Origin(Some(3), Some(27))) assert(position(createDbContext.locationSpec.asScala.head.start) == Origin(Some(3), Some(27)))
assert(position(emptyContext.stop) == Origin(None, None)) assert(position(emptyContext.stop) == Origin(None, None))
} }
@@ -177,7 +178,7 @@ class ParserUtilsSuite extends SparkFunSuite {
} }
test("withOrigin") { test("withOrigin") {
val ctx = createDbContext.locationSpec val ctx = createDbContext.locationSpec.asScala.head
val current = CurrentOrigin.get val current = CurrentOrigin.get
val (location, origin) = withOrigin(ctx) { val (location, origin) = withOrigin(ctx) {
(string(ctx.STRING), CurrentOrigin.get) (string(ctx.STRING), CurrentOrigin.get)

View file

@@ -495,17 +495,26 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
* *
* For example: * For example:
* {{{ * {{{
* CREATE DATABASE [IF NOT EXISTS] database_name [COMMENT database_comment] * CREATE DATABASE [IF NOT EXISTS] database_name
* [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] * create_database_clauses;
*
* create_database_clauses (order insensitive):
* [COMMENT database_comment]
* [LOCATION path]
* [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
* }}} * }}}
*/ */
override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) { override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
checkDuplicateClauses(ctx.DBPROPERTIES, "WITH DBPROPERTIES", ctx)
CreateDatabaseCommand( CreateDatabaseCommand(
ctx.identifier.getText, ctx.identifier.getText,
ctx.EXISTS != null, ctx.EXISTS != null,
Option(ctx.locationSpec).map(visitLocationSpec), ctx.locationSpec.asScala.headOption.map(visitLocationSpec),
Option(ctx.comment).map(string), Option(ctx.comment).map(string),
Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues).getOrElse(Map.empty))
} }
/** /**
@@ -1260,40 +1269,49 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
* {{{ * {{{
* CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name * CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name
* [(column_name [COMMENT column_comment], ...) ] * [(column_name [COMMENT column_comment], ...) ]
* [COMMENT view_comment] * create_view_clauses
* [TBLPROPERTIES (property_name = property_value, ...)] *
* AS SELECT ...; * AS SELECT ...;
*
* create_view_clauses (order insensitive):
* [COMMENT view_comment]
* [TBLPROPERTIES (property_name = property_value, ...)]
* }}} * }}}
*/ */
override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) { override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) {
if (ctx.identifierList != null) { if (!ctx.identifierList.isEmpty) {
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx) operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
} else {
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
}
}
val viewType = if (ctx.TEMPORARY == null) {
PersistedView
} else if (ctx.GLOBAL != null) {
GlobalTempView
} else {
LocalTempView
}
CreateViewCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
userSpecifiedColumns = userSpecifiedColumns,
comment = Option(ctx.STRING).map(string),
properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty),
originalText = Option(source(ctx.query)),
child = plan(ctx.query),
allowExisting = ctx.EXISTS != null,
replace = ctx.REPLACE != null,
viewType = viewType)
} }
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx)
checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
}
}
val viewType = if (ctx.TEMPORARY == null) {
PersistedView
} else if (ctx.GLOBAL != null) {
GlobalTempView
} else {
LocalTempView
}
CreateViewCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
userSpecifiedColumns = userSpecifiedColumns,
comment = ctx.STRING.asScala.headOption.map(string),
properties = ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues)
.getOrElse(Map.empty),
originalText = Option(source(ctx.query)),
child = plan(ctx.query),
allowExisting = ctx.EXISTS != null,
replace = ctx.REPLACE != null,
viewType = viewType)
} }
/** /**

View file

@@ -40,7 +40,6 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
class DDLParserSuite extends PlanTest with SharedSQLContext { class DDLParserSuite extends PlanTest with SharedSQLContext {
private lazy val parser = new SparkSqlParser(new SQLConf) private lazy val parser = new SparkSqlParser(new SQLConf)
@@ -85,8 +84,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
val sql = val sql =
""" """
|CREATE DATABASE IF NOT EXISTS database_name |CREATE DATABASE IF NOT EXISTS database_name
|COMMENT 'database_comment' LOCATION '/home/user/db'
|WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c') |WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')
|COMMENT 'database_comment' LOCATION '/home/user/db'
""".stripMargin """.stripMargin
val parsed = parser.parsePlan(sql) val parsed = parser.parsePlan(sql)
val expected = CreateDatabaseCommand( val expected = CreateDatabaseCommand(
@@ -98,6 +97,23 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
comparePlans(parsed, expected) comparePlans(parsed, expected)
} }
test("create database -- check duplicates") {
def createDatabase(duplicateClause: String): String = {
s"""
|CREATE DATABASE IF NOT EXISTS database_name
|$duplicateClause
|$duplicateClause
""".stripMargin
}
val sql1 = createDatabase("COMMENT 'database_comment'")
val sql2 = createDatabase("LOCATION '/home/user/db'")
val sql3 = createDatabase("WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')")
intercept(sql1, "Found duplicate clauses: COMMENT")
intercept(sql2, "Found duplicate clauses: LOCATION")
intercept(sql3, "Found duplicate clauses: WITH DBPROPERTIES")
}
test("create database - property values must be set") { test("create database - property values must be set") {
assertUnsupported( assertUnsupported(
sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')", sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')",
@@ -1517,8 +1533,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
""" """
|CREATE OR REPLACE VIEW view1 |CREATE OR REPLACE VIEW view1
|(col1, col3 COMMENT 'hello') |(col1, col3 COMMENT 'hello')
|COMMENT 'BLABLA'
|TBLPROPERTIES('prop1Key'="prop1Val") |TBLPROPERTIES('prop1Key'="prop1Val")
|COMMENT 'BLABLA'
|AS SELECT * FROM tab1 |AS SELECT * FROM tab1
""".stripMargin """.stripMargin
val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand] val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand]
@@ -1537,6 +1553,22 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
} }
} }
test("create view - duplicate clauses") {
def createViewStatement(duplicateClause: String): String = {
s"""
|CREATE OR REPLACE VIEW view1
|(col1, col3 COMMENT 'hello')
|$duplicateClause
|$duplicateClause
|AS SELECT * FROM tab1
""".stripMargin
}
val sql1 = createViewStatement("COMMENT 'BLABLA'")
val sql2 = createViewStatement("TBLPROPERTIES('prop1Key'=\"prop1Val\")")
intercept(sql1, "Found duplicate clauses: COMMENT")
intercept(sql2, "Found duplicate clauses: TBLPROPERTIES")
}
test("MSCK REPAIR table") { test("MSCK REPAIR table") {
val sql = "MSCK REPAIR TABLE tab1" val sql = "MSCK REPAIR TABLE tab1"
val parsed = parser.parsePlan(sql) val parsed = parser.parsePlan(sql)