[SPARK-27809][SQL] Make optional clauses order insensitive for CREATE DATABASE/VIEW SQL statement
## What changes were proposed in this pull request? Each time I write a complex CREATE DATABASE/VIEW statement, I have to open the .g4 file to find the EXACT order of clauses in the CREATE TABLE statement. When the order is not right, I will get a strange, confusing error message generated from ANTLR4. The original g4 grammar for CREATE VIEW is ``` CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [db_name.]view_name [(col_name1 [COMMENT col_comment1], ...)] [COMMENT table_comment] [TBLPROPERTIES (key1=val1, key2=val2, ...)] AS select_statement ``` The proposal is to make the following clauses order insensitive. ``` [COMMENT table_comment] [TBLPROPERTIES (key1=val1, key2=val2, ...)] ``` – The original g4 grammar for CREATE DATABASE is ``` CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] db_name [COMMENT comment_text] [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] ``` The proposal is to make the following clauses order insensitive. ``` [COMMENT comment_text] [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] ``` ## How was this patch tested? By adding new unit tests to test duplicate clauses and modifying some existing unit tests to test whether those clauses are actually order insensitive. Closes #24681 from yeshengm/create-view-parser. Authored-by: Yesheng Ma <kimi.ysma@gmail.com> Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
parent
a12de29c1a
commit
5e3520f7f4
|
@ -84,8 +84,9 @@ statement
|
|||
| ctes? dmlStatementNoWith #dmlStatement
|
||||
| USE db=identifier #use
|
||||
| CREATE database (IF NOT EXISTS)? identifier
|
||||
(COMMENT comment=STRING)? locationSpec?
|
||||
(WITH DBPROPERTIES tablePropertyList)? #createDatabase
|
||||
((COMMENT comment=STRING) |
|
||||
locationSpec |
|
||||
(WITH DBPROPERTIES tablePropertyList))* #createDatabase
|
||||
| ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties
|
||||
| DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase
|
||||
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
|
||||
|
@ -142,9 +143,11 @@ statement
|
|||
| DROP VIEW (IF EXISTS)? tableIdentifier #dropTable
|
||||
| CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
|
||||
VIEW (IF NOT EXISTS)? tableIdentifier
|
||||
identifierCommentList? (COMMENT STRING)?
|
||||
(PARTITIONED ON identifierList)?
|
||||
(TBLPROPERTIES tablePropertyList)? AS query #createView
|
||||
identifierCommentList?
|
||||
((COMMENT STRING) |
|
||||
(PARTITIONED ON identifierList) |
|
||||
(TBLPROPERTIES tablePropertyList))*
|
||||
AS query #createView
|
||||
| CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
|
||||
tableIdentifier ('(' colTypeList ')')? tableProvider
|
||||
(OPTIONS tablePropertyList)? #createTempViewUsing
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.spark.sql.catalyst.parser
|
||||
|
||||
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
import org.apache.spark.SparkFunSuite
|
||||
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
|
||||
|
@ -152,14 +153,14 @@ class ParserUtilsSuite extends SparkFunSuite {
|
|||
assert(string(showDbsContext.pattern) == "identifier_with_wildcards")
|
||||
assert(string(createDbContext.comment) == "database_comment")
|
||||
|
||||
assert(string(createDbContext.locationSpec.STRING) == "/home/user/db")
|
||||
assert(string(createDbContext.locationSpec.asScala.head.STRING) == "/home/user/db")
|
||||
}
|
||||
|
||||
test("position") {
|
||||
assert(position(setConfContext.start) == Origin(Some(1), Some(0)))
|
||||
assert(position(showFuncContext.stop) == Origin(Some(1), Some(19)))
|
||||
assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27)))
|
||||
assert(position(createDbContext.locationSpec.start) == Origin(Some(3), Some(27)))
|
||||
assert(position(createDbContext.locationSpec.asScala.head.start) == Origin(Some(3), Some(27)))
|
||||
assert(position(emptyContext.stop) == Origin(None, None))
|
||||
}
|
||||
|
||||
|
@ -177,7 +178,7 @@ class ParserUtilsSuite extends SparkFunSuite {
|
|||
}
|
||||
|
||||
test("withOrigin") {
|
||||
val ctx = createDbContext.locationSpec
|
||||
val ctx = createDbContext.locationSpec.asScala.head
|
||||
val current = CurrentOrigin.get
|
||||
val (location, origin) = withOrigin(ctx) {
|
||||
(string(ctx.STRING), CurrentOrigin.get)
|
||||
|
|
|
@ -495,17 +495,26 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
|
|||
*
|
||||
* For example:
|
||||
* {{{
|
||||
* CREATE DATABASE [IF NOT EXISTS] database_name [COMMENT database_comment]
|
||||
* [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
|
||||
* CREATE DATABASE [IF NOT EXISTS] database_name
|
||||
* create_database_clauses;
|
||||
*
|
||||
* create_database_clauses (order insensitive):
|
||||
* [COMMENT database_comment]
|
||||
* [LOCATION path]
|
||||
* [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
|
||||
* }}}
|
||||
*/
|
||||
override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
|
||||
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
|
||||
checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
|
||||
checkDuplicateClauses(ctx.DBPROPERTIES, "WITH DBPROPERTIES", ctx)
|
||||
|
||||
CreateDatabaseCommand(
|
||||
ctx.identifier.getText,
|
||||
ctx.EXISTS != null,
|
||||
Option(ctx.locationSpec).map(visitLocationSpec),
|
||||
ctx.locationSpec.asScala.headOption.map(visitLocationSpec),
|
||||
Option(ctx.comment).map(string),
|
||||
Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty))
|
||||
ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues).getOrElse(Map.empty))
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1260,40 +1269,49 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
|
|||
* {{{
|
||||
* CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name
|
||||
* [(column_name [COMMENT column_comment], ...) ]
|
||||
* [COMMENT view_comment]
|
||||
* [TBLPROPERTIES (property_name = property_value, ...)]
|
||||
* create_view_clauses
|
||||
*
|
||||
* AS SELECT ...;
|
||||
*
|
||||
* create_view_clauses (order insensitive):
|
||||
* [COMMENT view_comment]
|
||||
* [TBLPROPERTIES (property_name = property_value, ...)]
|
||||
* }}}
|
||||
*/
|
||||
override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) {
|
||||
if (ctx.identifierList != null) {
|
||||
if (!ctx.identifierList.isEmpty) {
|
||||
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
|
||||
} else {
|
||||
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
|
||||
icl.identifierComment.asScala.map { ic =>
|
||||
ic.identifier.getText -> Option(ic.STRING).map(string)
|
||||
}
|
||||
}
|
||||
|
||||
val viewType = if (ctx.TEMPORARY == null) {
|
||||
PersistedView
|
||||
} else if (ctx.GLOBAL != null) {
|
||||
GlobalTempView
|
||||
} else {
|
||||
LocalTempView
|
||||
}
|
||||
|
||||
CreateViewCommand(
|
||||
name = visitTableIdentifier(ctx.tableIdentifier),
|
||||
userSpecifiedColumns = userSpecifiedColumns,
|
||||
comment = Option(ctx.STRING).map(string),
|
||||
properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty),
|
||||
originalText = Option(source(ctx.query)),
|
||||
child = plan(ctx.query),
|
||||
allowExisting = ctx.EXISTS != null,
|
||||
replace = ctx.REPLACE != null,
|
||||
viewType = viewType)
|
||||
}
|
||||
|
||||
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
|
||||
checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx)
|
||||
checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
|
||||
|
||||
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
|
||||
icl.identifierComment.asScala.map { ic =>
|
||||
ic.identifier.getText -> Option(ic.STRING).map(string)
|
||||
}
|
||||
}
|
||||
|
||||
val viewType = if (ctx.TEMPORARY == null) {
|
||||
PersistedView
|
||||
} else if (ctx.GLOBAL != null) {
|
||||
GlobalTempView
|
||||
} else {
|
||||
LocalTempView
|
||||
}
|
||||
|
||||
CreateViewCommand(
|
||||
name = visitTableIdentifier(ctx.tableIdentifier),
|
||||
userSpecifiedColumns = userSpecifiedColumns,
|
||||
comment = ctx.STRING.asScala.headOption.map(string),
|
||||
properties = ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues)
|
||||
.getOrElse(Map.empty),
|
||||
originalText = Option(source(ctx.query)),
|
||||
child = plan(ctx.query),
|
||||
allowExisting = ctx.EXISTS != null,
|
||||
replace = ctx.REPLACE != null,
|
||||
viewType = viewType)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
|
|||
import org.apache.spark.sql.test.SharedSQLContext
|
||||
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
|
||||
|
||||
|
||||
class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||
private lazy val parser = new SparkSqlParser(new SQLConf)
|
||||
|
||||
|
@ -85,8 +84,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
|||
val sql =
|
||||
"""
|
||||
|CREATE DATABASE IF NOT EXISTS database_name
|
||||
|COMMENT 'database_comment' LOCATION '/home/user/db'
|
||||
|WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')
|
||||
|COMMENT 'database_comment' LOCATION '/home/user/db'
|
||||
""".stripMargin
|
||||
val parsed = parser.parsePlan(sql)
|
||||
val expected = CreateDatabaseCommand(
|
||||
|
@ -98,6 +97,23 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
|||
comparePlans(parsed, expected)
|
||||
}
|
||||
|
||||
test("create database -- check duplicates") {
|
||||
def createDatabase(duplicateClause: String): String = {
|
||||
s"""
|
||||
|CREATE DATABASE IF NOT EXISTS database_name
|
||||
|$duplicateClause
|
||||
|$duplicateClause
|
||||
""".stripMargin
|
||||
}
|
||||
val sql1 = createDatabase("COMMENT 'database_comment'")
|
||||
val sql2 = createDatabase("LOCATION '/home/user/db'")
|
||||
val sql3 = createDatabase("WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')")
|
||||
|
||||
intercept(sql1, "Found duplicate clauses: COMMENT")
|
||||
intercept(sql2, "Found duplicate clauses: LOCATION")
|
||||
intercept(sql3, "Found duplicate clauses: WITH DBPROPERTIES")
|
||||
}
|
||||
|
||||
test("create database - property values must be set") {
|
||||
assertUnsupported(
|
||||
sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')",
|
||||
|
@ -1517,8 +1533,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
|||
"""
|
||||
|CREATE OR REPLACE VIEW view1
|
||||
|(col1, col3 COMMENT 'hello')
|
||||
|COMMENT 'BLABLA'
|
||||
|TBLPROPERTIES('prop1Key'="prop1Val")
|
||||
|COMMENT 'BLABLA'
|
||||
|AS SELECT * FROM tab1
|
||||
""".stripMargin
|
||||
val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand]
|
||||
|
@ -1537,6 +1553,22 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
|||
}
|
||||
}
|
||||
|
||||
test("create view - duplicate clauses") {
|
||||
def createViewStatement(duplicateClause: String): String = {
|
||||
s"""
|
||||
|CREATE OR REPLACE VIEW view1
|
||||
|(col1, col3 COMMENT 'hello')
|
||||
|$duplicateClause
|
||||
|$duplicateClause
|
||||
|AS SELECT * FROM tab1
|
||||
""".stripMargin
|
||||
}
|
||||
val sql1 = createViewStatement("COMMENT 'BLABLA'")
|
||||
val sql2 = createViewStatement("TBLPROPERTIES('prop1Key'=\"prop1Val\")")
|
||||
intercept(sql1, "Found duplicate clauses: COMMENT")
|
||||
intercept(sql2, "Found duplicate clauses: TBLPROPERTIES")
|
||||
}
|
||||
|
||||
test("MSCK REPAIR table") {
|
||||
val sql = "MSCK REPAIR TABLE tab1"
|
||||
val parsed = parser.parsePlan(sql)
|
||||
|
|
Loading…
Reference in a new issue