[SPARK-27809][SQL] Make optional clauses order insensitive for CREATE DATABASE/VIEW SQL statement
## What changes were proposed in this pull request? Each time I write a complex CREATE DATABASE/VIEW statement, I have to open the .g4 file to find the EXACT order of clauses in the statement. When the order is not right, I get a strange, confusing error message generated by ANTLR4. The original g4 grammar for CREATE VIEW is ``` CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [db_name.]view_name [(col_name1 [COMMENT col_comment1], ...)] [COMMENT table_comment] [TBLPROPERTIES (key1=val1, key2=val2, ...)] AS select_statement ``` The proposal is to make the following clauses order-insensitive: ``` [COMMENT table_comment] [TBLPROPERTIES (key1=val1, key2=val2, ...)] ``` – The original g4 grammar for CREATE DATABASE is ``` CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] db_name [COMMENT comment_text] [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] ``` The proposal is to make the following clauses order-insensitive: ``` [COMMENT comment_text] [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] ``` ## How was this patch tested? By adding new unit tests that check for duplicate clauses, and by modifying some existing unit tests to check that those clauses are actually order-insensitive. Closes #24681 from yeshengm/create-view-parser. Authored-by: Yesheng Ma <kimi.ysma@gmail.com> Signed-off-by: gatorsmile <gatorsmile@gmail.com>
This commit is contained in:
parent
a12de29c1a
commit
5e3520f7f4
|
@ -84,8 +84,9 @@ statement
|
||||||
| ctes? dmlStatementNoWith #dmlStatement
|
| ctes? dmlStatementNoWith #dmlStatement
|
||||||
| USE db=identifier #use
|
| USE db=identifier #use
|
||||||
| CREATE database (IF NOT EXISTS)? identifier
|
| CREATE database (IF NOT EXISTS)? identifier
|
||||||
(COMMENT comment=STRING)? locationSpec?
|
((COMMENT comment=STRING) |
|
||||||
(WITH DBPROPERTIES tablePropertyList)? #createDatabase
|
locationSpec |
|
||||||
|
(WITH DBPROPERTIES tablePropertyList))* #createDatabase
|
||||||
| ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties
|
| ALTER database identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties
|
||||||
| DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase
|
| DROP database (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase
|
||||||
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
|
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
|
||||||
|
@ -142,9 +143,11 @@ statement
|
||||||
| DROP VIEW (IF EXISTS)? tableIdentifier #dropTable
|
| DROP VIEW (IF EXISTS)? tableIdentifier #dropTable
|
||||||
| CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
|
| CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
|
||||||
VIEW (IF NOT EXISTS)? tableIdentifier
|
VIEW (IF NOT EXISTS)? tableIdentifier
|
||||||
identifierCommentList? (COMMENT STRING)?
|
identifierCommentList?
|
||||||
(PARTITIONED ON identifierList)?
|
((COMMENT STRING) |
|
||||||
(TBLPROPERTIES tablePropertyList)? AS query #createView
|
(PARTITIONED ON identifierList) |
|
||||||
|
(TBLPROPERTIES tablePropertyList))*
|
||||||
|
AS query #createView
|
||||||
| CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
|
| CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
|
||||||
tableIdentifier ('(' colTypeList ')')? tableProvider
|
tableIdentifier ('(' colTypeList ')')? tableProvider
|
||||||
(OPTIONS tablePropertyList)? #createTempViewUsing
|
(OPTIONS tablePropertyList)? #createTempViewUsing
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.spark.sql.catalyst.parser
|
package org.apache.spark.sql.catalyst.parser
|
||||||
|
|
||||||
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
|
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
import org.apache.spark.SparkFunSuite
|
import org.apache.spark.SparkFunSuite
|
||||||
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
|
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
|
||||||
|
@ -152,14 +153,14 @@ class ParserUtilsSuite extends SparkFunSuite {
|
||||||
assert(string(showDbsContext.pattern) == "identifier_with_wildcards")
|
assert(string(showDbsContext.pattern) == "identifier_with_wildcards")
|
||||||
assert(string(createDbContext.comment) == "database_comment")
|
assert(string(createDbContext.comment) == "database_comment")
|
||||||
|
|
||||||
assert(string(createDbContext.locationSpec.STRING) == "/home/user/db")
|
assert(string(createDbContext.locationSpec.asScala.head.STRING) == "/home/user/db")
|
||||||
}
|
}
|
||||||
|
|
||||||
test("position") {
|
test("position") {
|
||||||
assert(position(setConfContext.start) == Origin(Some(1), Some(0)))
|
assert(position(setConfContext.start) == Origin(Some(1), Some(0)))
|
||||||
assert(position(showFuncContext.stop) == Origin(Some(1), Some(19)))
|
assert(position(showFuncContext.stop) == Origin(Some(1), Some(19)))
|
||||||
assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27)))
|
assert(position(descFuncContext.describeFuncName.start) == Origin(Some(1), Some(27)))
|
||||||
assert(position(createDbContext.locationSpec.start) == Origin(Some(3), Some(27)))
|
assert(position(createDbContext.locationSpec.asScala.head.start) == Origin(Some(3), Some(27)))
|
||||||
assert(position(emptyContext.stop) == Origin(None, None))
|
assert(position(emptyContext.stop) == Origin(None, None))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,7 +178,7 @@ class ParserUtilsSuite extends SparkFunSuite {
|
||||||
}
|
}
|
||||||
|
|
||||||
test("withOrigin") {
|
test("withOrigin") {
|
||||||
val ctx = createDbContext.locationSpec
|
val ctx = createDbContext.locationSpec.asScala.head
|
||||||
val current = CurrentOrigin.get
|
val current = CurrentOrigin.get
|
||||||
val (location, origin) = withOrigin(ctx) {
|
val (location, origin) = withOrigin(ctx) {
|
||||||
(string(ctx.STRING), CurrentOrigin.get)
|
(string(ctx.STRING), CurrentOrigin.get)
|
||||||
|
|
|
@ -495,17 +495,26 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
|
||||||
*
|
*
|
||||||
* For example:
|
* For example:
|
||||||
* {{{
|
* {{{
|
||||||
* CREATE DATABASE [IF NOT EXISTS] database_name [COMMENT database_comment]
|
* CREATE DATABASE [IF NOT EXISTS] database_name
|
||||||
* [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
|
* create_database_clauses;
|
||||||
|
*
|
||||||
|
* create_database_clauses (order insensitive):
|
||||||
|
* [COMMENT database_comment]
|
||||||
|
* [LOCATION path]
|
||||||
|
* [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
|
||||||
* }}}
|
* }}}
|
||||||
*/
|
*/
|
||||||
override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
|
override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
|
||||||
|
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
|
||||||
|
checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
|
||||||
|
checkDuplicateClauses(ctx.DBPROPERTIES, "WITH DBPROPERTIES", ctx)
|
||||||
|
|
||||||
CreateDatabaseCommand(
|
CreateDatabaseCommand(
|
||||||
ctx.identifier.getText,
|
ctx.identifier.getText,
|
||||||
ctx.EXISTS != null,
|
ctx.EXISTS != null,
|
||||||
Option(ctx.locationSpec).map(visitLocationSpec),
|
ctx.locationSpec.asScala.headOption.map(visitLocationSpec),
|
||||||
Option(ctx.comment).map(string),
|
Option(ctx.comment).map(string),
|
||||||
Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty))
|
ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues).getOrElse(Map.empty))
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1260,40 +1269,49 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
|
||||||
* {{{
|
* {{{
|
||||||
* CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name
|
* CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name
|
||||||
* [(column_name [COMMENT column_comment], ...) ]
|
* [(column_name [COMMENT column_comment], ...) ]
|
||||||
* [COMMENT view_comment]
|
* create_view_clauses
|
||||||
* [TBLPROPERTIES (property_name = property_value, ...)]
|
*
|
||||||
* AS SELECT ...;
|
* AS SELECT ...;
|
||||||
|
*
|
||||||
|
* create_view_clauses (order insensitive):
|
||||||
|
* [COMMENT view_comment]
|
||||||
|
* [TBLPROPERTIES (property_name = property_value, ...)]
|
||||||
* }}}
|
* }}}
|
||||||
*/
|
*/
|
||||||
override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) {
|
override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) {
|
||||||
if (ctx.identifierList != null) {
|
if (!ctx.identifierList.isEmpty) {
|
||||||
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
|
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
|
||||||
} else {
|
|
||||||
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
|
|
||||||
icl.identifierComment.asScala.map { ic =>
|
|
||||||
ic.identifier.getText -> Option(ic.STRING).map(string)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val viewType = if (ctx.TEMPORARY == null) {
|
|
||||||
PersistedView
|
|
||||||
} else if (ctx.GLOBAL != null) {
|
|
||||||
GlobalTempView
|
|
||||||
} else {
|
|
||||||
LocalTempView
|
|
||||||
}
|
|
||||||
|
|
||||||
CreateViewCommand(
|
|
||||||
name = visitTableIdentifier(ctx.tableIdentifier),
|
|
||||||
userSpecifiedColumns = userSpecifiedColumns,
|
|
||||||
comment = Option(ctx.STRING).map(string),
|
|
||||||
properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty),
|
|
||||||
originalText = Option(source(ctx.query)),
|
|
||||||
child = plan(ctx.query),
|
|
||||||
allowExisting = ctx.EXISTS != null,
|
|
||||||
replace = ctx.REPLACE != null,
|
|
||||||
viewType = viewType)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
|
||||||
|
checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx)
|
||||||
|
checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
|
||||||
|
|
||||||
|
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
|
||||||
|
icl.identifierComment.asScala.map { ic =>
|
||||||
|
ic.identifier.getText -> Option(ic.STRING).map(string)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val viewType = if (ctx.TEMPORARY == null) {
|
||||||
|
PersistedView
|
||||||
|
} else if (ctx.GLOBAL != null) {
|
||||||
|
GlobalTempView
|
||||||
|
} else {
|
||||||
|
LocalTempView
|
||||||
|
}
|
||||||
|
|
||||||
|
CreateViewCommand(
|
||||||
|
name = visitTableIdentifier(ctx.tableIdentifier),
|
||||||
|
userSpecifiedColumns = userSpecifiedColumns,
|
||||||
|
comment = ctx.STRING.asScala.headOption.map(string),
|
||||||
|
properties = ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues)
|
||||||
|
.getOrElse(Map.empty),
|
||||||
|
originalText = Option(source(ctx.query)),
|
||||||
|
child = plan(ctx.query),
|
||||||
|
allowExisting = ctx.EXISTS != null,
|
||||||
|
replace = ctx.REPLACE != null,
|
||||||
|
viewType = viewType)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
|
||||||
import org.apache.spark.sql.test.SharedSQLContext
|
import org.apache.spark.sql.test.SharedSQLContext
|
||||||
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
|
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
|
||||||
|
|
||||||
|
|
||||||
class DDLParserSuite extends PlanTest with SharedSQLContext {
|
class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||||
private lazy val parser = new SparkSqlParser(new SQLConf)
|
private lazy val parser = new SparkSqlParser(new SQLConf)
|
||||||
|
|
||||||
|
@ -85,8 +84,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||||
val sql =
|
val sql =
|
||||||
"""
|
"""
|
||||||
|CREATE DATABASE IF NOT EXISTS database_name
|
|CREATE DATABASE IF NOT EXISTS database_name
|
||||||
|COMMENT 'database_comment' LOCATION '/home/user/db'
|
|
||||||
|WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')
|
|WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')
|
||||||
|
|COMMENT 'database_comment' LOCATION '/home/user/db'
|
||||||
""".stripMargin
|
""".stripMargin
|
||||||
val parsed = parser.parsePlan(sql)
|
val parsed = parser.parsePlan(sql)
|
||||||
val expected = CreateDatabaseCommand(
|
val expected = CreateDatabaseCommand(
|
||||||
|
@ -98,6 +97,23 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||||
comparePlans(parsed, expected)
|
comparePlans(parsed, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("create database -- check duplicates") {
|
||||||
|
def createDatabase(duplicateClause: String): String = {
|
||||||
|
s"""
|
||||||
|
|CREATE DATABASE IF NOT EXISTS database_name
|
||||||
|
|$duplicateClause
|
||||||
|
|$duplicateClause
|
||||||
|
""".stripMargin
|
||||||
|
}
|
||||||
|
val sql1 = createDatabase("COMMENT 'database_comment'")
|
||||||
|
val sql2 = createDatabase("LOCATION '/home/user/db'")
|
||||||
|
val sql3 = createDatabase("WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')")
|
||||||
|
|
||||||
|
intercept(sql1, "Found duplicate clauses: COMMENT")
|
||||||
|
intercept(sql2, "Found duplicate clauses: LOCATION")
|
||||||
|
intercept(sql3, "Found duplicate clauses: WITH DBPROPERTIES")
|
||||||
|
}
|
||||||
|
|
||||||
test("create database - property values must be set") {
|
test("create database - property values must be set") {
|
||||||
assertUnsupported(
|
assertUnsupported(
|
||||||
sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')",
|
sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')",
|
||||||
|
@ -1517,8 +1533,8 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||||
"""
|
"""
|
||||||
|CREATE OR REPLACE VIEW view1
|
|CREATE OR REPLACE VIEW view1
|
||||||
|(col1, col3 COMMENT 'hello')
|
|(col1, col3 COMMENT 'hello')
|
||||||
|COMMENT 'BLABLA'
|
|
||||||
|TBLPROPERTIES('prop1Key'="prop1Val")
|
|TBLPROPERTIES('prop1Key'="prop1Val")
|
||||||
|
|COMMENT 'BLABLA'
|
||||||
|AS SELECT * FROM tab1
|
|AS SELECT * FROM tab1
|
||||||
""".stripMargin
|
""".stripMargin
|
||||||
val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand]
|
val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand]
|
||||||
|
@ -1537,6 +1553,22 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("create view - duplicate clauses") {
|
||||||
|
def createViewStatement(duplicateClause: String): String = {
|
||||||
|
s"""
|
||||||
|
|CREATE OR REPLACE VIEW view1
|
||||||
|
|(col1, col3 COMMENT 'hello')
|
||||||
|
|$duplicateClause
|
||||||
|
|$duplicateClause
|
||||||
|
|AS SELECT * FROM tab1
|
||||||
|
""".stripMargin
|
||||||
|
}
|
||||||
|
val sql1 = createViewStatement("COMMENT 'BLABLA'")
|
||||||
|
val sql2 = createViewStatement("TBLPROPERTIES('prop1Key'=\"prop1Val\")")
|
||||||
|
intercept(sql1, "Found duplicate clauses: COMMENT")
|
||||||
|
intercept(sql2, "Found duplicate clauses: TBLPROPERTIES")
|
||||||
|
}
|
||||||
|
|
||||||
test("MSCK REPAIR table") {
|
test("MSCK REPAIR table") {
|
||||||
val sql = "MSCK REPAIR TABLE tab1"
|
val sql = "MSCK REPAIR TABLE tab1"
|
||||||
val parsed = parser.parsePlan(sql)
|
val parsed = parser.parsePlan(sql)
|
||||||
|
|
Loading…
Reference in a new issue