# [SPARK-22934][SQL] Make optional clauses order insensitive for CREATE TABLE SQL statement

## What changes were proposed in this pull request?
Currently, our CREATE TABLE syntax requires its optional clauses to appear in one exact order, which is hard to remember. This PR therefore makes the optional clauses of the `CREATE TABLE` SQL statement order insensitive.

```
CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
    [(col_name1 col_type1 [COMMENT col_comment1], ...)]
    USING datasource
    [OPTIONS (key1=val1, key2=val2, ...)]
    [PARTITIONED BY (col_name1, col_name2, ...)]
    [CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS]
    [LOCATION path]
    [COMMENT table_comment]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
    [AS select_statement]
```

The proposal is to make the following clauses order insensitive.
```
    [OPTIONS (key1=val1, key2=val2, ...)]
    [PARTITIONED BY (col_name1, col_name2, ...)]
    [CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS]
    [LOCATION path]
    [COMMENT table_comment]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
```
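For instance (an illustrative sketch, not taken from the patch itself; the table name, columns, and property values are invented), both of the following statements are accepted after this change, while only the first matches the old fixed clause order:

```sql
-- Canonical clause order: accepted both before and after this change.
CREATE TABLE boxes (width INT, length INT, height INT)
USING parquet
OPTIONS (compression='snappy')
COMMENT 'a table'
TBLPROPERTIES ('created.by'='spark');

-- Same clauses, different order: a parse error before this change, accepted after it.
CREATE TABLE boxes2 (width INT, length INT, height INT)
USING parquet
TBLPROPERTIES ('created.by'='spark')
COMMENT 'a table'
OPTIONS (compression='snappy');
```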

The same idea also applies to the `CREATE TABLE` syntax for Hive serde tables.
```
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
    [(col_name1[:] col_type1 [COMMENT col_comment1], ...)]
    [COMMENT table_comment]
    [PARTITIONED BY (col_name2[:] col_type2 [COMMENT col_comment2], ...)]
    [ROW FORMAT row_format]
    [STORED AS file_format]
    [LOCATION path]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
    [AS select_statement]
```

The proposal is to make the following clauses order insensitive.
```
    [COMMENT table_comment]
    [PARTITIONED BY (col_name2[:] col_type2 [COMMENT col_comment2], ...)]
    [ROW FORMAT row_format]
    [STORED AS file_format]
    [LOCATION path]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
```
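As with the data source syntax, a Hive `CREATE TABLE` with deliberately shuffled clauses, such as the following (modeled on the test cases added in this PR), parses after this change:

```sql
-- Rejected by the old grammar (TBLPROPERTIES and LOCATION before STORED AS
-- and COMMENT); accepted once the clauses are order insensitive.
CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view (id INT, msg STRING)
TBLPROPERTIES ('p1'='v1', 'p2'='v2')
LOCATION '/user/external/page_view'
STORED AS RCFILE
COMMENT 'This is the staging page view table';
```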

## How was this patch tested?
Added test cases

Author: gatorsmile <gatorsmile@gmail.com>

Closes #20133 from gatorsmile/createDataSourceTableDDL.
Committed by gatorsmile on 2018-01-03 22:09:30 +08:00
commit 1a87a1609c (parent 247a08939d)
7 changed files with 335 additions and 138 deletions

**`SqlBase.g4`**

```diff
@@ -73,18 +73,22 @@ statement
     | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList     #setDatabaseProperties
     | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)?      #dropDatabase
     | createTableHeader ('(' colTypeList ')')? tableProvider
-        (OPTIONS options=tablePropertyList)?
-        (PARTITIONED BY partitionColumnNames=identifierList)?
-        bucketSpec? locationSpec?
-        (COMMENT comment=STRING)?
-        (TBLPROPERTIES tableProps=tablePropertyList)?
+        ((OPTIONS options=tablePropertyList) |
+        (PARTITIONED BY partitionColumnNames=identifierList) |
+        bucketSpec |
+        locationSpec |
+        (COMMENT comment=STRING) |
+        (TBLPROPERTIES tableProps=tablePropertyList))*
         (AS? query)?                                                   #createTable
     | createTableHeader ('(' columns=colTypeList ')')?
-        (COMMENT comment=STRING)?
-        (PARTITIONED BY '(' partitionColumns=colTypeList ')')?
-        bucketSpec? skewSpec?
-        rowFormat? createFileFormat? locationSpec?
-        (TBLPROPERTIES tablePropertyList)?
+        ((COMMENT comment=STRING) |
+        (PARTITIONED BY '(' partitionColumns=colTypeList ')') |
+        bucketSpec |
+        skewSpec |
+        rowFormat |
+        createFileFormat |
+        locationSpec |
+        (TBLPROPERTIES tableProps=tablePropertyList))*
         (AS? query)?                                                   #createHiveTable
     | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
         LIKE source=tableIdentifier locationSpec?                      #createTableLike
```
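A note on the grammar change above: the star loop `(clause1 | clause2 | ...)*` accepts the clauses in any order, but unlike the old `clause?` sequence it also accepts the same clause more than once. That is why the parser gains the `checkDuplicateClauses` guard below: a repeated clause is now rejected with an explicit error. For example (the error message matches the tests added in this PR):

```sql
-- Each optional clause may appear at most once; repeating one is a parse error:
CREATE TABLE my_tab (a INT, b STRING)
USING parquet
COMMENT 'a table'
COMMENT 'another comment';
-- => ParseException: Found duplicate clauses: COMMENT
```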

**`ParserUtils.scala`**

```diff
@@ -16,6 +16,8 @@
  */
 package org.apache.spark.sql.catalyst.parser
 
+import java.util
+
 import scala.collection.mutable.StringBuilder
 
 import org.antlr.v4.runtime.{ParserRuleContext, Token}
@@ -39,6 +41,13 @@ object ParserUtils {
     throw new ParseException(s"Operation not allowed: $message", ctx)
   }
 
+  def checkDuplicateClauses[T](
+      nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = {
+    if (nodes.size() > 1) {
+      throw new ParseException(s"Found duplicate clauses: $clauseName", ctx)
+    }
+  }
+
   /** Check if duplicate keys exist in a set of key-value pairs. */
   def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: ParserRuleContext): Unit = {
     keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
```

**`SparkSqlParser.scala`**

```diff
@@ -383,16 +383,19 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * {{{
    *   CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
    *   USING table_provider
-   *   [OPTIONS table_property_list]
-   *   [PARTITIONED BY (col_name, col_name, ...)]
-   *   [CLUSTERED BY (col_name, col_name, ...)
-   *    [SORTED BY (col_name [ASC|DESC], ...)]
-   *    INTO num_buckets BUCKETS
-   *   ]
-   *   [LOCATION path]
-   *   [COMMENT table_comment]
-   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   create_table_clauses
    *   [[AS] select_statement];
+   *
+   * create_table_clauses (order insensitive):
+   *   [OPTIONS table_property_list]
+   *   [PARTITIONED BY (col_name, col_name, ...)]
+   *   [CLUSTERED BY (col_name, col_name, ...)
+   *    [SORTED BY (col_name [ASC|DESC], ...)]
+   *    INTO num_buckets BUCKETS
+   *   ]
+   *   [LOCATION path]
+   *   [COMMENT table_comment]
+   *   [TBLPROPERTIES (property_name=property_value, ...)]
    * }}}
    */
   override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) {
@@ -400,6 +403,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     if (external) {
       operationNotAllowed("CREATE EXTERNAL TABLE ... USING", ctx)
     }
+
+    checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
+    checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx)
+    checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx)
+    checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
+    checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx)
+    checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
+
     val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val provider = ctx.tableProvider.qualifiedName.getText
     val schema = Option(ctx.colTypeList()).map(createSchema)
@@ -408,9 +419,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       .map(visitIdentifierList(_).toArray)
       .getOrElse(Array.empty[String])
     val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
-    val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
-    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec)
+    val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec)
     val storage = DataSource.buildStorageFormatFromOptions(options)
 
     if (location.isDefined && storage.locationUri.isDefined) {
@@ -1087,13 +1098,16 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * {{{
    *   CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
    *   [(col1[:] data_type [COMMENT col_comment], ...)]
-   *   [COMMENT table_comment]
-   *   [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)]
-   *   [ROW FORMAT row_format]
-   *   [STORED AS file_format]
-   *   [LOCATION path]
-   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   create_table_clauses
    *   [AS select_statement];
+   *
+   * create_table_clauses (order insensitive):
+   *   [COMMENT table_comment]
+   *   [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)]
+   *   [ROW FORMAT row_format]
+   *   [STORED AS file_format]
+   *   [LOCATION path]
+   *   [TBLPROPERTIES (property_name=property_value, ...)]
    * }}}
    */
   override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = withOrigin(ctx) {
@@ -1104,15 +1118,23 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
         "CREATE TEMPORARY TABLE is not supported yet. " +
           "Please use CREATE TEMPORARY VIEW as an alternative.", ctx)
     }
-    if (ctx.skewSpec != null) {
+    if (ctx.skewSpec.size > 0) {
       operationNotAllowed("CREATE TABLE ... SKEWED BY", ctx)
     }
+
+    checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
+    checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx)
+    checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
+    checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx)
+    checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx)
+    checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx)
+    checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
+
     val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil)
     val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil)
-    val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
+    val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val selectQuery = Option(ctx.query).map(plan)
-    val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
+    val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec)
 
     // Note: Hive requires partition columns to be distinct from the schema, so we need
     // to include the partition columns here explicitly
@@ -1120,12 +1142,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     // Storage format
     val defaultStorage = HiveSerDe.getDefaultStorage(conf)
-    validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx)
-    val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat)
+    validateRowFormatFileFormat(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx)
+    val fileStorage = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat)
       .getOrElse(CatalogStorageFormat.empty)
-    val rowStorage = Option(ctx.rowFormat).map(visitRowFormat)
+    val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat)
       .getOrElse(CatalogStorageFormat.empty)
-    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec)
 
     // If we are creating an EXTERNAL table, then the LOCATION field is required
     if (external && location.isEmpty) {
       operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx)
@@ -1180,7 +1202,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
           ctx)
       }
 
-      val hasStorageProperties = (ctx.createFileFormat != null) || (ctx.rowFormat != null)
+      val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0)
       if (conf.convertCTAS && !hasStorageProperties) {
         // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties
         // are empty Maps.
@@ -1366,6 +1388,15 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     }
   }
 
+  private def validateRowFormatFileFormat(
+      rowFormatCtx: Seq[RowFormatContext],
+      createFileFormatCtx: Seq[CreateFileFormatContext],
+      parentCtx: ParserRuleContext): Unit = {
+    if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) {
+      validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx)
+    }
+  }
+
   /**
    * Create or replace a view. This creates a [[CreateViewCommand]] command.
    *
```

**`DDLParserSuite.scala`**

```diff
@@ -54,6 +54,13 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
     }
   }
 
+  private def intercept(sqlCommand: String, messages: String*): Unit = {
+    val e = intercept[ParseException](parser.parsePlan(sqlCommand)).getMessage
+    messages.foreach { message =>
+      assert(e.contains(message))
+    }
+  }
+
   private def parseAs[T: ClassTag](query: String): T = {
     parser.parsePlan(query) match {
       case t: T => t
```
```diff
@@ -494,6 +501,37 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
     }
   }
 
+  test("Duplicate clauses - create table") {
+    def createTableHeader(duplicateClause: String, isNative: Boolean): String = {
+      val fileFormat = if (isNative) "USING parquet" else "STORED AS parquet"
+      s"CREATE TABLE my_tab(a INT, b STRING) $fileFormat $duplicateClause $duplicateClause"
+    }
+
+    Seq(true, false).foreach { isNative =>
+      intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')", isNative),
+        "Found duplicate clauses: TBLPROPERTIES")
+      intercept(createTableHeader("LOCATION '/tmp/file'", isNative),
+        "Found duplicate clauses: LOCATION")
+      intercept(createTableHeader("COMMENT 'a table'", isNative),
+        "Found duplicate clauses: COMMENT")
+      intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS", isNative),
+        "Found duplicate clauses: CLUSTERED BY")
+    }
+
+    // Only for native data source tables
+    intercept(createTableHeader("PARTITIONED BY (b)", isNative = true),
+      "Found duplicate clauses: PARTITIONED BY")
+
+    // Only for Hive serde tables
+    intercept(createTableHeader("PARTITIONED BY (k int)", isNative = false),
+      "Found duplicate clauses: PARTITIONED BY")
+    intercept(createTableHeader("STORED AS parquet", isNative = false),
+      "Found duplicate clauses: STORED AS/BY")
+    intercept(
+      createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'", isNative = false),
+      "Found duplicate clauses: ROW FORMAT")
+  }
+
   test("create table - with location") {
     val v1 = "CREATE TABLE my_tab(a INT, b STRING) USING parquet LOCATION '/tmp/file'"
```
```diff
@@ -1153,38 +1191,119 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
     }
   }
 
+  test("Test CTAS against data source tables") {
+    val s1 =
+      """
+        |CREATE TABLE IF NOT EXISTS mydb.page_view
+        |USING parquet
+        |COMMENT 'This is the staging page view table'
+        |LOCATION '/user/external/page_view'
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    val s2 =
+      """
+        |CREATE TABLE IF NOT EXISTS mydb.page_view
+        |USING parquet
+        |LOCATION '/user/external/page_view'
+        |COMMENT 'This is the staging page view table'
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    val s3 =
+      """
+        |CREATE TABLE IF NOT EXISTS mydb.page_view
+        |USING parquet
+        |COMMENT 'This is the staging page view table'
+        |LOCATION '/user/external/page_view'
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    checkParsing(s1)
+    checkParsing(s2)
+    checkParsing(s3)
+
+    def checkParsing(sql: String): Unit = {
+      val (desc, exists) = extractTableDesc(sql)
+      assert(exists)
+      assert(desc.identifier.database == Some("mydb"))
+      assert(desc.identifier.table == "page_view")
+      assert(desc.storage.locationUri == Some(new URI("/user/external/page_view")))
+      assert(desc.schema.isEmpty) // will be populated later when the table is actually created
+      assert(desc.comment == Some("This is the staging page view table"))
+      assert(desc.viewText.isEmpty)
+      assert(desc.viewDefaultDatabase.isEmpty)
+      assert(desc.viewQueryColumnNames.isEmpty)
+      assert(desc.partitionColumnNames.isEmpty)
+      assert(desc.provider == Some("parquet"))
+      assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2"))
+    }
+  }
+
   test("Test CTAS #1") {
     val s1 =
-      """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
+      """
+        |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
         |COMMENT 'This is the staging page view table'
         |STORED AS RCFILE
         |LOCATION '/user/external/page_view'
         |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
-        |AS SELECT * FROM src""".stripMargin
+        |AS SELECT * FROM src
+      """.stripMargin
 
-    val (desc, exists) = extractTableDesc(s1)
-    assert(exists)
-    assert(desc.identifier.database == Some("mydb"))
-    assert(desc.identifier.table == "page_view")
-    assert(desc.tableType == CatalogTableType.EXTERNAL)
-    assert(desc.storage.locationUri == Some(new URI("/user/external/page_view")))
-    assert(desc.schema.isEmpty) // will be populated later when the table is actually created
-    assert(desc.comment == Some("This is the staging page view table"))
-    // TODO will be SQLText
-    assert(desc.viewText.isEmpty)
-    assert(desc.viewDefaultDatabase.isEmpty)
-    assert(desc.viewQueryColumnNames.isEmpty)
-    assert(desc.partitionColumnNames.isEmpty)
-    assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
-    assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
-    assert(desc.storage.serde ==
-      Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"))
-    assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2"))
+    val s2 =
+      """
+        |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
+        |STORED AS RCFILE
+        |COMMENT 'This is the staging page view table'
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |LOCATION '/user/external/page_view'
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    val s3 =
+      """
+        |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |LOCATION '/user/external/page_view'
+        |STORED AS RCFILE
+        |COMMENT 'This is the staging page view table'
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    checkParsing(s1)
+    checkParsing(s2)
+    checkParsing(s3)
+
+    def checkParsing(sql: String): Unit = {
+      val (desc, exists) = extractTableDesc(sql)
+      assert(exists)
+      assert(desc.identifier.database == Some("mydb"))
+      assert(desc.identifier.table == "page_view")
+      assert(desc.tableType == CatalogTableType.EXTERNAL)
+      assert(desc.storage.locationUri == Some(new URI("/user/external/page_view")))
+      assert(desc.schema.isEmpty) // will be populated later when the table is actually created
+      assert(desc.comment == Some("This is the staging page view table"))
+      // TODO will be SQLText
+      assert(desc.viewText.isEmpty)
+      assert(desc.viewDefaultDatabase.isEmpty)
+      assert(desc.viewQueryColumnNames.isEmpty)
+      assert(desc.partitionColumnNames.isEmpty)
+      assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
+      assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
+      assert(desc.storage.serde ==
+        Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"))
+      assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2"))
+    }
   }
 
   test("Test CTAS #2") {
-    val s2 =
-      """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
+    val s1 =
+      """
+        |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
         |COMMENT 'This is the staging page view table'
         |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
         | STORED AS
```
```diff
@@ -1192,26 +1311,45 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
         | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat'
         |LOCATION '/user/external/page_view'
         |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
-        |AS SELECT * FROM src""".stripMargin
+        |AS SELECT * FROM src
+      """.stripMargin
 
-    val (desc, exists) = extractTableDesc(s2)
-    assert(exists)
-    assert(desc.identifier.database == Some("mydb"))
-    assert(desc.identifier.table == "page_view")
-    assert(desc.tableType == CatalogTableType.EXTERNAL)
-    assert(desc.storage.locationUri == Some(new URI("/user/external/page_view")))
-    assert(desc.schema.isEmpty) // will be populated later when the table is actually created
-    // TODO will be SQLText
-    assert(desc.comment == Some("This is the staging page view table"))
-    assert(desc.viewText.isEmpty)
-    assert(desc.viewDefaultDatabase.isEmpty)
-    assert(desc.viewQueryColumnNames.isEmpty)
-    assert(desc.partitionColumnNames.isEmpty)
-    assert(desc.storage.properties == Map())
-    assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat"))
-    assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat"))
-    assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe"))
-    assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2"))
+    val s2 =
+      """
+        |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
+        |LOCATION '/user/external/page_view'
+        |TBLPROPERTIES ('p1'='v1', 'p2'='v2')
+        |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
+        | STORED AS
+        | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
+        | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat'
+        |COMMENT 'This is the staging page view table'
+        |AS SELECT * FROM src
+      """.stripMargin
+
+    checkParsing(s1)
+    checkParsing(s2)
+
+    def checkParsing(sql: String): Unit = {
+      val (desc, exists) = extractTableDesc(sql)
+      assert(exists)
+      assert(desc.identifier.database == Some("mydb"))
+      assert(desc.identifier.table == "page_view")
+      assert(desc.tableType == CatalogTableType.EXTERNAL)
+      assert(desc.storage.locationUri == Some(new URI("/user/external/page_view")))
+      assert(desc.schema.isEmpty) // will be populated later when the table is actually created
+      // TODO will be SQLText
+      assert(desc.comment == Some("This is the staging page view table"))
+      assert(desc.viewText.isEmpty)
+      assert(desc.viewDefaultDatabase.isEmpty)
+      assert(desc.viewQueryColumnNames.isEmpty)
+      assert(desc.partitionColumnNames.isEmpty)
+      assert(desc.storage.properties == Map())
+      assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat"))
+      assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat"))
+      assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe"))
+      assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2"))
+    }
   }
 
   test("Test CTAS #3") {
```

**`DDLSuite.scala`**

```diff
@@ -1971,8 +1971,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
           s"""
              |CREATE TABLE t(a int, b int, c int, d int)
              |USING parquet
-             |PARTITIONED BY(a, b)
              |LOCATION "${dir.toURI}"
+             |PARTITIONED BY(a, b)
           """.stripMargin)
         spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4")
         checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil)
```

**`HiveDDLSuite.scala`**

```diff
@@ -875,12 +875,13 @@ class HiveDDLSuite
   test("desc table for Hive table - bucketed + sorted table") {
     withTable("tbl") {
-      sql(s"""
-        CREATE TABLE tbl (id int, name string)
-        PARTITIONED BY (ds string)
-        CLUSTERED BY(id)
-        SORTED BY(id, name) INTO 1024 BUCKETS
-        """)
+      sql(
+        s"""
+          |CREATE TABLE tbl (id int, name string)
+          |CLUSTERED BY(id)
+          |SORTED BY(id, name) INTO 1024 BUCKETS
+          |PARTITIONED BY (ds string)
+        """.stripMargin)
 
       val x = sql("DESC FORMATTED tbl").collect()
       assert(x.containsSlice(
```

**`SQLQuerySuite.scala`**

```diff
@@ -461,51 +461,55 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   }
 
   test("CTAS without serde without location") {
-    val originalConf = sessionState.conf.convertCTAS
-
-    setConf(SQLConf.CONVERT_CTAS, true)
-
-    val defaultDataSource = sessionState.conf.defaultDataSourceName
-    try {
-      sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-      sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-      val message = intercept[AnalysisException] {
-        sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-      }.getMessage
-      assert(message.contains("already exists"))
-      checkRelation("ctas1", true, defaultDataSource)
-      sql("DROP TABLE ctas1")
-
-      // Specifying database name for query can be converted to data source write path
-      // is not allowed right now.
-      sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", true, defaultDataSource)
-      sql("DROP TABLE ctas1")
-
-      sql("CREATE TABLE ctas1 stored as textfile" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "text")
-      sql("DROP TABLE ctas1")
-
-      sql("CREATE TABLE ctas1 stored as sequencefile" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "sequence")
-      sql("DROP TABLE ctas1")
-
-      sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "rcfile")
-      sql("DROP TABLE ctas1")
-
-      sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "orc")
-      sql("DROP TABLE ctas1")
-
-      sql("CREATE TABLE ctas1 stored as parquet AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "parquet")
-      sql("DROP TABLE ctas1")
-    } finally {
-      setConf(SQLConf.CONVERT_CTAS, originalConf)
-      sql("DROP TABLE IF EXISTS ctas1")
+    withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
+      val defaultDataSource = sessionState.conf.defaultDataSourceName
+      withTable("ctas1") {
+        sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+        sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+        val message = intercept[AnalysisException] {
+          sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+        }.getMessage
+        assert(message.contains("already exists"))
+        checkRelation("ctas1", isDataSourceTable = true, defaultDataSource)
+      }
+
+      // Specifying database name for query can be converted to data source write path
+      // is not allowed right now.
+      withTable("ctas1") {
+        sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation("ctas1", isDataSourceTable = true, defaultDataSource)
+      }
+
+      withTable("ctas1") {
+        sql("CREATE TABLE ctas1 stored as textfile" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation("ctas1", isDataSourceTable = false, "text")
+      }
+
+      withTable("ctas1") {
+        sql("CREATE TABLE ctas1 stored as sequencefile" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation("ctas1", isDataSourceTable = false, "sequence")
+      }
+
+      withTable("ctas1") {
+        sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation("ctas1", isDataSourceTable = false, "rcfile")
+      }
+
+      withTable("ctas1") {
+        sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation("ctas1", isDataSourceTable = false, "orc")
+      }
+
+      withTable("ctas1") {
+        sql(
+          """
+            |CREATE TABLE ctas1 stored as parquet
+            |AS SELECT key k, value FROM src ORDER BY k, value
+          """.stripMargin)
+        checkRelation("ctas1", isDataSourceTable = false, "parquet")
+      }
     }
   }
```
```diff
@@ -539,30 +543,40 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       val defaultDataSource = sessionState.conf.defaultDataSourceName
 
       val tempLocation = dir.toURI.getPath.stripSuffix("/")
 
-      sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c1'" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", true, defaultDataSource, Some(s"file:$tempLocation/c1"))
-      sql("DROP TABLE ctas1")
-
-      sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c2'" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", true, defaultDataSource, Some(s"file:$tempLocation/c2"))
-      sql("DROP TABLE ctas1")
-
-      sql(s"CREATE TABLE ctas1 stored as textfile LOCATION 'file:$tempLocation/c3'" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "text", Some(s"file:$tempLocation/c3"))
-      sql("DROP TABLE ctas1")
-
-      sql(s"CREATE TABLE ctas1 stored as sequenceFile LOCATION 'file:$tempLocation/c4'" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "sequence", Some(s"file:$tempLocation/c4"))
-      sql("DROP TABLE ctas1")
-
-      sql(s"CREATE TABLE ctas1 stored as rcfile LOCATION 'file:$tempLocation/c5'" +
-          " AS SELECT key k, value FROM src ORDER BY k, value")
-      checkRelation("ctas1", false, "rcfile", Some(s"file:$tempLocation/c5"))
-      sql("DROP TABLE ctas1")
+      withTable("ctas1") {
+        sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c1'" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation(
+          "ctas1", isDataSourceTable = true, defaultDataSource, Some(s"file:$tempLocation/c1"))
+      }
+
+      withTable("ctas1") {
+        sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c2'" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation(
+          "ctas1", isDataSourceTable = true, defaultDataSource, Some(s"file:$tempLocation/c2"))
+      }
+
+      withTable("ctas1") {
+        sql(s"CREATE TABLE ctas1 stored as textfile LOCATION 'file:$tempLocation/c3'" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation(
+          "ctas1", isDataSourceTable = false, "text", Some(s"file:$tempLocation/c3"))
+      }
+
+      withTable("ctas1") {
+        sql(s"CREATE TABLE ctas1 stored as sequenceFile LOCATION 'file:$tempLocation/c4'" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation(
+          "ctas1", isDataSourceTable = false, "sequence", Some(s"file:$tempLocation/c4"))
+      }
+
+      withTable("ctas1") {
+        sql(s"CREATE TABLE ctas1 stored as rcfile LOCATION 'file:$tempLocation/c5'" +
+          " AS SELECT key k, value FROM src ORDER BY k, value")
+        checkRelation(
+          "ctas1", isDataSourceTable = false, "rcfile", Some(s"file:$tempLocation/c5"))
+      }
```
} }