[SPARK-27946][SQL] Hive DDL to Spark DDL conversion USING "show create table"

## What changes were proposed in this pull request?

This patch adds a new DDL command, `SHOW CREATE TABLE AS SERDE`, which generates Hive DDL for a Hive table.

The original `SHOW CREATE TABLE` now always shows Spark DDL: given a Hive table, it tries to generate the equivalent Spark DDL.

For the Hive serde to data source conversion, this uses the existing mapping inside `HiveSerDe`. If no mapping is found there, an `AnalysisException` is thrown for the unsupported serde configuration.

Arguably, some Hive fileformat + row serde combinations could also be mapped to a Spark data source, e.g., CSV. That is not included in this PR; to be conservative, such combinations remain unsupported.

For now, Hive serde properties are not saved to the Spark DDL, because it may not be useful to keep Hive serde properties in a Spark table.
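
A minimal sketch of the intended behavior (the table name is hypothetical and the annotated DDL shapes are indicative, not verbatim output):

```scala
// Assumes `spark` is a SparkSession with Hive support enabled.
spark.sql("CREATE TABLE hive_tbl (c1 INT, c2 STRING) STORED AS PARQUET")

// Spark DDL: the Hive parquet serde maps to the `parquet` data source,
// so the statement comes back as "CREATE TABLE ... USING parquet".
spark.sql("SHOW CREATE TABLE hive_tbl").show(truncate = false)

// Hive DDL: preserves the ROW FORMAT SERDE / STORED AS details, e.g.
// "... ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' ...".
spark.sql("SHOW CREATE TABLE hive_tbl AS SERDE").show(truncate = false)
```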

## How was this patch tested?

Added test.

Closes #24938 from viirya/SPARK-27946.

Lead-authored-by: Liang-Chi Hsieh <viirya@gmail.com>
Co-authored-by: Liang-Chi Hsieh <liangchi@uber.com>
Signed-off-by: Xiao Li <gatorsmile@gmail.com>
Liang-Chi Hsieh 2020-01-31 19:55:25 -08:00 committed by Xiao Li
parent d0c3e9f1f7
commit 8eecc20b11
11 changed files with 581 additions and 124 deletions


@@ -328,6 +328,8 @@ license: |
- Since Spark 3.0, `SHOW TBLPROPERTIES` will cause `AnalysisException` if the table does not exist. In Spark version 2.4 and earlier, this scenario caused `NoSuchTableException`. Also, `SHOW TBLPROPERTIES` on a temporary view will cause `AnalysisException`. In Spark version 2.4 and earlier, it returned an empty result.
- Since Spark 3.0, `SHOW CREATE TABLE` will always return Spark DDL, even when the given table is a Hive serde table. For Hive DDL, please use `SHOW CREATE TABLE AS SERDE` command instead.
## Upgrading from Spark SQL 2.4.4 to 2.4.5
- Since Spark 2.4.5, `TRUNCATE TABLE` command tries to set back original permission and ACLs during re-creating the table/partition paths. To restore the behaviour of earlier versions, set `spark.sql.truncateTable.ignorePermissionAcl.enabled` to `true`.


@@ -210,7 +210,7 @@ statement
| SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions
| SHOW identifier? FUNCTIONS
(LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions
| SHOW CREATE TABLE multipartIdentifier #showCreateTable
| SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable
| SHOW CURRENT NAMESPACE #showCurrentNamespace
| (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
| (DESC | DESCRIBE) namespace EXTENDED?


@@ -3215,7 +3215,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* Creates a [[ShowCreateTableStatement]]
*/
override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) {
ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()))
ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()), ctx.SERDE != null)
}
/**


@@ -389,7 +389,9 @@ case class LoadDataStatement(
/**
* A SHOW CREATE TABLE statement, as parsed from SQL.
*/
case class ShowCreateTableStatement(tableName: Seq[String]) extends ParsedStatement
case class ShowCreateTableStatement(
tableName: Seq[String],
asSerde: Boolean = false) extends ParsedStatement
/**
* A CACHE TABLE statement, as parsed from SQL


@@ -378,10 +378,14 @@ class ResolveSessionCatalog(
isOverwrite,
partition)
case ShowCreateTableStatement(tbl) =>
case ShowCreateTableStatement(tbl, asSerde) if !asSerde =>
val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE")
ShowCreateTableCommand(v1TableName.asTableIdentifier)
case ShowCreateTableStatement(tbl, asSerde) if asSerde =>
val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE")
ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier)
case CacheTableStatement(tbl, plan, isLazy, options) =>
val v1TableName = parseV1Table(tbl, "CACHE TABLE")
CacheTableCommand(v1TableName.asTableIdentifier, plan, isLazy, options)


@@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.SchemaUtils
@@ -1002,7 +1002,57 @@ case class ShowPartitionsCommand(
}
}
case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand {
/**
* Provides common utilities between `ShowCreateTableCommand` and `ShowCreateTableAsSerdeCommand`.
*/
trait ShowCreateTableCommandBase {
protected val table: TableIdentifier
protected def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.tableType == EXTERNAL) {
metadata.storage.locationUri.foreach { location =>
builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n"
}
}
}
protected def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = {
metadata
.comment
.map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
.foreach(builder.append)
}
protected def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.properties.nonEmpty) {
val props = metadata.properties.map { case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
builder ++= "TBLPROPERTIES "
builder ++= concatByMultiLines(props)
}
}
protected def concatByMultiLines(iter: Iterable[String]): String = {
iter.mkString("(\n ", ",\n ", ")\n")
}
}
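
For reference, `concatByMultiLines` renders each item on its own line inside parentheses, closing on the last item's line; a standalone sketch of the formatting (illustration only, not the Spark source):

```scala
// Restatement of the helper above, runnable on its own.
def concatByMultiLines(iter: Iterable[String]): String =
  iter.mkString("(\n  ", ",\n  ", ")\n")

// concatByMultiLines(Seq("'k1' = 'v1'", "'k2' = 'v2'")) produces:
// (
//   'k1' = 'v1',
//   'k2' = 'v2')
```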
/**
* A command that shows the Spark DDL syntax that can be used to create a given table.
* For a Hive serde table, this command will generate Spark DDL that can be used to
* create a corresponding Spark table.
*
* The syntax of using this command in SQL is:
* {{{
* SHOW CREATE TABLE [db_name.]table_name
* }}}
*/
case class ShowCreateTableCommand(table: TableIdentifier)
extends RunnableCommand with ShowCreateTableCommandBase {
override val output: Seq[Attribute] = Seq(
AttributeReference("createtab_stmt", StringType, nullable = false)()
)
@@ -1017,15 +1067,153 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
// TODO: [SPARK-28692] unify this after we unify the
// CREATE TABLE syntax for hive serde and data source table.
val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) {
tableMetadata
} else {
// For a Hive serde table, we try to convert it to Spark DDL.
if (tableMetadata.unsupportedFeatures.nonEmpty) {
throw new AnalysisException(
"Failed to execute SHOW CREATE TABLE against table " +
s"${tableMetadata.identifier}, which is created by Hive and uses the " +
"following unsupported feature(s)\n" +
tableMetadata.unsupportedFeatures.map(" - " + _).mkString("\n")
)
}
if (tableMetadata.tableType == VIEW) {
throw new AnalysisException("Hive view isn't supported by SHOW CREATE TABLE")
}
if ("true".equalsIgnoreCase(tableMetadata.properties.getOrElse("transactional", "false"))) {
throw new AnalysisException(
"SHOW CREATE TABLE doesn't support transactional Hive table")
}
convertTableMetadata(tableMetadata)
}
val stmt = showCreateDataSourceTable(metadata)
Seq(Row(stmt))
}
}
private def convertTableMetadata(tableMetadata: CatalogTable): CatalogTable = {
val hiveSerde = HiveSerDe(
serde = tableMetadata.storage.serde,
inputFormat = tableMetadata.storage.inputFormat,
outputFormat = tableMetadata.storage.outputFormat)
// Look for a Spark data source that maps to the Hive serde.
// TODO: some Hive fileformat + row serde might be mapped to Spark data source, e.g. CSV.
val source = HiveSerDe.serdeToSource(hiveSerde)
if (source.isEmpty) {
val builder = StringBuilder.newBuilder
hiveSerde.serde.foreach { serde =>
builder ++= s" SERDE: $serde"
}
hiveSerde.inputFormat.foreach { format =>
builder ++= s" INPUTFORMAT: $format"
}
hiveSerde.outputFormat.foreach { format =>
builder ++= s" OUTPUTFORMAT: $format"
}
throw new AnalysisException(
"Failed to execute SHOW CREATE TABLE against table " +
s"${tableMetadata.identifier}, which is created by Hive and uses the " +
"following unsupported serde configuration\n" +
builder.toString()
)
} else {
// TODO: should we keep Hive serde properties?
val newStorage = tableMetadata.storage.copy(properties = Map.empty)
tableMetadata.copy(provider = source, storage = newStorage)
}
}
private def showDataSourceTableDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val columns = metadata.schema.fields.map(_.toDDL)
builder ++= concatByMultiLines(columns)
}
private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
// For a data source table, a provider is present in the metadata.
// For a Hive table, we have already converted its metadata and filled in a provider.
builder ++= s"USING ${metadata.provider.get}\n"
val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'"
}
if (dataSourceOptions.nonEmpty) {
builder ++= "OPTIONS "
builder ++= concatByMultiLines(dataSourceOptions)
}
}
private def showDataSourceTableNonDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val partCols = metadata.partitionColumnNames
if (partCols.nonEmpty) {
builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n"
}
metadata.bucketSpec.foreach { spec =>
if (spec.bucketColumnNames.nonEmpty) {
builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n"
if (spec.sortColumnNames.nonEmpty) {
builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n"
}
builder ++= s"INTO ${spec.numBuckets} BUCKETS\n"
}
}
}
private def showCreateDataSourceTable(metadata: CatalogTable): String = {
val builder = StringBuilder.newBuilder
builder ++= s"CREATE TABLE ${table.quotedString} "
showDataSourceTableDataColumns(metadata, builder)
showDataSourceTableOptions(metadata, builder)
showDataSourceTableNonDataColumns(metadata, builder)
showTableComment(metadata, builder)
showTableLocation(metadata, builder)
showTableProperties(metadata, builder)
builder.toString()
}
}
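
The unsupported path is exercised by a test later in this diff; a hedged sketch of what a user sees (table name hypothetical):

```scala
// RCFILE is deliberately excluded from `serdeInverseMap`, so no Spark data
// source is found and the command fails, listing the serde configuration.
spark.sql("CREATE TABLE hive_rc (id INT) STORED AS RCFILE")
spark.sql("SHOW CREATE TABLE hive_rc")
// => AnalysisException: "... uses the following unsupported serde configuration ..."
```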
/**
* This command generates Hive DDL for a Hive serde table.
*
* The syntax of using this command in SQL is:
* {{{
* SHOW CREATE TABLE table_identifier AS SERDE;
* }}}
*/
case class ShowCreateTableAsSerdeCommand(table: TableIdentifier)
extends RunnableCommand with ShowCreateTableCommandBase {
override val output: Seq[Attribute] = Seq(
AttributeReference("createtab_stmt", StringType, nullable = false)()
)
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val tableMetadata = catalog.getTableMetadata(table)
val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) {
showCreateDataSourceTable(tableMetadata)
throw new AnalysisException(
s"$table is a Spark data source table. Use `SHOW CREATE TABLE` without `AS SERDE` instead.")
} else {
showCreateHiveTable(tableMetadata)
}
Seq(Row(stmt))
}
}
private def showCreateHiveTable(metadata: CatalogTable): String = {
def reportUnsupportedError(features: Seq[String]): Unit = {
@@ -1055,12 +1243,12 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
if (metadata.tableType == VIEW) {
showViewDataColumns(metadata, builder)
showComment(metadata, builder)
showTableComment(metadata, builder)
showViewProperties(metadata, builder)
showViewText(metadata, builder)
} else {
showHiveTableHeader(metadata, builder)
showComment(metadata, builder)
showTableComment(metadata, builder)
showHiveTableNonDataColumns(metadata, builder)
showHiveTableStorageInfo(metadata, builder)
showTableLocation(metadata, builder)
@@ -1084,10 +1272,6 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
}
}
private def concatByMultiLines(iter: Iterable[String]): String = {
iter.mkString("(\n ", ",\n ", ")\n")
}
private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX))
if (viewProps.nonEmpty) {
@@ -1136,7 +1320,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
storage.serde.foreach { serde =>
builder ++= s"ROW FORMAT SERDE '$serde'\n"
val serdeProps = metadata.storage.properties.map {
val serdeProps = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
@@ -1156,81 +1340,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
}
}
}
private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.tableType == EXTERNAL) {
metadata.storage.locationUri.foreach { location =>
builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n"
}
}
}
private def showComment(metadata: CatalogTable, builder: StringBuilder): Unit = {
metadata
.comment
.map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
.foreach(builder.append)
}
private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.properties.nonEmpty) {
val props = metadata.properties.map { case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
builder ++= s"TBLPROPERTIES ${concatByMultiLines(props)}"
}
}
private def showCreateDataSourceTable(metadata: CatalogTable): String = {
val builder = StringBuilder.newBuilder
builder ++= s"CREATE TABLE ${table.quotedString} "
showDataSourceTableDataColumns(metadata, builder)
showDataSourceTableOptions(metadata, builder)
showDataSourceTableNonDataColumns(metadata, builder)
showComment(metadata, builder)
showTableLocation(metadata, builder)
showTableProperties(metadata, builder)
builder.toString()
}
private def showDataSourceTableDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val columns = metadata.schema.fields.map(_.toDDL)
builder ++= concatByMultiLines(columns)
}
private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
builder ++= s"USING ${metadata.provider.get}\n"
val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'"
}
if (dataSourceOptions.nonEmpty) {
builder ++= s"OPTIONS ${concatByMultiLines(dataSourceOptions)}"
}
}
private def showDataSourceTableNonDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val partCols = metadata.partitionColumnNames
if (partCols.nonEmpty) {
builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n"
}
metadata.bucketSpec.foreach { spec =>
if (spec.bucketColumnNames.nonEmpty) {
builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n"
if (spec.sortColumnNames.nonEmpty) {
builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n"
}
builder ++= s"INTO ${spec.numBuckets} BUCKETS\n"
}
}
}
}


@@ -65,6 +65,14 @@ object HiveSerDe {
outputFormat = Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"),
serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe")))
// The `HiveSerDe` values in `serdeMap` should be distinct.
val serdeInverseMap: Map[HiveSerDe, String] = serdeMap.flatMap {
case ("sequencefile", _) => None
case ("rcfile", _) => None
case ("textfile", serde) => Some((serde, "text"))
case pair => Some(pair.swap)
}
/**
* Get the Hive SerDe information from the data source abbreviation string or classname.
*
@@ -88,6 +96,14 @@
serdeMap.get(key)
}
/**
* Get the Spark data source name from the Hive SerDe information.
*
* @param serde Hive SerDe information.
* @return Spark data source name associated with the specified Hive SerDe.
*/
def serdeToSource(serde: HiveSerDe): Option[String] = serdeInverseMap.get(serde)
def getDefaultStorage(conf: SQLConf): CatalogStorageFormat = {
// To respect hive-site.xml, it peeks Hadoop configuration from existing Spark session,
// as an easy workaround. See SPARK-27555.
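
A sketch of the new lookup in action (the parquet class names are the same ones used by the test file later in this diff):

```scala
import org.apache.spark.sql.internal.HiveSerDe

// Storage descriptor triple of a Hive table created with STORED AS PARQUET.
val parquetSerde = HiveSerDe(
  inputFormat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
  outputFormat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
  serde = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))

// Expected to return Some("parquet"); sequencefile and rcfile are skipped in
// `serdeInverseMap` above, so those configurations return None.
val source: Option[String] = HiveSerDe.serdeToSource(parquetSerde)
```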


@@ -73,7 +73,7 @@ CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet;
CREATE VIEW view_SPARK_30302 (aaa, bbb)
AS SELECT a, b FROM tbl;
SHOW CREATE TABLE view_SPARK_30302;
SHOW CREATE TABLE view_SPARK_30302 AS SERDE;
DROP VIEW view_SPARK_30302;
@@ -82,7 +82,7 @@ CREATE VIEW view_SPARK_30302 (aaa COMMENT 'comment with \'quoted text\' for aaa'
COMMENT 'This is a comment with \'quoted text\' for view'
AS SELECT a, b FROM tbl;
SHOW CREATE TABLE view_SPARK_30302;
SHOW CREATE TABLE view_SPARK_30302 AS SERDE;
DROP VIEW view_SPARK_30302;
@@ -91,6 +91,13 @@ CREATE VIEW view_SPARK_30302 (aaa, bbb)
TBLPROPERTIES ('a' = '1', 'b' = '2')
AS SELECT a, b FROM tbl;
SHOW CREATE TABLE view_SPARK_30302 AS SERDE;
DROP VIEW view_SPARK_30302;
-- SHOW CREATE TABLE does not support view
CREATE VIEW view_SPARK_30302 (aaa, bbb)
AS SELECT a, b FROM tbl;
SHOW CREATE TABLE view_SPARK_30302;
DROP VIEW view_SPARK_30302;


@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 38
-- Number of queries: 41
-- !query
@@ -291,7 +291,7 @@ struct<>
-- !query
SHOW CREATE TABLE view_SPARK_30302
SHOW CREATE TABLE view_SPARK_30302 AS SERDE
-- !query schema
struct<createtab_stmt:string>
-- !query output
@@ -320,7 +320,7 @@ struct<>
-- !query
SHOW CREATE TABLE view_SPARK_30302
SHOW CREATE TABLE view_SPARK_30302 AS SERDE
-- !query schema
struct<createtab_stmt:string>
-- !query output
@@ -350,7 +350,7 @@ struct<>
-- !query
SHOW CREATE TABLE view_SPARK_30302
SHOW CREATE TABLE view_SPARK_30302 AS SERDE
-- !query schema
struct<createtab_stmt:string>
-- !query output
@@ -371,6 +371,32 @@ struct<>
-- !query
CREATE VIEW view_SPARK_30302 (aaa, bbb)
AS SELECT a, b FROM tbl
-- !query schema
struct<>
-- !query output
-- !query
SHOW CREATE TABLE view_SPARK_30302
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Hive view isn't supported by SHOW CREATE TABLE;
-- !query
DROP VIEW view_SPARK_30302
-- !query schema
struct<>
-- !query output
-- !query
DROP TABLE tbl
-- !query schema


@@ -148,20 +148,6 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
}
}
test("view") {
withView("v1") {
sql("CREATE VIEW v1 AS SELECT 1 AS a")
checkCreateView("v1")
}
}
test("view with output columns") {
withView("v1") {
sql("CREATE VIEW v1 (b) AS SELECT 1 AS a")
checkCreateView("v1")
}
}
test("temp view") {
val viewName = "spark_28383"
withTempView(viewName) {
@@ -225,7 +211,7 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
}
}
private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = {
protected def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = {
def normalize(table: CatalogTable): CatalogTable = {
val nondeterministicProps = Set(
"CreateTime",


@@ -18,8 +18,10 @@
package org.apache.spark.sql.hive
import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton {
@@ -38,6 +40,20 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
super.afterAll()
}
test("view") {
withView("v1") {
sql("CREATE VIEW v1 AS SELECT 1 AS a")
checkCreateHiveTableOrView("v1", "VIEW")
}
}
test("view with output columns") {
withView("v1") {
sql("CREATE VIEW v1 (b) AS SELECT 1 AS a")
checkCreateHiveTableOrView("v1", "VIEW")
}
}
test("simple hive table") {
withTable("t1") {
sql(
@@ -52,7 +68,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -72,7 +88,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
}
@@ -92,7 +108,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -110,7 +126,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -125,7 +141,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -147,7 +163,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -160,7 +176,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
|INTO 2 BUCKETS
""".stripMargin
)
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
@@ -188,18 +204,44 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
}
assert(cause.getMessage.contains(" - partitioned view"))
val causeForSpark = intercept[AnalysisException] {
sql("SHOW CREATE TABLE v1 AS SERDE")
}
assert(causeForSpark.getMessage.contains(" - partitioned view"))
}
}
}
test("SPARK-24911: keep quotes for nested fields in hive") {
withTable("t1") {
val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>) USING hive"
val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive"
sql(createTable)
val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
assert(shownDDL == createTable.dropRight(" USING hive".length))
checkCreateTable("t1")
checkCreateHiveTableOrView("t1")
}
}
/**
* This method compares the given table with the table created by the DDL generated by
* `SHOW CREATE TABLE AS SERDE`.
*/
private def checkCreateHiveTableOrView(tableName: String, checkType: String = "TABLE"): Unit = {
val table = TableIdentifier(tableName, Some("default"))
val db = table.database.getOrElse("default")
val expected = spark.sharedState.externalCatalog.getTable(db, table.table)
val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString} AS SERDE").head().getString(0)
sql(s"DROP $checkType ${table.quotedString}")
try {
sql(shownDDL)
val actual = spark.sharedState.externalCatalog.getTable(db, table.table)
checkCatalogTables(expected, actual)
} finally {
sql(s"DROP $checkType IF EXISTS ${table.table}")
}
}
@@ -207,4 +249,269 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet
hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog]
.client.runSqlHive(ddl)
}
private def checkCreateSparkTableAsHive(tableName: String): Unit = {
val table = TableIdentifier(tableName, Some("default"))
val db = table.database.get
val hiveTable = spark.sharedState.externalCatalog.getTable(db, table.table)
val sparkDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0)
// Drops original Hive table.
sql(s"DROP TABLE ${table.quotedString}")
try {
// Creates Spark datasource table using generated Spark DDL.
sql(sparkDDL)
val sparkTable = spark.sharedState.externalCatalog.getTable(db, table.table)
checkHiveCatalogTables(hiveTable, sparkTable)
} finally {
sql(s"DROP TABLE IF EXISTS ${table.table}")
}
}
private def checkHiveCatalogTables(hiveTable: CatalogTable, sparkTable: CatalogTable): Unit = {
def normalize(table: CatalogTable): CatalogTable = {
val nondeterministicProps = Set(
"CreateTime",
"transient_lastDdlTime",
"grantTime",
"lastUpdateTime",
"last_modified_by",
"last_modified_time",
"Owner:",
// The following are Hive-specific schema parameters which we do not need to match exactly.
"totalNumberFiles",
"maxFileSize",
"minFileSize"
)
table.copy(
createTime = 0L,
lastAccessTime = 0L,
properties = table.properties.filterKeys(!nondeterministicProps.contains(_)),
stats = None,
ignoredProperties = Map.empty,
storage = table.storage.copy(properties = Map.empty),
provider = None,
tracksPartitionsInCatalog = false
)
}
def fillSerdeFromProvider(table: CatalogTable): CatalogTable = {
table.provider.flatMap(HiveSerDe.sourceToSerDe(_)).map { hiveSerde =>
val newStorage = table.storage.copy(
inputFormat = hiveSerde.inputFormat,
outputFormat = hiveSerde.outputFormat,
serde = hiveSerde.serde
)
table.copy(storage = newStorage)
}.getOrElse(table)
}
assert(normalize(fillSerdeFromProvider(sparkTable)) == normalize(hiveTable))
}
test("simple hive table in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 STRING COMMENT 'bla',
| c2 STRING
|)
|TBLPROPERTIES (
| 'prop1' = 'value1',
| 'prop2' = 'value2'
|)
|STORED AS orc
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
test("show create table as serde can't work on data source table") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 STRING COMMENT 'bla',
| c2 STRING
|)
|USING orc
""".stripMargin
)
val cause = intercept[AnalysisException] {
checkCreateHiveTableOrView("t1")
}
assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead"))
}
}
test("simple external hive table in Spark DDL") {
withTempDir { dir =>
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 STRING COMMENT 'bla',
| c2 STRING
|)
|LOCATION '${dir.toURI}'
|TBLPROPERTIES (
| 'prop1' = 'value1',
| 'prop2' = 'value2'
|)
|STORED AS orc
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
}
test("hive table with STORED AS clause in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 INT COMMENT 'bla',
| c2 STRING
|)
|STORED AS PARQUET
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
test("hive table with nested fields with STORED AS clause in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 INT COMMENT 'bla',
| c2 STRING,
| c3 STRUCT <s1: INT, s2: STRING>
|)
|STORED AS PARQUET
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
test("hive table with unsupported fileformat in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 INT COMMENT 'bla',
| c2 STRING
|)
|STORED AS RCFILE
""".stripMargin
)
val cause = intercept[AnalysisException] {
checkCreateSparkTableAsHive("t1")
}
assert(cause.getMessage.contains("unsupported serde configuration"))
}
}
test("hive table with serde info in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 INT COMMENT 'bla',
| c2 STRING
|)
|ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
|STORED AS
| INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
| OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
test("hive view is not supported by show create table without as serde") {
withTable("t1") {
withView("v1") {
sql("CREATE TABLE t1 (c1 STRING, c2 STRING)")
createRawHiveTable(
s"""
|CREATE VIEW v1
|AS SELECT * from t1
""".stripMargin
)
val cause = intercept[AnalysisException] {
sql("SHOW CREATE TABLE v1")
}
assert(cause.getMessage.contains("view isn't supported"))
}
}
}
test("partitioned, bucketed hive table in Spark DDL") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| emp_id INT COMMENT 'employee id', emp_name STRING,
| emp_dob STRING COMMENT 'employee date of birth', emp_sex STRING COMMENT 'M/F'
|)
|COMMENT 'employee table'
|PARTITIONED BY (
| emp_country STRING COMMENT '2-char code', emp_state STRING COMMENT '2-char code'
|)
|CLUSTERED BY (emp_sex) SORTED BY (emp_id ASC) INTO 10 BUCKETS
|STORED AS ORC
""".stripMargin
)
checkCreateSparkTableAsHive("t1")
}
}
test("show create table for transactional hive table") {
withTable("t1") {
sql(
s"""
|CREATE TABLE t1 (
| c1 STRING COMMENT 'bla',
| c2 STRING
|)
|TBLPROPERTIES (
| 'transactional' = 'true',
| 'prop1' = 'value1',
| 'prop2' = 'value2'
|)
|CLUSTERED BY (c1) INTO 10 BUCKETS
|STORED AS ORC
""".stripMargin
)
val cause = intercept[AnalysisException] {
sql("SHOW CREATE TABLE t1")
}
assert(cause.getMessage.contains(
"SHOW CREATE TABLE doesn't support transactional Hive table"))
}
}
}