[SPARK-31061][SQL] Provide ability to alter the provider of a table

This PR adds support in HiveExternalCatalog for changing the provider of a table through alterTable.

This is useful for catalogs in Spark 3.0 that need to use alterTable to change the provider of a table as part of an atomic REPLACE TABLE implementation.
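As a rough illustration (not code from this patch), a catalog committing an atomic REPLACE TABLE could load the existing metadata, swap the provider recorded in the CatalogTable, and write it back through alterTable; the `externalCatalog` handle and the "db1"/"target_tbl" identifiers below are placeholders:

```scala
// Hedged sketch of a REPLACE TABLE commit path that changes a table's provider.
// `externalCatalog`, "db1", and "target_tbl" are placeholders, not names from this PR.
val oldTable = externalCatalog.getTable("db1", "target_tbl")
val replacement = oldTable.copy(provider = Some("parquet"))  // the replacement table's provider
externalCatalog.alterTable(replacement)  // with this change, the provider update is persisted
```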

Does this PR introduce any user-facing change?

No

How was this patch tested?

Unit tests

Closes #27822 from brkyvz/externalCat.

Authored-by: Burak Yavuz <brkyvz@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>


@@ -634,7 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
           k.startsWith(CREATED_SPARK_VERSION)
       }
-      val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp
+      val newFormatIfExists = tableDefinition.provider.flatMap { p =>
+        if (DDLUtils.isDatasourceTable(tableDefinition)) {
+          Some(DATASOURCE_PROVIDER -> p)
+        } else {
+          None
+        }
+      }
+      val newTableProps =
+        propsFromOldTable ++ tableDefinition.properties + partitionProviderProp ++ newFormatIfExists
       // Add old table's owner if we need to restore
       val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
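
The provider of a data source table is not a concept the Hive metastore understands, so HiveExternalCatalog persists it as a table property; the hunk above folds the (possibly new) provider into the properties written back by alterTable, guarded by `DDLUtils.isDatasourceTable` so Hive-format tables do not get the property. A minimal sketch of the property round trip (the key is the value of the `DATASOURCE_PROVIDER` constant; the rest is illustrative):

```scala
// Illustrative sketch, not code from this PR: the provider is stored under a
// well-known table property and read back when the table metadata is restored.
val DATASOURCE_PROVIDER = "spark.sql.sources.provider"

val propsWritten = Map(DATASOURCE_PROVIDER -> "foo")            // what alterTable now persists
val providerReadBack = propsWritten.get(DATASOURCE_PROVIDER)    // Some("foo") seen on getTable
```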


@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive
 
+import java.net.URI
+
 import org.apache.hadoop.conf.Configuration
 
 import org.apache.spark.SparkConf
@@ -178,4 +180,42 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite {
     assertThrows[QueryExecutionException](client.runSqlHive(
       "INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a"))
   }
+
+  test("SPARK-31061: alterTable should be able to change table provider") {
+    val catalog = newBasicCatalog()
+    val parquetTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat.copy(locationUri = Some(new URI("file:/some/path"))),
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("parquet"))
+    catalog.createTable(parquetTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("parquet"))
+
+    val fooTable = parquetTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
+
+  test("SPARK-31061: alterTable should be able to change table provider from hive") {
+    val catalog = newBasicCatalog()
+    val hiveTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat,
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("hive"))
+    catalog.createTable(hiveTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("hive"))
+
+    val fooTable = rawTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
 }