[SPARK-19272][SQL] Remove the param viewOriginalText
from CatalogTable
## What changes were proposed in this pull request? Hive will expand the view text, so it needs 2 fields: originalText and viewText. Since we don't expand the view text, but only add table properties, perhaps only a single field `viewText` is enough in CatalogTable. This PR brought in the following changes: 1. Remove the param `viewOriginalText` from `CatalogTable`; 2. Update the output of command `DescribeTableCommand`. ## How was this patch tested? Tested by existing test cases; also updated the failed test cases. Author: jiangxingbo <jiangxb1987@gmail.com> Closes #16679 from jiangxb1987/catalogTable.
This commit is contained in:
parent
fcfd5d0bba
commit
3bdf3ee860
|
@ -175,7 +175,6 @@ case class CatalogTable(
|
|||
lastAccessTime: Long = -1,
|
||||
properties: Map[String, String] = Map.empty,
|
||||
stats: Option[CatalogStatistics] = None,
|
||||
viewOriginalText: Option[String] = None,
|
||||
viewText: Option[String] = None,
|
||||
comment: Option[String] = None,
|
||||
unsupportedFeatures: Seq[String] = Seq.empty,
|
||||
|
@ -261,7 +260,6 @@ case class CatalogTable(
|
|||
if (provider.isDefined) s"Provider: ${provider.get}" else "",
|
||||
if (partitionColumnNames.nonEmpty) s"Partition Columns: $partitionColumns" else ""
|
||||
) ++ bucketStrings ++ Seq(
|
||||
viewOriginalText.map("Original View: " + _).getOrElse(""),
|
||||
viewText.map("View: " + _).getOrElse(""),
|
||||
comment.map("Comment: " + _).getOrElse(""),
|
||||
if (properties.nonEmpty) s"Properties: $tableProperties" else "",
|
||||
|
|
|
@ -919,7 +919,6 @@ abstract class CatalogTestUtils {
|
|||
.add("col2", "string")
|
||||
.add("a", "int")
|
||||
.add("b", "string"),
|
||||
viewOriginalText = Some("SELECT * FROM tbl1"),
|
||||
viewText = Some("SELECT * FROM tbl1"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> viewDefaultDatabase))
|
||||
}
|
||||
|
|
|
@ -528,8 +528,10 @@ case class DescribeTableCommand(
|
|||
private def describeViewInfo(metadata: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
|
||||
append(buffer, "", "", "")
|
||||
append(buffer, "# View Information", "", "")
|
||||
append(buffer, "View Original Text:", metadata.viewOriginalText.getOrElse(""), "")
|
||||
append(buffer, "View Expanded Text:", metadata.viewText.getOrElse(""), "")
|
||||
append(buffer, "View Text:", metadata.viewText.getOrElse(""), "")
|
||||
append(buffer, "View Default Database:", metadata.viewDefaultDatabase.getOrElse(""), "")
|
||||
append(buffer, "View Query Output Columns:",
|
||||
metadata.viewQueryColumnNames.mkString("[", ", ", "]"), "")
|
||||
}
|
||||
|
||||
private def describeBucketingInfo(metadata: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
|
||||
|
|
|
@ -235,7 +235,6 @@ case class CreateViewCommand(
|
|||
storage = CatalogStorageFormat.empty,
|
||||
schema = aliasPlan(session, analyzedPlan).schema,
|
||||
properties = newProperties,
|
||||
viewOriginalText = originalText,
|
||||
viewText = originalText,
|
||||
comment = comment
|
||||
)
|
||||
|
@ -289,7 +288,6 @@ case class AlterViewAsCommand(
|
|||
val updatedViewMeta = viewMeta.copy(
|
||||
schema = analyzedPlan.schema,
|
||||
properties = newProperties,
|
||||
viewOriginalText = Some(originalText),
|
||||
viewText = Some(originalText))
|
||||
|
||||
session.sessionState.catalog.alterTable(updatedViewMeta)
|
||||
|
|
|
@ -426,7 +426,9 @@ private[hive] class HiveClientImpl(
|
|||
// in the function toHiveTable.
|
||||
properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"),
|
||||
comment = properties.get("comment"),
|
||||
viewOriginalText = Option(h.getViewOriginalText),
|
||||
// In older versions of Spark(before 2.2.0), we expand the view original text and store
|
||||
// that into `viewExpandedText`, and that should be used in view resolution. So we get
|
||||
// `viewExpandedText` instead of `viewOriginalText` for viewText here.
|
||||
viewText = Option(h.getViewExpandedText),
|
||||
unsupportedFeatures = unsupportedFeatures)
|
||||
}
|
||||
|
@ -854,8 +856,13 @@ private[hive] class HiveClientImpl(
|
|||
table.storage.properties.foreach { case (k, v) => hiveTable.setSerdeParam(k, v) }
|
||||
table.properties.foreach { case (k, v) => hiveTable.setProperty(k, v) }
|
||||
table.comment.foreach { c => hiveTable.setProperty("comment", c) }
|
||||
table.viewOriginalText.foreach { t => hiveTable.setViewOriginalText(t) }
|
||||
table.viewText.foreach { t => hiveTable.setViewExpandedText(t) }
|
||||
// Hive will expand the view text, so it needs 2 fields: viewOriginalText and viewExpandedText.
|
||||
// Since we don't expand the view text, but only add table properties, we map the `viewText` to
|
||||
// the both fields in hive table.
|
||||
table.viewText.foreach { t =>
|
||||
hiveTable.setViewOriginalText(t)
|
||||
hiveTable.setViewExpandedText(t)
|
||||
}
|
||||
hiveTable
|
||||
}
|
||||
|
||||
|
|
|
@ -75,7 +75,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
assert(desc.comment == Some("This is the staging page view table"))
|
||||
// TODO will be SQLText
|
||||
assert(desc.viewText.isEmpty)
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.partitionColumnNames.isEmpty)
|
||||
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
|
||||
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
|
||||
|
@ -106,7 +107,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
// TODO will be SQLText
|
||||
assert(desc.comment == Some("This is the staging page view table"))
|
||||
assert(desc.viewText.isEmpty)
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.partitionColumnNames.isEmpty)
|
||||
assert(desc.storage.properties == Map())
|
||||
assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat"))
|
||||
|
@ -125,7 +127,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
assert(desc.storage.locationUri == None)
|
||||
assert(desc.schema.isEmpty)
|
||||
assert(desc.viewText == None) // TODO will be SQLText
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.storage.properties == Map())
|
||||
assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
|
||||
assert(desc.storage.outputFormat ==
|
||||
|
@ -161,7 +164,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
assert(desc.storage.locationUri == None)
|
||||
assert(desc.schema.isEmpty)
|
||||
assert(desc.viewText == None) // TODO will be SQLText
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2")))
|
||||
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
|
||||
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
|
||||
|
@ -304,7 +308,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
assert(desc.partitionColumnNames.isEmpty)
|
||||
assert(desc.bucketSpec.isEmpty)
|
||||
assert(desc.viewText.isEmpty)
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.storage.locationUri.isEmpty)
|
||||
assert(desc.storage.inputFormat ==
|
||||
Some("org.apache.hadoop.mapred.TextInputFormat"))
|
||||
|
@ -462,7 +467,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
|
|||
assert(desc.partitionColumnNames == Seq("month"))
|
||||
assert(desc.bucketSpec.isEmpty)
|
||||
assert(desc.viewText.isEmpty)
|
||||
assert(desc.viewOriginalText.isEmpty)
|
||||
assert(desc.viewDefaultDatabase.isEmpty)
|
||||
assert(desc.viewQueryColumnNames.isEmpty)
|
||||
assert(desc.storage.locationUri == Some("/path/to/mercury"))
|
||||
assert(desc.storage.inputFormat == Some("winput"))
|
||||
assert(desc.storage.outputFormat == Some("wowput"))
|
||||
|
|
|
@ -346,23 +346,10 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
|
|||
table.copy(
|
||||
createTime = 0L,
|
||||
lastAccessTime = 0L,
|
||||
properties = table.properties.filterKeys(!nondeterministicProps.contains(_)),
|
||||
// View texts are checked separately
|
||||
viewOriginalText = None,
|
||||
viewText = None
|
||||
properties = table.properties.filterKeys(!nondeterministicProps.contains(_))
|
||||
)
|
||||
}
|
||||
|
||||
// Normalizes attributes auto-generated by Spark SQL for views
|
||||
def normalizeGeneratedAttributes(str: String): String = {
|
||||
str.replaceAll("gen_attr_[0-9]+", "gen_attr_0")
|
||||
}
|
||||
|
||||
// We use expanded canonical view text as original view text of the new table
|
||||
assertResult(expected.viewText.map(normalizeGeneratedAttributes)) {
|
||||
actual.viewOriginalText.map(normalizeGeneratedAttributes)
|
||||
}
|
||||
|
||||
assert(normalize(actual) == normalize(expected))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -656,8 +656,9 @@ class HiveDDLSuite
|
|||
assert(sql("DESC FORMATTED view1").collect().containsSlice(
|
||||
Seq(
|
||||
Row("# View Information", "", ""),
|
||||
Row("View Original Text:", "SELECT * FROM tbl", ""),
|
||||
Row("View Expanded Text:", "SELECT * FROM tbl", "")
|
||||
Row("View Text:", "SELECT * FROM tbl", ""),
|
||||
Row("View Default Database:", "default", ""),
|
||||
Row("View Query Output Columns:", "[a]", "")
|
||||
)
|
||||
))
|
||||
}
|
||||
|
@ -943,7 +944,9 @@ class HiveDDLSuite
|
|||
TableIdentifier(sourceViewName, Some("default")))
|
||||
// The original source should be a VIEW with an empty path
|
||||
assert(sourceView.tableType == CatalogTableType.VIEW)
|
||||
assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
|
||||
assert(sourceView.viewText.nonEmpty)
|
||||
assert(sourceView.viewDefaultDatabase == Some("default"))
|
||||
assert(sourceView.viewQueryColumnNames == Seq("a", "b", "c", "d"))
|
||||
val targetTable = spark.sessionState.catalog.getTableMetadata(
|
||||
TableIdentifier(targetTabName, Some("default")))
|
||||
|
||||
|
@ -956,8 +959,12 @@ class HiveDDLSuite
|
|||
// The created table should be a MANAGED table with empty view text and original text.
|
||||
assert(targetTable.tableType == CatalogTableType.MANAGED,
|
||||
"the created table must be a Hive managed table")
|
||||
assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
|
||||
"the view text and original text in the created table must be empty")
|
||||
assert(targetTable.viewText.isEmpty,
|
||||
"the view text in the created table must be empty")
|
||||
assert(targetTable.viewDefaultDatabase.isEmpty,
|
||||
"the view default database in the created table must be empty")
|
||||
assert(targetTable.viewQueryColumnNames.isEmpty,
|
||||
"the view query output columns in the created table must be empty")
|
||||
assert(targetTable.comment.isEmpty,
|
||||
"the comment in the created table must be empty")
|
||||
assert(targetTable.unsupportedFeatures.isEmpty,
|
||||
|
|
|
@ -556,7 +556,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("x", "long").add("y", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM jt"),
|
||||
viewText = Some("SELECT * FROM jt"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -567,7 +566,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM view1"),
|
||||
viewText = Some("SELECT * FROM view1"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> db,
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -589,7 +587,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("n", "int"),
|
||||
viewOriginalText = Some("WITH w AS (SELECT 1 AS n) SELECT n FROM w"),
|
||||
viewText = Some("WITH w AS (SELECT 1 AS n) SELECT n FROM w"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "1",
|
||||
|
@ -606,7 +603,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM jt"),
|
||||
viewText = Some("SELECT * FROM jt"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -634,7 +630,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM invalid_db.jt"),
|
||||
viewText = Some("SELECT * FROM invalid_db.jt"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -649,7 +644,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM invalid_table"),
|
||||
viewText = Some("SELECT * FROM invalid_table"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -664,7 +658,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM view2"),
|
||||
viewText = Some("SELECT * FROM view2"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -687,7 +680,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("a", "int").add("b", "int"),
|
||||
viewOriginalText = Some(s"SELECT * FROM hive_table"),
|
||||
viewText = Some("SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b` FROM (SELECT " +
|
||||
"`gen_attr_0`, `gen_attr_1` FROM (SELECT `a` AS `gen_attr_0`, `b` AS " +
|
||||
"`gen_attr_1` FROM hive_table) AS gen_subquery_0) AS hive_table")
|
||||
|
@ -711,7 +703,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("x", "long").add("y", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM testTable"),
|
||||
viewText = Some("SELECT * FROM testTable"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
@ -744,7 +735,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
tableType = CatalogTableType.VIEW,
|
||||
storage = CatalogStorageFormat.empty,
|
||||
schema = new StructType().add("id", "long").add("id1", "long"),
|
||||
viewOriginalText = Some("SELECT * FROM testTable"),
|
||||
viewText = Some("SELECT * FROM testTable"),
|
||||
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
|
||||
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
|
||||
|
|
Loading…
Reference in a new issue