[SPARK-19272][SQL] Remove the param viewOriginalText from CatalogTable

## What changes were proposed in this pull request?

Hive will expand the view text, so it needs 2 fields: originalText and viewText. Since we don't expand the view text, but only add table properties, perhaps only a single field `viewText` is enough in CatalogTable.

This PR brought in the following changes:
1. Remove the param `viewOriginalText` from `CatalogTable`;
2. Update the output of command `DescribeTableCommand`.

## How was this patch tested?

Tested by existing test cases, also updated the failed test cases.

Author: jiangxingbo <jiangxb1987@gmail.com>

Closes #16679 from jiangxb1987/catalogTable.
This commit is contained in:
jiangxingbo 2017-01-24 12:37:30 +08:00 committed by Wenchen Fan
parent fcfd5d0bba
commit 3bdf3ee860
9 changed files with 39 additions and 45 deletions

View file

@ -175,7 +175,6 @@ case class CatalogTable(
lastAccessTime: Long = -1,
properties: Map[String, String] = Map.empty,
stats: Option[CatalogStatistics] = None,
viewOriginalText: Option[String] = None,
viewText: Option[String] = None,
comment: Option[String] = None,
unsupportedFeatures: Seq[String] = Seq.empty,
@ -261,7 +260,6 @@ case class CatalogTable(
if (provider.isDefined) s"Provider: ${provider.get}" else "",
if (partitionColumnNames.nonEmpty) s"Partition Columns: $partitionColumns" else ""
) ++ bucketStrings ++ Seq(
viewOriginalText.map("Original View: " + _).getOrElse(""),
viewText.map("View: " + _).getOrElse(""),
comment.map("Comment: " + _).getOrElse(""),
if (properties.nonEmpty) s"Properties: $tableProperties" else "",

View file

@ -919,7 +919,6 @@ abstract class CatalogTestUtils {
.add("col2", "string")
.add("a", "int")
.add("b", "string"),
viewOriginalText = Some("SELECT * FROM tbl1"),
viewText = Some("SELECT * FROM tbl1"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> viewDefaultDatabase))
}

View file

@ -528,8 +528,10 @@ case class DescribeTableCommand(
private def describeViewInfo(metadata: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
append(buffer, "", "", "")
append(buffer, "# View Information", "", "")
append(buffer, "View Original Text:", metadata.viewOriginalText.getOrElse(""), "")
append(buffer, "View Expanded Text:", metadata.viewText.getOrElse(""), "")
append(buffer, "View Text:", metadata.viewText.getOrElse(""), "")
append(buffer, "View Default Database:", metadata.viewDefaultDatabase.getOrElse(""), "")
append(buffer, "View Query Output Columns:",
metadata.viewQueryColumnNames.mkString("[", ", ", "]"), "")
}
private def describeBucketingInfo(metadata: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {

View file

@ -235,7 +235,6 @@ case class CreateViewCommand(
storage = CatalogStorageFormat.empty,
schema = aliasPlan(session, analyzedPlan).schema,
properties = newProperties,
viewOriginalText = originalText,
viewText = originalText,
comment = comment
)
@ -289,7 +288,6 @@ case class AlterViewAsCommand(
val updatedViewMeta = viewMeta.copy(
schema = analyzedPlan.schema,
properties = newProperties,
viewOriginalText = Some(originalText),
viewText = Some(originalText))
session.sessionState.catalog.alterTable(updatedViewMeta)

View file

@ -426,7 +426,9 @@ private[hive] class HiveClientImpl(
// in the function toHiveTable.
properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"),
comment = properties.get("comment"),
viewOriginalText = Option(h.getViewOriginalText),
// In older versions of Spark (before 2.2.0), we expand the view original text and store
// that into `viewExpandedText`, and that should be used in view resolution. So we get
// `viewExpandedText` instead of `viewOriginalText` for viewText here.
viewText = Option(h.getViewExpandedText),
unsupportedFeatures = unsupportedFeatures)
}
@ -854,8 +856,13 @@ private[hive] class HiveClientImpl(
table.storage.properties.foreach { case (k, v) => hiveTable.setSerdeParam(k, v) }
table.properties.foreach { case (k, v) => hiveTable.setProperty(k, v) }
table.comment.foreach { c => hiveTable.setProperty("comment", c) }
table.viewOriginalText.foreach { t => hiveTable.setViewOriginalText(t) }
table.viewText.foreach { t => hiveTable.setViewExpandedText(t) }
// Hive will expand the view text, so it needs 2 fields: viewOriginalText and viewExpandedText.
// Since we don't expand the view text, but only add table properties, we map the `viewText` to
// both fields in the Hive table.
table.viewText.foreach { t =>
hiveTable.setViewOriginalText(t)
hiveTable.setViewExpandedText(t)
}
hiveTable
}

View file

@ -75,7 +75,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(desc.comment == Some("This is the staging page view table"))
// TODO will be SQLText
assert(desc.viewText.isEmpty)
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.partitionColumnNames.isEmpty)
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
@ -106,7 +107,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
// TODO will be SQLText
assert(desc.comment == Some("This is the staging page view table"))
assert(desc.viewText.isEmpty)
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.partitionColumnNames.isEmpty)
assert(desc.storage.properties == Map())
assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat"))
@ -125,7 +127,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(desc.storage.locationUri == None)
assert(desc.schema.isEmpty)
assert(desc.viewText == None) // TODO will be SQLText
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.storage.properties == Map())
assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
assert(desc.storage.outputFormat ==
@ -161,7 +164,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(desc.storage.locationUri == None)
assert(desc.schema.isEmpty)
assert(desc.viewText == None) // TODO will be SQLText
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2")))
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
@ -304,7 +308,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(desc.partitionColumnNames.isEmpty)
assert(desc.bucketSpec.isEmpty)
assert(desc.viewText.isEmpty)
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.storage.locationUri.isEmpty)
assert(desc.storage.inputFormat ==
Some("org.apache.hadoop.mapred.TextInputFormat"))
@ -462,7 +467,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(desc.partitionColumnNames == Seq("month"))
assert(desc.bucketSpec.isEmpty)
assert(desc.viewText.isEmpty)
assert(desc.viewOriginalText.isEmpty)
assert(desc.viewDefaultDatabase.isEmpty)
assert(desc.viewQueryColumnNames.isEmpty)
assert(desc.storage.locationUri == Some("/path/to/mercury"))
assert(desc.storage.inputFormat == Some("winput"))
assert(desc.storage.outputFormat == Some("wowput"))

View file

@ -346,23 +346,10 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
table.copy(
createTime = 0L,
lastAccessTime = 0L,
properties = table.properties.filterKeys(!nondeterministicProps.contains(_)),
// View texts are checked separately
viewOriginalText = None,
viewText = None
properties = table.properties.filterKeys(!nondeterministicProps.contains(_))
)
}
// Normalizes attributes auto-generated by Spark SQL for views
def normalizeGeneratedAttributes(str: String): String = {
str.replaceAll("gen_attr_[0-9]+", "gen_attr_0")
}
// We use expanded canonical view text as original view text of the new table
assertResult(expected.viewText.map(normalizeGeneratedAttributes)) {
actual.viewOriginalText.map(normalizeGeneratedAttributes)
}
assert(normalize(actual) == normalize(expected))
}
}

View file

@ -656,8 +656,9 @@ class HiveDDLSuite
assert(sql("DESC FORMATTED view1").collect().containsSlice(
Seq(
Row("# View Information", "", ""),
Row("View Original Text:", "SELECT * FROM tbl", ""),
Row("View Expanded Text:", "SELECT * FROM tbl", "")
Row("View Text:", "SELECT * FROM tbl", ""),
Row("View Default Database:", "default", ""),
Row("View Query Output Columns:", "[a]", "")
)
))
}
@ -943,7 +944,9 @@ class HiveDDLSuite
TableIdentifier(sourceViewName, Some("default")))
// The original source should be a VIEW with an empty path
assert(sourceView.tableType == CatalogTableType.VIEW)
assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty)
assert(sourceView.viewText.nonEmpty)
assert(sourceView.viewDefaultDatabase == Some("default"))
assert(sourceView.viewQueryColumnNames == Seq("a", "b", "c", "d"))
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))
@ -956,8 +959,12 @@ class HiveDDLSuite
// The created table should be a MANAGED table with empty view text and original text.
assert(targetTable.tableType == CatalogTableType.MANAGED,
"the created table must be a Hive managed table")
assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty,
"the view text and original text in the created table must be empty")
assert(targetTable.viewText.isEmpty,
"the view text in the created table must be empty")
assert(targetTable.viewDefaultDatabase.isEmpty,
"the view default database in the created table must be empty")
assert(targetTable.viewQueryColumnNames.isEmpty,
"the view query output columns in the created table must be empty")
assert(targetTable.comment.isEmpty,
"the comment in the created table must be empty")
assert(targetTable.unsupportedFeatures.isEmpty,

View file

@ -556,7 +556,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("x", "long").add("y", "long"),
viewOriginalText = Some("SELECT * FROM jt"),
viewText = Some("SELECT * FROM jt"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -567,7 +566,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM view1"),
viewText = Some("SELECT * FROM view1"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> db,
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -589,7 +587,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("n", "int"),
viewOriginalText = Some("WITH w AS (SELECT 1 AS n) SELECT n FROM w"),
viewText = Some("WITH w AS (SELECT 1 AS n) SELECT n FROM w"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "1",
@ -606,7 +603,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM jt"),
viewText = Some("SELECT * FROM jt"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -634,7 +630,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM invalid_db.jt"),
viewText = Some("SELECT * FROM invalid_db.jt"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -649,7 +644,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM invalid_table"),
viewText = Some("SELECT * FROM invalid_table"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -664,7 +658,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM view2"),
viewText = Some("SELECT * FROM view2"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -687,7 +680,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("a", "int").add("b", "int"),
viewOriginalText = Some(s"SELECT * FROM hive_table"),
viewText = Some("SELECT `gen_attr_0` AS `a`, `gen_attr_1` AS `b` FROM (SELECT " +
"`gen_attr_0`, `gen_attr_1` FROM (SELECT `a` AS `gen_attr_0`, `b` AS " +
"`gen_attr_1` FROM hive_table) AS gen_subquery_0) AS hive_table")
@ -711,7 +703,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("x", "long").add("y", "long"),
viewOriginalText = Some("SELECT * FROM testTable"),
viewText = Some("SELECT * FROM testTable"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",
@ -744,7 +735,6 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
tableType = CatalogTableType.VIEW,
storage = CatalogStorageFormat.empty,
schema = new StructType().add("id", "long").add("id1", "long"),
viewOriginalText = Some("SELECT * FROM testTable"),
viewText = Some("SELECT * FROM testTable"),
properties = Map(CatalogTable.VIEW_DEFAULT_DATABASE -> "default",
CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> "2",