[SPARK-27592][SQL][TEST][FOLLOW-UP] Test set the partitioned bucketed data source table SerDe correctly
### What changes were proposed in this pull request? This PR adds a test verifying that the SerDe of a partitioned bucketed data source table is set correctly. ### Why are the changes needed? Improve test coverage. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? N/A Closes #25591 from wangyum/SPARK-27592-f1. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
cb06209fc9
commit
96179732aa
|
@ -320,4 +320,42 @@ class DataSourceWithHiveMetastoreCatalogSuite
|
|||
assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === Seq("1\t2"))
|
||||
}
|
||||
}
|
||||
|
||||
// SPARK-27592: a CTAS through a data source provider with PARTITIONED BY and
// CLUSTERED BY must record the provider's Hive SerDe in the metastore, while
// the bucketing spec stays Spark-only (Hive reports the table as unbucketed).
test("SPARK-27592 set the partitioned bucketed data source table SerDe correctly") {
  val dataSource = "parquet"
  withTable("t") {
    spark.sql(
      s"""
         |CREATE TABLE t
         |USING $dataSource
         |PARTITIONED BY (p)
         |CLUSTERED BY (key)
         |SORTED BY (value)
         |INTO 2 BUCKETS
         |AS SELECT key, value, cast(key % 3 as string) as p FROM src
       """.stripMargin)

    // The catalog entry's storage descriptor should carry the SerDe and
    // input/output formats that HiveSerDe maps this provider to.
    val tableMeta = sessionState.catalog.getTableMetadata(TableIdentifier("t", Some("default")))
    val expectedSerDe = HiveSerDe.sourceToSerDe(dataSource).get
    assert(tableMeta.storage.serde === expectedSerDe.serde)
    assert(tableMeta.storage.inputFormat === expectedSerDe.inputFormat)
    assert(tableMeta.storage.outputFormat === expectedSerDe.outputFormat)

    // Spark side: the table is bucketed and readable.
    assert(sql("DESC FORMATTED t").collect().containsSlice(
      Seq(Row("Num Buckets", "2", ""), Row("Bucket Columns", "[`key`]", ""))
    ))
    checkAnswer(table("t").select("key", "value"), table("src"))

    // Hive side: no native bucketing; the bucket spec is only visible as
    // Spark-specific table properties, and the table is still queryable.
    val hiveOutput = sparkSession.metadataHive.runSqlHive("DESC FORMATTED t")
    assert(hiveOutput.contains("Num Buckets: \t-1 \t "))
    assert(hiveOutput.contains("Bucket Columns: \t[] \t "))
    assert(hiveOutput.contains("\tspark.sql.sources.schema.numBuckets\t2 "))
    assert(hiveOutput.contains("\tspark.sql.sources.schema.bucketCol.0\tkey "))
    assert(sparkSession.metadataHive.runSqlHive("SELECT count(*) FROM t") ===
      Seq(table("src").count().toString))
  }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue