[SPARK-36552][SQL] Fix different behavior for writing char/varchar to hive and datasource table
### What changes were proposed in this pull request? For the hive table, the actual write path and the schema handling are inconsistent when `spark.sql.legacy.charVarcharAsString` is true. This causes problems such as the one described in SPARK-36552. In this PR we respect `spark.sql.legacy.charVarcharAsString` when generating the hive table schema from spark data types. ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? yes, when `spark.sql.legacy.charVarcharAsString` is true, hive tables with char/varchar columns will respect string behavior. ### How was this patch tested? newly added test Closes #33798 from yaooqinn/SPARK-36552. Authored-by: Kent Yao <yao@apache.org> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
parent
1ccb06ca8c
commit
f918c123a0
|
@ -999,8 +999,11 @@ private[hive] object HiveClientImpl extends Logging {
|
|||
// For Hive Serde, we still need to restore the raw type for char and varchar type.
|
||||
// When reading data in parquet, orc, or avro file format with string type for char,
|
||||
// the trailing spaces may be lost if we are not going to pad it.
|
||||
val typeString = CharVarcharUtils.getRawTypeString(c.metadata)
|
||||
.getOrElse(c.dataType.catalogString)
|
||||
val typeString = if (SQLConf.get.charVarcharAsString) {
|
||||
c.dataType.catalogString
|
||||
} else {
|
||||
CharVarcharUtils.getRawTypeString(c.metadata).getOrElse(c.dataType.catalogString)
|
||||
}
|
||||
new FieldSchema(c.name, typeString, c.getComment().orNull)
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,20 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet
|
|||
checkAnswer(sql("SELECT v from t where c = 'Spark' and v = 'kyuubi'"), Row("kyuubi"))
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test for SPARK-36552: with `spark.sql.legacy.charVarcharAsString` set to "true",
// a value longer than the declared varchar length must be written and read back unchanged.
// The scenario runs once for each setting of `spark.sql.hive.convertMetastoreParquet`.
test("SPARK-36552: Fix different behavior of writing char/varchar to hive and datasource table") {
|
||||
Seq("true", "false").foreach { v =>
|
||||
withSQLConf(
|
||||
"spark.sql.hive.convertMetastoreParquet" -> v,
|
||||
"spark.sql.legacy.charVarcharAsString" -> "true") {
|
||||
withTable("t") {
|
||||
sql(s"CREATE TABLE t (c varchar(2)) USING $format")
|
||||
// 'kyuubi' has 6 characters, which exceeds the declared varchar(2) length.
sql("INSERT INTO t SELECT 'kyuubi'")
|
||||
// The full value is expected back, i.e. the column is treated as a plain string.
checkAnswer(sql("SELECT c from t"), Row("kyuubi"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton {
|
||||
|
|
Loading…
Reference in a new issue