[SPARK-20680][SQL][FOLLOW-UP] Add HiveVoidType in HiveClientImpl
### What changes were proposed in this pull request? Following the discussion in this [comment](https://github.com/apache/spark/pull/29244#issuecomment-671746329), add a `HiveVoidType` class in `HiveClientImpl` so that we can replace `NullType` with `HiveVoidType` before we call the Hive client. ### Why are the changes needed? Better compatibility with Hive. More details in [#29244](https://github.com/apache/spark/pull/29244). ### Does this PR introduce _any_ user-facing change? Yes, users can create a view with a null type in Hive. ### How was this patch tested? New test. Closes #29423 from ulysses-you/add-HiveVoidType. Authored-by: ulysses <youxiduo@weidian.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
5debde9401
commit
339eec5f32
|
@ -723,7 +723,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
|
|||
|
||||
test("sparkSession API view resolution with different default database") {
|
||||
withDatabase("db2") {
|
||||
withView("v1") {
|
||||
withView("default.v1") {
|
||||
withTable("t1") {
|
||||
sql("USE default")
|
||||
sql("CREATE TABLE t1 USING parquet AS SELECT 1 AS c0")
|
||||
|
|
|
@ -986,7 +986,8 @@ private[hive] object HiveClientImpl extends Logging {
|
|||
val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) {
|
||||
c.metadata.getString(HIVE_TYPE_STRING)
|
||||
} else {
|
||||
c.dataType.catalogString
|
||||
// Replace NullType with HiveVoidType, since Hive parses `void` but not `null`.
|
||||
HiveVoidType.replaceVoidType(c.dataType).catalogString
|
||||
}
|
||||
new FieldSchema(c.name, typeString, c.getComment().orNull)
|
||||
}
|
||||
|
@ -1005,7 +1006,8 @@ private[hive] object HiveClientImpl extends Logging {
|
|||
/** Builds the native StructField from Hive's FieldSchema. */
|
||||
def fromHiveColumn(hc: FieldSchema): StructField = {
|
||||
val columnType = getSparkSQLDataType(hc)
|
||||
val metadata = if (hc.getType != columnType.catalogString) {
|
||||
val replacedVoidType = HiveVoidType.replaceVoidType(columnType)
|
||||
val metadata = if (hc.getType != replacedVoidType.catalogString) {
|
||||
new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
|
||||
} else {
|
||||
Metadata.empty
|
||||
|
@ -1273,3 +1275,22 @@ private[hive] object HiveClientImpl extends Logging {
|
|||
hiveConf
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A [[DataType]] whose simple type string is "void". It is substituted for `NullType`
 * before a column's type string is handed to Hive.
 */
case object HiveVoidType extends DataType {
  override def simpleString: String = "void"

  override def defaultSize: Int = 1

  override def asNullable: DataType = this

  /**
   * Returns `dt` with every `NullType` swapped for [[HiveVoidType]].
   *
   * The substitution descends into array element types, map key and value types, and struct
   * field types. Any other type is returned unchanged.
   *
   * @param dt the type to rewrite
   * @return the rewritten type
   */
  def replaceVoidType(dt: DataType): DataType = dt match {
    case _: NullType =>
      HiveVoidType
    case ArrayType(elementType, containsNull) =>
      ArrayType(replaceVoidType(elementType), containsNull)
    case MapType(keyType, valueType, valueContainsNull) =>
      MapType(replaceVoidType(keyType), replaceVoidType(valueType), valueContainsNull)
    case StructType(fields) =>
      // Only each field's data type is rewritten; the rest of the field is copied as is.
      StructType(fields.map(f => f.copy(dataType = replaceVoidType(f.dataType))))
    case other =>
      other
  }
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
|
|||
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
|
||||
import org.apache.spark.sql.execution.SQLViewSuite
|
||||
import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
|
||||
import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.types.{NullType, StructType}
|
||||
|
||||
/**
|
||||
* A test suite for Hive view related functionality.
|
||||
|
@ -137,4 +137,24 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A view whose first column is a bare NULL must be creatable and alterable, and reading it
// back must still report NullType for that column.
test("SPARK-20680: Add HiveVoidType to compatible with Hive void type") {
  withView("v1") {
    // Create the view with a single null-typed column.
    sql("create view v1 as select null as c")
    val created = sql("select * from v1")
    assert(created.schema.fields.head.dataType == NullType)
    checkAnswer(created, Row(null))

    // Redefine the view with a null-typed column followed by an integer column.
    sql("alter view v1 as select null as c1, 1 as c2")
    val altered = sql("select * from v1")
    assert(altered.schema.fields.head.dataType == NullType)
    checkAnswer(altered, Row(null, 1))
  }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue