[SPARK-20680][SQL][FOLLOW-UP] Add HiveVoidType in HiveClientImpl

### What changes were proposed in this pull request? This follows up on the discussion in [this comment](https://github.com/apache/spark/pull/29244#issuecomment-671746329). Add a `HiveVoidType` case object in `HiveClientImpl.scala` so that `NullType` can be replaced with `HiveVoidType` before we call the Hive client. ### Why are the changes needed? For better compatibility with Hive. More details are in [#29244](https://github.com/apache/spark/pull/29244). ### Does this PR introduce _any_ user-facing change? Yes, users can now create a view with a null-typed column in Hive. ### How was this patch tested? New test. Closes #29423 from ulysses-you/add-HiveVoidType. Authored-by: ulysses <youxiduo@weidian.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
2020-08-14 06:59:15 +00:00 · 2020-08-14 06:59:15 +00:00 · 339eec5f32
parent 5debde9401
commit 339eec5f32
3 changed files with 45 additions and 4 deletions
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@ -723,7 +723,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {

  test("sparkSession API view resolution with different default database") {
    withDatabase("db2") {
-      withView("v1") {
+      withView("default.v1") {
        withTable("t1") {
          sql("USE default")
          sql("CREATE TABLE t1 USING parquet AS SELECT 1 AS c0")
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@ -986,7 +986,8 @@ private[hive] object HiveClientImpl extends Logging {
    val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) {
      c.metadata.getString(HIVE_TYPE_STRING)
    } else {
-      c.dataType.catalogString
+      // Replace NullType with HiveVoidType because Hive parses "void", not "null".
+      HiveVoidType.replaceVoidType(c.dataType).catalogString
    }
    new FieldSchema(c.name, typeString, c.getComment().orNull)
  }
@ -1005,7 +1006,8 @@ private[hive] object HiveClientImpl extends Logging {
  /** Builds the native StructField from Hive's FieldSchema. */
  def fromHiveColumn(hc: FieldSchema): StructField = {
    val columnType = getSparkSQLDataType(hc)
-    val metadata = if (hc.getType != columnType.catalogString) {
+    val replacedVoidType = HiveVoidType.replaceVoidType(columnType)
+    val metadata = if (hc.getType != replacedVoidType.catalogString) {
      new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
    } else {
      Metadata.empty
@ -1273,3 +1275,22 @@ private[hive] object HiveClientImpl extends Logging {
    hiveConf
  }
 }
+
+/**
+ * A stand-in [[DataType]] whose `simpleString` is "void".
+ *
+ * It is substituted for `NullType` (see `replaceVoidType`) before a column's type string
+ * is produced for Hive, so that the string uses the name "void".
+ */
+case object HiveVoidType extends DataType {
+  override def simpleString: String = "void"
+  override def defaultSize: Int = 1
+  override def asNullable: DataType = HiveVoidType
+
+  /**
+   * Returns `dt` with every `NullType` replaced by [[HiveVoidType]], recursing through
+   * array element types, map key/value types and struct field types. Nullability flags
+   * and all other field attributes are carried over unchanged; any other type is
+   * returned as is.
+   *
+   * @param dt the data type to rewrite
+   * @return the rewritten data type
+   */
+  def replaceVoidType(dt: DataType): DataType = dt match {
+    case _: NullType => HiveVoidType
+    case array: ArrayType =>
+      array.copy(elementType = replaceVoidType(array.elementType))
+    case map: MapType =>
+      map.copy(
+        keyType = replaceVoidType(map.keyType),
+        valueType = replaceVoidType(map.valueType))
+    case struct: StructType =>
+      // Rebuild the struct field by field so nested NullTypes are rewritten as well.
+      StructType(struct.fields.map(f => f.copy(dataType = replaceVoidType(f.dataType))))
+    case other => other
+  }
+}
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala
@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.execution.SQLViewSuite
 import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{NullType, StructType}

 /**
 * A test suite for Hive view related functionality.
@ -137,4 +137,24 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
      }
    }
  }
+
+  // Creating and then altering a view whose first column is an untyped NULL must succeed,
+  // and reading the view back must report that column as NullType.
+  test("SPARK-20680: Add HiveVoidType to compatible with Hive void type") {
+    withView("v1") {
+      // Step 1: create the view with a single NULL column and read it back.
+      sql("create view v1 as select null as c")
+      val created = sql("select * from v1")
+      assert(created.schema.fields.head.dataType == NullType)
+      checkAnswer(created, Row(null))
+
+      // Step 2: redefine the view with an extra non-null column; the NULL column stays first.
+      sql("alter view v1 as select null as c1, 1 as c2")
+      val altered = sql("select * from v1")
+      assert(altered.schema.fields.head.dataType == NullType)
+      checkAnswer(altered, Row(null, 1))
+    }
+  }
 }