[SPARK-35975][SQL] New configuration spark.sql.timestampType for the default timestamp type

### What changes were proposed in this pull request?

Add a new configuration `spark.sql.timestampType`, which configures the default timestamp type of Spark SQL, including SQL DDL and the Cast clause. Setting the configuration to `TIMESTAMP_NTZ` makes `TIMESTAMP WITHOUT TIME ZONE` the default type, while setting it to `TIMESTAMP_LTZ` makes it `TIMESTAMP WITH LOCAL TIME ZONE`.

The default value of the new configuration is `TIMESTAMP_LTZ`, which is consistent with the behavior of previous Spark releases.
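As an illustration, here is a minimal sketch of how the configuration is intended to be used from a session. The session setup and the query are assumptions for illustration only, and the resulting `TIMESTAMP_NTZ` semantics depend on the rest of the TIMESTAMP WITHOUT TIME ZONE support being in place:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// Switch the default: the plain "timestamp" keyword in DDL and CAST
// now resolves to TIMESTAMP WITHOUT TIME ZONE instead of
// TIMESTAMP WITH LOCAL TIME ZONE.
spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")

val df = spark.sql("SELECT CAST('2021-07-01 00:00:00' AS TIMESTAMP) AS ts")
df.printSchema()
// expected: ts resolves to the TIMESTAMP_NTZ type rather than TIMESTAMP_LTZ
```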

### Why are the changes needed?

Spark needs a configuration that lets users switch the default timestamp type to timestamp without time zone.

### Does this PR introduce _any_ user-facing change?

No, the default behavior is unchanged; this only adds a new configuration.

### How was this patch tested?

Unit test in `DataTypeParserSuite`.

Closes #33176 from gengliangwang/newTsTypeConf.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
Commit a643076d4e (parent ca1217667c), 2021-07-01 23:25:18 +03:00.
3 changed files with 42 additions and 2 deletions.

`sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala`:

@@ -2502,7 +2502,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging
       case ("float" | "real", Nil) => FloatType
       case ("double", Nil) => DoubleType
       case ("date", Nil) => DateType
-      case ("timestamp", Nil) => TimestampType
+      case ("timestamp", Nil) => SQLConf.get.timestampType
       case ("string", Nil) => StringType
       case ("character" | "char", length :: Nil) => CharType(length.getText.toInt)
       case ("varchar", length :: Nil) => VarcharType(length.getText.toInt)

`sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala`:

@@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
+import org.apache.spark.sql.types.{AtomicType, TimestampNTZType, TimestampType}
 import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.Utils
@@ -2820,6 +2821,24 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  object TimestampTypes extends Enumeration {
+    val TIMESTAMP_NTZ, TIMESTAMP_LTZ = Value
+  }
+
+  val TIMESTAMP_TYPE =
+    buildConf("spark.sql.timestampType")
+      .doc("Configures the default timestamp type of Spark SQL, including SQL DDL and Cast " +
+        s"clause. Setting the configuration as ${TimestampTypes.TIMESTAMP_NTZ.toString} will " +
+        "use TIMESTAMP WITHOUT TIME ZONE as the default type while putting it as " +
+        s"${TimestampTypes.TIMESTAMP_LTZ.toString} will use TIMESTAMP WITH LOCAL TIME ZONE. " +
+        "Before the 3.2.0 release, Spark only supports the TIMESTAMP WITH " +
+        "LOCAL TIME ZONE type.")
+      .version("3.2.0")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(TimestampTypes.values.map(_.toString))
+      .createWithDefault(TimestampTypes.TIMESTAMP_LTZ.toString)
+
   val DATETIME_JAVA8API_ENABLED = buildConf("spark.sql.datetime.java8API.enabled")
     .doc("If the configuration property is set to true, java.time.Instant and " +
       "java.time.LocalDate classes of Java 8 API are used as external types for " +
@@ -3897,6 +3916,15 @@
   def ansiEnabled: Boolean = getConf(ANSI_ENABLED)
 
+  def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match {
+    case "TIMESTAMP_LTZ" =>
+      // For historical reasons, TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE
+      TimestampType
+
+    case "TIMESTAMP_NTZ" =>
+      TimestampNTZType
+  }
+
   def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)
 
   def serializerNestedSchemaPruningEnabled: Boolean =
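With the `timestampType` accessor in place, internal code can branch on the resolved Catalyst type instead of re-parsing the conf string. A hypothetical caller might look like this (the helper name is an assumption for illustration, not part of this patch):

```scala
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{TimestampNTZType, TimestampType}

// Hypothetical helper: describe the session-default timestamp semantics.
def describeDefaultTimestamp(): String = SQLConf.get.timestampType match {
  case TimestampType    => "TIMESTAMP WITH LOCAL TIME ZONE (an instant, rendered in the session time zone)"
  case TimestampNTZType => "TIMESTAMP WITHOUT TIME ZONE (a wall-clock value, no zone attached)"
}
```

Keeping the string-to-type mapping in one place means callers never handle the raw `TIMESTAMP_LTZ`/`TIMESTAMP_NTZ` strings.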

`sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala`:

@@ -18,9 +18,12 @@
 package org.apache.spark.sql.catalyst.parser
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.TimestampTypes
 import org.apache.spark.sql.types._
 
-class DataTypeParserSuite extends SparkFunSuite {
+class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
 
   def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)
@@ -135,6 +138,15 @@ class DataTypeParserSuite extends SparkFunSuite {
     assert(intercept("unknown(1,2,3)").getMessage.contains("unknown(1,2,3) is not supported"))
   }
 
+  test("Set default timestamp type") {
+    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) {
+      assert(parse("timestamp") === TimestampNTZType)
+    }
+    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_LTZ.toString) {
+      assert(parse("timestamp") === TimestampType)
+    }
+  }
+
   // DataType parser accepts certain reserved keywords.
   checkDataType(
     "Struct<TABLE: string, DATE:boolean>",