From 33f26275f4d65f54e68f38ba0d795396a5a4d2f4 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 7 Jun 2021 14:21:31 +0000 Subject: [PATCH] [SPARK-35663][SQL] Add Timestamp without time zone type ### What changes were proposed in this pull request? Extend Catalyst's type system with a new type that conforms to the SQL standard (see SQL:2016, section 4.6.2): TimestampWithoutTZType represents the timestamp without time zone type. ### Why are the changes needed? Spark SQL today supports the TIMESTAMP data type. However, the semantics provided actually match TIMESTAMP WITH LOCAL TIME ZONE as defined by Oracle. Timestamps embedded in a SQL query or passed through JDBC are presumed to be in the session-local time zone and are cast to UTC before being processed. These are desirable semantics in many cases, such as when dealing with calendars. In many (more) other cases, such as when dealing with log files, it is desirable that the provided timestamps not be altered. SQL users expect that they can model either behavior and do so by using TIMESTAMP WITHOUT TIME ZONE for time zone insensitive data and TIMESTAMP WITH LOCAL TIME ZONE for time zone sensitive data. Most traditional RDBMSs map TIMESTAMP to TIMESTAMP WITHOUT TIME ZONE, and their users will be surprised to see TIMESTAMP WITH LOCAL TIME ZONE, a feature that does not exist in the standard. In this new feature, we will introduce TIMESTAMP WITH LOCAL TIME ZONE to describe the existing timestamp type and add TIMESTAMP WITHOUT TIME ZONE for the standard semantics. Using these two types will provide clarity. This is a starting PR. See more details in https://issues.apache.org/jira/browse/SPARK-35662 ### Does this PR introduce _any_ user-facing change? Yes, a new data type for timestamps without time zone. It is still in development. ### How was this patch tested? Unit test Closes #32802 from gengliangwang/TimestampNTZType. 
Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/types/DataTypes.java | 5 ++ .../sql/types/TimestampWithoutTZType.scala | 63 +++++++++++++++++++ .../spark/sql/types/DataTypeSuite.scala | 1 + 3 files changed, 69 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampWithoutTZType.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java index eac0d91acd..00bf42bc00 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java @@ -54,6 +54,11 @@ public class DataTypes { */ public static final DataType TimestampType = TimestampType$.MODULE$; + /** + * Gets the TimestampWithoutTZType object. + */ + public static final DataType TimestampWithoutTZType = TimestampWithoutTZType$.MODULE$; + /** * Gets the CalendarIntervalType object. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampWithoutTZType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampWithoutTZType.scala new file mode 100644 index 0000000000..558f5ee94a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampWithoutTZType.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import scala.math.Ordering +import scala.reflect.runtime.universe.typeTag + +import org.apache.spark.annotation.Unstable + +/** + * The timestamp without time zone type represents a local time in microsecond precision, + * which is independent of time zone. + * Its valid range is [0001-01-01T00:00:00.000000, 9999-12-31T23:59:59.999999]. + * To represent an absolute point in time, use `TimestampType` instead. + * + * Please use the singleton `DataTypes.TimestampWithoutTZType` to refer to the type. + * @since 3.2.0 + */ +@Unstable +class TimestampWithoutTZType private() extends AtomicType { + /** + * Internally, a timestamp is stored as the number of microseconds from + * the epoch of 1970-01-01T00:00:00.000000 (Unix system time zero). + */ + private[sql] type InternalType = Long + + @transient private[sql] lazy val tag = typeTag[InternalType] + + private[sql] val ordering = implicitly[Ordering[InternalType]] + + /** + * The default size of a value of the TimestampWithoutTZType is 8 bytes. + */ + override def defaultSize: Int = 8 + + private[spark] override def asNullable: TimestampWithoutTZType = this +} + +/** + * The companion case object and its class are separated so the companion object also subclasses + * the TimestampWithoutTZType class. Otherwise, the companion object would be of type + * "TimestampWithoutTZType$" in byte code. Defined with a private constructor so the companion + * object is the only possible instantiation. 
+ * + * @since 3.2.0 + */ +@Unstable +case object TimestampWithoutTZType extends TimestampWithoutTZType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index c7bd57ef04..e78a426f7d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -312,6 +312,7 @@ class DataTypeSuite extends SparkFunSuite { checkDefaultSize(DecimalType.SYSTEM_DEFAULT, 16) checkDefaultSize(DateType, 4) checkDefaultSize(TimestampType, 8) + checkDefaultSize(TimestampWithoutTZType, 8) checkDefaultSize(StringType, 20) checkDefaultSize(BinaryType, 100) checkDefaultSize(ArrayType(DoubleType, true), 8)