[SPARK-35017][SQL] Transfer ANSI intervals via Hive Thrift server
### What changes were proposed in this pull request? 1. Map Catalyst's interval types to Hive's types: - YearMonthIntervalType -> `interval_year_month` - DayTimeIntervalType -> `interval_day_time` 2. Invoke `HiveResult.toHiveString()` to convert external interval types `java.time.Period`/`java.time.Duration` to strings. ### Why are the changes needed? 1. To be able to retrieve ANSI intervals via Hive Thrift server. 2. This fixes the issue: ```sql $ ./sbin/start-thriftserver.sh $ ./bin/beeline Beeline version 2.3.8 by Apache Hive beeline> !connect jdbc:hive2://localhost:10000/default "" "" "" Connecting to jdbc:hive2://localhost:10000/default Connected to: Spark SQL (version 3.2.0-SNAPSHOT) 0: jdbc:hive2://localhost:10000/default> select timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31'; Error: java.lang.IllegalArgumentException: Unrecognized type name: day-time interval (state=,code=0) ``` 3. It should unblock https://github.com/apache/spark/pull/32099 which enables `*.sql` tests in `ThriftServerQueryTestSuite`. ### Does this PR introduce _any_ user-facing change? Yes. After the changes: ```sql 0: jdbc:hive2://localhost:10000/default> select timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31'; +----------------------------------------------------+ | subtracttimestamps(TIMESTAMP '2021-01-01 01:02:03.000001', DATE '2020-12-31') | +----------------------------------------------------+ | 1 01:02:03.000001000 | +----------------------------------------------------+ 1 row selected (1.637 seconds) ``` ### How was this patch tested? 
By running new test: ``` $ ./build/sbt -Phive -Phive-thriftserver "test:testOnly *SparkThriftServerProtocolVersionsSuite" $ ./build/sbt -Phive -Phive-thriftserver "test:testOnly *SparkMetadataOperationSuite" ``` Also checked an array of an interval: ```sql 0: jdbc:hive2://localhost:10000/default> select array(timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31'); +----------------------------------------------------+ | array(subtracttimestamps(TIMESTAMP '2021-01-01 01:02:03.000001', DATE '2020-12-31')) | +----------------------------------------------------+ | [1 01:02:03.000001000] | +----------------------------------------------------+ ``` Closes #32121 from MaxGekk/ansi-intervals-thrift-protocol. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
d6df84e734
commit
90820b3ec3
|
@ -168,10 +168,6 @@ public class ColumnValue {
|
|||
case TIMESTAMP_TYPE:
|
||||
// SPARK-31859, SPARK-31861: converted to string already in SparkExecuteStatementOperation
|
||||
return stringValue((String)value);
|
||||
case INTERVAL_YEAR_MONTH_TYPE:
|
||||
return stringValue((HiveIntervalYearMonth) value);
|
||||
case INTERVAL_DAY_TIME_TYPE:
|
||||
return stringValue((HiveIntervalDayTime) value);
|
||||
case DECIMAL_TYPE:
|
||||
String plainStr = value == null ? null : ((BigDecimal)value).toPlainString();
|
||||
return stringValue(plainStr);
|
||||
|
@ -183,6 +179,8 @@ public class ColumnValue {
|
|||
case STRUCT_TYPE:
|
||||
case UNION_TYPE:
|
||||
case USER_DEFINED_TYPE:
|
||||
case INTERVAL_YEAR_MONTH_TYPE:
|
||||
case INTERVAL_DAY_TIME_TYPE:
|
||||
return stringValue((String)value);
|
||||
case NULL_TYPE:
|
||||
return stringValue((String)value);
|
||||
|
|
|
@ -120,7 +120,8 @@ private[hive] class SparkExecuteStatementOperation(
|
|||
(from.getAs[CalendarInterval](ordinal), CalendarIntervalType),
|
||||
false,
|
||||
timeFormatters)
|
||||
case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] =>
|
||||
case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] |
|
||||
YearMonthIntervalType | DayTimeIntervalType =>
|
||||
to += toHiveString((from.get(ordinal), dataTypes(ordinal)), false, timeFormatters)
|
||||
}
|
||||
}
|
||||
|
@ -377,6 +378,8 @@ object SparkExecuteStatementOperation {
|
|||
val attrTypeString = field.dataType match {
|
||||
case NullType => "void"
|
||||
case CalendarIntervalType => StringType.catalogString
|
||||
case YearMonthIntervalType => "interval_year_month"
|
||||
case DayTimeIntervalType => "interval_day_time"
|
||||
case other => other.catalogString
|
||||
}
|
||||
new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse(""))
|
||||
|
|
|
@ -99,6 +99,7 @@ private[hive] object SparkGetTypeInfoUtil {
|
|||
TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE,
|
||||
FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE,
|
||||
DATE_TYPE, TIMESTAMP_TYPE,
|
||||
ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE)
|
||||
ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE,
|
||||
INTERVAL_YEAR_MONTH_TYPE, INTERVAL_DAY_TIME_TYPE)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.thriftserver
|
|||
import java.sql.{Date, Timestamp}
|
||||
import java.util.{List => JList, Properties}
|
||||
|
||||
import org.apache.hadoop.hive.common.`type`.HiveIntervalDayTime
|
||||
import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet}
|
||||
import org.apache.hive.service.auth.PlainSaslHelper
|
||||
import org.apache.hive.service.cli.GetInfoType
|
||||
|
@ -458,5 +459,17 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
test(s"SPARK-35017: $version get day-time interval type") {
|
||||
testExecuteStatementWithProtocolVersion(
|
||||
version, "SELECT date'2021-01-01' - date'2020-12-31' AS dt") { rs =>
|
||||
assert(rs.next())
|
||||
assert(rs.getObject(1) === new HiveIntervalDayTime(1, 0, 0, 0, 0))
|
||||
val metaData = rs.getMetaData
|
||||
assert(metaData.getColumnName(1) === "dt")
|
||||
assert(metaData.getColumnTypeName(1) === "interval_day_time")
|
||||
assert(metaData.getColumnType(1) === java.sql.Types.OTHER)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue