[SPARK-35017][SQL] Transfer ANSI intervals via Hive Thrift server

### What changes were proposed in this pull request?
1. Map Catalyst's interval types to Hive's types:
    - `YearMonthIntervalType` -> `interval_year_month`
    - `DayTimeIntervalType` -> `interval_day_time`
2. Invoke `HiveResult.toHiveString()` to convert the external interval types `java.time.Period`/`java.time.Duration` to strings, as sketched below.
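
For illustration, a minimal sketch of that conversion, using the `HiveResult.getTimeFormatters`/`toHiveString` helpers the patch itself calls (the sample values and rendered strings mirror the beeline output shown below):

```scala
import java.time.{Duration, Period}

import org.apache.spark.sql.execution.HiveResult.{getTimeFormatters, toHiveString}
import org.apache.spark.sql.types.{DayTimeIntervalType, YearMonthIntervalType}

// Day-time intervals arrive from Catalyst as java.time.Duration and are
// rendered in the Hive interval_day_time form, e.g. "1 01:02:03.000001000".
val dayTime = toHiveString(
  (Duration.ofDays(1).plusHours(1).plusMinutes(2).plusSeconds(3).plusNanos(1000),
    DayTimeIntervalType),
  false,
  getTimeFormatters)

// Year-month intervals arrive as java.time.Period, e.g. "1-2" for 1 year 2 months.
val yearMonth = toHiveString(
  (Period.of(1, 2, 0), YearMonthIntervalType), false, getTimeFormatters)
```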

### Why are the changes needed?
1. To be able to retrieve ANSI intervals via the Hive Thrift server.
2. This fixes the following failure:
```sql
 $ ./sbin/start-thriftserver.sh
 $ ./bin/beeline
Beeline version 2.3.8 by Apache Hive
beeline> !connect jdbc:hive2://localhost:10000/default "" "" ""
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Spark SQL (version 3.2.0-SNAPSHOT)
0: jdbc:hive2://localhost:10000/default> select timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31';
Error: java.lang.IllegalArgumentException: Unrecognized type name: day-time interval (state=,code=0)
```
3. It should unblock https://github.com/apache/spark/pull/32099, which enables `*.sql` tests in `ThriftServerQueryTestSuite`.

### Does this PR introduce _any_ user-facing change?
Yes. After the changes:
```sql
0: jdbc:hive2://localhost:10000/default> select timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31';
+----------------------------------------------------+
| subtracttimestamps(TIMESTAMP '2021-01-01 01:02:03.000001', DATE '2020-12-31') |
+----------------------------------------------------+
| 1 01:02:03.000001000                               |
+----------------------------------------------------+
1 row selected (1.637 seconds)
```
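
The same value is also visible to programmatic JDBC clients. A rough client-side sketch, assuming a Thrift server running on `localhost:10000` as in the beeline session above (the `HiveIntervalDayTime` mapping matches what the new test below asserts):

```scala
import java.sql.DriverManager

import org.apache.hadoop.hive.common.`type`.HiveIntervalDayTime

val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "")
try {
  val rs = conn.createStatement().executeQuery(
    "SELECT timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31'")
  assert(rs.next())
  // The Hive JDBC driver materializes interval_day_time columns as HiveIntervalDayTime.
  println(rs.getObject(1).asInstanceOf[HiveIntervalDayTime]) // 1 01:02:03.000001000
} finally {
  conn.close()
}
```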

### How was this patch tested?
By running the new tests:
```
$ ./build/sbt -Phive -Phive-thriftserver "test:testOnly *SparkThriftServerProtocolVersionsSuite"
$ ./build/sbt -Phive -Phive-thriftserver "test:testOnly *SparkMetadataOperationSuite"
```
Also manually checked an array of intervals:
```sql
0: jdbc:hive2://localhost:10000/default> select array(timestamp'2021-01-01 01:02:03.000001' - date'2020-12-31');
+----------------------------------------------------+
| array(subtracttimestamps(TIMESTAMP '2021-01-01 01:02:03.000001', DATE '2020-12-31')) |
+----------------------------------------------------+
| [1 01:02:03.000001000]                             |
+----------------------------------------------------+
```

Closes #32121 from MaxGekk/ansi-intervals-thrift-protocol.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
4 changed files with 21 additions and 6 deletions


```diff
@@ -168,10 +168,6 @@ public class ColumnValue {
     case TIMESTAMP_TYPE:
       // SPARK-31859, SPARK-31861: converted to string already in SparkExecuteStatementOperation
       return stringValue((String)value);
-    case INTERVAL_YEAR_MONTH_TYPE:
-      return stringValue((HiveIntervalYearMonth) value);
-    case INTERVAL_DAY_TIME_TYPE:
-      return stringValue((HiveIntervalDayTime) value);
     case DECIMAL_TYPE:
       String plainStr = value == null ? null : ((BigDecimal)value).toPlainString();
       return stringValue(plainStr);
@@ -183,6 +179,8 @@ public class ColumnValue {
     case STRUCT_TYPE:
     case UNION_TYPE:
     case USER_DEFINED_TYPE:
+    case INTERVAL_YEAR_MONTH_TYPE:
+    case INTERVAL_DAY_TIME_TYPE:
       return stringValue((String)value);
     case NULL_TYPE:
       return stringValue((String)value);
```


```diff
@@ -120,7 +120,8 @@ private[hive] class SparkExecuteStatementOperation(
           (from.getAs[CalendarInterval](ordinal), CalendarIntervalType),
           false,
           timeFormatters)
-      case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] =>
+      case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] |
+          YearMonthIntervalType | DayTimeIntervalType =>
         to += toHiveString((from.get(ordinal), dataTypes(ordinal)), false, timeFormatters)
     }
   }
@@ -377,6 +378,8 @@ object SparkExecuteStatementOperation {
     val attrTypeString = field.dataType match {
       case NullType => "void"
       case CalendarIntervalType => StringType.catalogString
+      case YearMonthIntervalType => "interval_year_month"
+      case DayTimeIntervalType => "interval_day_time"
       case other => other.catalogString
     }
     new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse(""))
```


```diff
@@ -99,6 +99,7 @@ private[hive] object SparkGetTypeInfoUtil {
       TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE,
       FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE,
       DATE_TYPE, TIMESTAMP_TYPE,
-      ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE)
+      ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE,
+      INTERVAL_YEAR_MONTH_TYPE, INTERVAL_DAY_TIME_TYPE)
   }
 }
```
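
With the two interval types registered above, `DatabaseMetaData.getTypeInfo` should now list them as well. A hypothetical client-side check (the exact `TYPE_NAME` spellings are an assumption, not taken from this patch):

```scala
import java.sql.DriverManager
import scala.collection.mutable

val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "")
val typeInfo = conn.getMetaData.getTypeInfo
val names = mutable.Set.empty[String]
while (typeInfo.next()) {
  names += typeInfo.getString("TYPE_NAME") // standard JDBC getTypeInfo column
}
// Assumed spellings of the advertised interval type names.
assert(names.contains("INTERVAL_YEAR_MONTH"))
assert(names.contains("INTERVAL_DAY_TIME"))
conn.close()
```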


```diff
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.thriftserver
 import java.sql.{Date, Timestamp}
 import java.util.{List => JList, Properties}
 
+import org.apache.hadoop.hive.common.`type`.HiveIntervalDayTime
 import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet}
 import org.apache.hive.service.auth.PlainSaslHelper
 import org.apache.hive.service.cli.GetInfoType
@@ -458,5 +459,17 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase {
         }
       }
     }
+
+    test(s"SPARK-35017: $version get day-time interval type") {
+      testExecuteStatementWithProtocolVersion(
+        version, "SELECT date'2021-01-01' - date'2020-12-31' AS dt") { rs =>
+        assert(rs.next())
+        assert(rs.getObject(1) === new HiveIntervalDayTime(1, 0, 0, 0, 0))
+        val metaData = rs.getMetaData
+        assert(metaData.getColumnName(1) === "dt")
+        assert(metaData.getColumnTypeName(1) === "interval_day_time")
+        assert(metaData.getColumnType(1) === java.sql.Types.OTHER)
+      }
+    }
   }
 }
```