[SPARK-29679][SQL] Make interval type comparable and orderable
### What changes were proposed in this pull request? The interval type now supports >, >=, <, <=, =, <=>, ORDER BY, min, max, etc. ### Why are the changes needed? Part of SPARK-27764 Feature Parity between PostgreSQL and Spark ### Does this PR introduce any user-facing change? Yes, intervals can now be compared. ### How was this patch tested? Added unit tests. Closes #26337 from yaooqinn/SPARK-29679. Authored-by: Kent Yao <yaooqinn@hotmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
e7f7990bc3
commit
e026412d9c
|
@ -29,7 +29,7 @@ import static org.apache.spark.sql.catalyst.util.DateTimeConstants.*;
|
|||
/**
|
||||
* The internal representation of interval type.
|
||||
*/
|
||||
public final class CalendarInterval implements Serializable {
|
||||
public final class CalendarInterval implements Serializable, Comparable<CalendarInterval> {
|
||||
public final int months;
|
||||
public final int days;
|
||||
public final long microseconds;
|
||||
|
@ -55,6 +55,29 @@ public final class CalendarInterval implements Serializable {
|
|||
return Objects.hash(months, days, microseconds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(CalendarInterval that) {
|
||||
long thisAdjustDays =
|
||||
this.microseconds / MICROS_PER_DAY + this.days + this.months * DAYS_PER_MONTH;
|
||||
long thatAdjustDays =
|
||||
that.microseconds / MICROS_PER_DAY + that.days + that.months * DAYS_PER_MONTH;
|
||||
long daysDiff = thisAdjustDays - thatAdjustDays;
|
||||
if (daysDiff == 0) {
|
||||
long msDiff = (this.microseconds % MICROS_PER_DAY) - (that.microseconds % MICROS_PER_DAY);
|
||||
if (msDiff == 0) {
|
||||
return 0;
|
||||
} else if (msDiff > 0) {
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if (daysDiff > 0){
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (months == 0 && days == 0 && microseconds == 0) {
|
||||
|
|
|
@ -855,6 +855,11 @@ object TypeCoercion {
|
|||
case Divide(l @ CalendarIntervalType(), r @ NumericType()) =>
|
||||
DivideInterval(l, r)
|
||||
|
||||
case b @ BinaryOperator(l @ CalendarIntervalType(), r @ NullType()) =>
|
||||
b.withNewChildren(Seq(l, Cast(r, CalendarIntervalType)))
|
||||
case b @ BinaryOperator(l @ NullType(), r @ CalendarIntervalType()) =>
|
||||
b.withNewChildren(Seq(Cast(l, CalendarIntervalType), r))
|
||||
|
||||
case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r)
|
||||
case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l)
|
||||
case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r)
|
||||
|
|
|
@ -629,6 +629,7 @@ class CodegenContext extends Logging {
|
|||
// use c1 - c2 may overflow
|
||||
case dt: DataType if isPrimitiveType(dt) => s"($c1 > $c2 ? 1 : $c1 < $c2 ? -1 : 0)"
|
||||
case BinaryType => s"org.apache.spark.sql.catalyst.util.TypeUtils.compareBinary($c1, $c2)"
|
||||
case CalendarIntervalType => s"$c1.compareTo($c2)"
|
||||
case NullType => "0"
|
||||
case array: ArrayType =>
|
||||
val elementType = array.elementType
|
||||
|
|
|
@ -91,6 +91,7 @@ object RowOrdering {
|
|||
def isOrderable(dataType: DataType): Boolean = dataType match {
|
||||
case NullType => true
|
||||
case dt: AtomicType => true
|
||||
case CalendarIntervalType => true
|
||||
case struct: StructType => struct.fields.forall(f => isOrderable(f.dataType))
|
||||
case array: ArrayType => isOrderable(array.elementType)
|
||||
case udt: UserDefinedType[_] => isOrderable(udt.sqlType)
|
||||
|
|
|
@ -71,6 +71,7 @@ object TypeUtils {
|
|||
def getInterpretedOrdering(t: DataType): Ordering[Any] = {
|
||||
t match {
|
||||
case i: AtomicType => i.ordering.asInstanceOf[Ordering[Any]]
|
||||
case c: CalendarIntervalType => c.ordering.asInstanceOf[Ordering[Any]]
|
||||
case a: ArrayType => a.interpretedOrdering.asInstanceOf[Ordering[Any]]
|
||||
case s: StructType => s.interpretedOrdering.asInstanceOf[Ordering[Any]]
|
||||
case udt: UserDefinedType[_] => getInterpretedOrdering(udt.sqlType)
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.spark.sql.types
|
||||
|
||||
import org.apache.spark.annotation.Stable
|
||||
import org.apache.spark.unsafe.types.CalendarInterval
|
||||
|
||||
/**
|
||||
* The data type representing calendar time intervals. The calendar time interval is stored
|
||||
|
@ -36,6 +37,8 @@ class CalendarIntervalType private() extends DataType {
|
|||
|
||||
override def simpleString: String = "interval"
|
||||
|
||||
val ordering: Ordering[CalendarInterval] = Ordering[CalendarInterval]
|
||||
|
||||
private[spark] override def asNullable: CalendarIntervalType = this
|
||||
}
|
||||
|
||||
|
|
43
sql/core/src/test/resources/sql-tests/inputs/interval.sql
Normal file
43
sql/core/src/test/resources/sql-tests/inputs/interval.sql
Normal file
|
@ -0,0 +1,43 @@
|
|||
-- test for intervals
|
||||
|
||||
-- greater than / greater than or equal
|
||||
select interval '1 day' > interval '23 hour';
|
||||
select interval '-1 day' >= interval '-23 hour';
|
||||
select interval '-1 day' > null;
|
||||
select null > interval '-1 day';
|
||||
|
||||
-- less than / less than or equal
|
||||
select interval '1 minutes' < interval '1 hour';
|
||||
select interval '-1 day' <= interval '-23 hour';
|
||||
|
||||
-- equal
|
||||
select interval '1 year' = interval '360 days';
|
||||
select interval '1 year 2 month' = interval '420 days';
|
||||
select interval '1 year' = interval '365 days';
|
||||
select interval '1 month' = interval '30 days';
|
||||
select interval '1 minutes' = interval '1 hour';
|
||||
select interval '1 minutes' = null;
|
||||
select null = interval '-1 day';
|
||||
|
||||
-- null safe equal
|
||||
select interval '1 minutes' <=> null;
|
||||
select null <=> interval '1 minutes';
|
||||
|
||||
-- complex interval representation
|
||||
select INTERVAL '9 years 1 months -1 weeks -4 days -10 hours -46 minutes' > interval '1 minutes';
|
||||
|
||||
-- ordering
|
||||
select cast(v as interval) i from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v) order by i;
|
||||
|
||||
-- days field is not capped at one month (30 days)
|
||||
select interval '1 month 120 days' > interval '2 month';
|
||||
select interval '1 month 30 days' = interval '2 month';
|
||||
|
||||
-- time part (microseconds) is not capped at one day (24 hours)
|
||||
select interval '1 month 29 days 40 hours' > interval '2 month';
|
||||
|
||||
-- max
|
||||
select max(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v);
|
||||
|
||||
-- min
|
||||
select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v);
|
180
sql/core/src/test/resources/sql-tests/results/interval.sql.out
Normal file
180
sql/core/src/test/resources/sql-tests/results/interval.sql.out
Normal file
|
@ -0,0 +1,180 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 22
|
||||
|
||||
|
||||
-- !query 0
|
||||
select interval '1 day' > interval '23 hour'
|
||||
-- !query 0 schema
|
||||
struct<(1 days > 23 hours):boolean>
|
||||
-- !query 0 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 1
|
||||
select interval '-1 day' >= interval '-23 hour'
|
||||
-- !query 1 schema
|
||||
struct<(-1 days >= -23 hours):boolean>
|
||||
-- !query 1 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 2
|
||||
select interval '-1 day' > null
|
||||
-- !query 2 schema
|
||||
struct<(-1 days > CAST(NULL AS INTERVAL)):boolean>
|
||||
-- !query 2 output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 3
|
||||
select null > interval '-1 day'
|
||||
-- !query 3 schema
|
||||
struct<(CAST(NULL AS INTERVAL) > -1 days):boolean>
|
||||
-- !query 3 output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 4
|
||||
select interval '1 minutes' < interval '1 hour'
|
||||
-- !query 4 schema
|
||||
struct<(1 minutes < 1 hours):boolean>
|
||||
-- !query 4 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 5
|
||||
select interval '-1 day' <= interval '-23 hour'
|
||||
-- !query 5 schema
|
||||
struct<(-1 days <= -23 hours):boolean>
|
||||
-- !query 5 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 6
|
||||
select interval '1 year' = interval '360 days'
|
||||
-- !query 6 schema
|
||||
struct<(1 years = 360 days):boolean>
|
||||
-- !query 6 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 7
|
||||
select interval '1 year 2 month' = interval '420 days'
|
||||
-- !query 7 schema
|
||||
struct<(1 years 2 months = 420 days):boolean>
|
||||
-- !query 7 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 8
|
||||
select interval '1 year' = interval '365 days'
|
||||
-- !query 8 schema
|
||||
struct<(1 years = 365 days):boolean>
|
||||
-- !query 8 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 9
|
||||
select interval '1 month' = interval '30 days'
|
||||
-- !query 9 schema
|
||||
struct<(1 months = 30 days):boolean>
|
||||
-- !query 9 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 10
|
||||
select interval '1 minutes' = interval '1 hour'
|
||||
-- !query 10 schema
|
||||
struct<(1 minutes = 1 hours):boolean>
|
||||
-- !query 10 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 11
|
||||
select interval '1 minutes' = null
|
||||
-- !query 11 schema
|
||||
struct<(1 minutes = CAST(NULL AS INTERVAL)):boolean>
|
||||
-- !query 11 output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 12
|
||||
select null = interval '-1 day'
|
||||
-- !query 12 schema
|
||||
struct<(CAST(NULL AS INTERVAL) = -1 days):boolean>
|
||||
-- !query 12 output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query 13
|
||||
select interval '1 minutes' <=> null
|
||||
-- !query 13 schema
|
||||
struct<(1 minutes <=> CAST(NULL AS INTERVAL)):boolean>
|
||||
-- !query 13 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 14
|
||||
select null <=> interval '1 minutes'
|
||||
-- !query 14 schema
|
||||
struct<(CAST(NULL AS INTERVAL) <=> 1 minutes):boolean>
|
||||
-- !query 14 output
|
||||
false
|
||||
|
||||
|
||||
-- !query 15
|
||||
select INTERVAL '9 years 1 months -1 weeks -4 days -10 hours -46 minutes' > interval '1 minutes'
|
||||
-- !query 15 schema
|
||||
struct<(9 years 1 months -11 days -10 hours -46 minutes > 1 minutes):boolean>
|
||||
-- !query 15 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 16
|
||||
select cast(v as interval) i from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v) order by i
|
||||
-- !query 16 schema
|
||||
struct<i:interval>
|
||||
-- !query 16 output
|
||||
1 seconds
|
||||
3 seconds
|
||||
4 seconds
|
||||
|
||||
|
||||
-- !query 17
|
||||
select interval '1 month 120 days' > interval '2 month'
|
||||
-- !query 17 schema
|
||||
struct<(1 months 120 days > 2 months):boolean>
|
||||
-- !query 17 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 18
|
||||
select interval '1 month 30 days' = interval '2 month'
|
||||
-- !query 18 schema
|
||||
struct<(1 months 30 days = 2 months):boolean>
|
||||
-- !query 18 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 19
|
||||
select interval '1 month 29 days 40 hours' > interval '2 month'
|
||||
-- !query 19 schema
|
||||
struct<(1 months 29 days 40 hours > 2 months):boolean>
|
||||
-- !query 19 output
|
||||
true
|
||||
|
||||
|
||||
-- !query 20
|
||||
select max(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v)
|
||||
-- !query 20 schema
|
||||
struct<max(CAST(v AS INTERVAL)):interval>
|
||||
-- !query 20 output
|
||||
4 seconds
|
||||
|
||||
|
||||
-- !query 21
|
||||
select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v)
|
||||
-- !query 21 schema
|
||||
struct<min(CAST(v AS INTERVAL)):interval>
|
||||
-- !query 21 output
|
||||
1 seconds
|
Loading…
Reference in a new issue