[SPARK-32021][SQL] Increase precision of seconds and fractions of make_interval

### What changes were proposed in this pull request?
Change the precision of seconds and their fraction from 8 to 18 digits so that intervals holding the maximum allowed microseconds value (`Long`) can be constructed.

### Why are the changes needed?
To improve UX of Spark SQL.

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
- Add tests to IntervalExpressionsSuite
- Add an example to the `MakeInterval` expression
- Add tests to `interval.sql`

Closes #28873 from MaxGekk/make_interval-sec-precision.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
Max Gekk 2020-06-19 19:33:13 -07:00 committed by Dongjoon Hyun
parent 177a380bcf
commit 66ba35666a
6 changed files with 53 additions and 9 deletions

View file

@@ -149,6 +149,8 @@ case class DivideInterval(
100 years 11 months 8 days 12 hours 30 minutes 1.001001 seconds
> SELECT _FUNC_(100, null, 3);
NULL
> SELECT _FUNC_(0, 1, 0, 1, 0, 0, 100.000001);
1 months 1 days 1 minutes 40.000001 seconds
""",
since = "3.0.0")
// scalastyle:on line.size.limit
@@ -169,7 +171,7 @@ case class MakeInterval(
days: Expression,
hours: Expression,
mins: Expression) = {
this(years, months, weeks, days, hours, mins, Literal(Decimal(0, 8, 6)))
this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
}
def this(
years: Expression,
@@ -191,7 +193,7 @@ case class MakeInterval(
// Accept `secs` as DecimalType to avoid loosing precision of microseconds while converting
// them to the fractional part of `secs`.
override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType,
IntegerType, IntegerType, IntegerType, DecimalType(8, 6))
IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6))
override def dataType: DataType = CalendarIntervalType
override def nullable: Boolean = true
@@ -211,7 +213,7 @@ case class MakeInterval(
day.asInstanceOf[Int],
hour.asInstanceOf[Int],
min.asInstanceOf[Int],
sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, 8, 6)))
sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
} catch {
case _: ArithmeticException => null
}

View file

@@ -190,7 +190,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
micros: Int = 0): Unit = {
val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros)
val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks),
Literal(days), Literal(hours), Literal(minutes), Literal(Decimal(secFrac, 8, 6)))
Literal(days), Literal(hours), Literal(minutes),
Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6)))
val totalMonths = years * MONTHS_PER_YEAR + months
val totalDays = weeks * DAYS_PER_WEEK + days
val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR
@@ -206,5 +207,11 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
check(years = 10000, micros = -1)
check(-9999, -11, 0, -31, -23, -59, -59, -999, -999)
check(years = -10000, micros = 1)
check(
hours = Int.MaxValue,
minutes = Int.MaxValue,
seconds = Int.MaxValue,
millis = Int.MaxValue,
micros = Int.MaxValue)
}
}

View file

@@ -166,7 +166,7 @@
| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(8,6))):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, CAST(1.001001 AS DECIMAL(18,6))):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, CAST(45.887 AS DECIMAL(8,6))):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |

View file

@@ -29,6 +29,8 @@ select make_interval(1, 2, 3, 4);
select make_interval(1, 2, 3, 4, 5);
select make_interval(1, 2, 3, 4, 5, 6);
select make_interval(1, 2, 3, 4, 5, 6, 7.008009);
select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456);
select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789);
-- cast string to intervals
select cast('1 second' as interval);

View file

@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 91
-- Number of queries: 93
-- !query
@@ -178,11 +178,28 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
-- !query
select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
-- !query schema
struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
-- !query output
1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
-- !query
select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
-- !query schema
struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
-- !query output
1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
-- !query
select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
-- !query schema
struct<>
-- !query output
java.lang.ArithmeticException
Decimal(expanded,1234567890123456789,20,0}) cannot be represented as Decimal(18, 6).
-- !query
select cast('1 second' as interval)
-- !query schema

View file

@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 91
-- Number of queries: 93
-- !query
@@ -173,11 +173,27 @@ struct<make_interval(1, 2, 3, 4, 5, 6, 0.000000):interval>
-- !query
select make_interval(1, 2, 3, 4, 5, 6, 7.008009)
-- !query schema
struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(8,6))):interval>
struct<make_interval(1, 2, 3, 4, 5, 6, CAST(7.008009 AS DECIMAL(18,6))):interval>
-- !query output
1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds
-- !query
select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456)
-- !query schema
struct<make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456):interval>
-- !query output
1 years 2 months 25 days 34293552 hours 30 minutes 12.123456 seconds
-- !query
select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789)
-- !query schema
struct<make_interval(0, 0, 0, 0, 0, 0, CAST(1234567890123456789 AS DECIMAL(18,6))):interval>
-- !query output
NULL
-- !query
select cast('1 second' as interval)
-- !query schema