[SPARK-35130][SQL] Add make_dt_interval function to construct DayTimeIntervalType value

### What changes were proposed in this pull request?
Provide a new function `make_dt_interval` to construct a DayTimeIntervalType value.

### Why are the changes needed?
As the JIRA described, we should provide a function to construct DayTimeIntervalType value

### Does this PR introduce _any_ user-facing change?
Yes, a new make_dt_interval function provided

### How was this patch tested?
Updated UTs, manual testing

Closes #32601 from copperybean/work.

Authored-by: copperybean <copperybean.zhang@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
copperybean 2021-06-17 10:01:16 +03:00 committed by Max Gekk
parent 0e554d44df
commit 939ae91e00
8 changed files with 244 additions and 4 deletions

View file

@ -552,6 +552,7 @@ object FunctionRegistry {
expression[MakeDate]("make_date"),
expression[MakeTimestamp]("make_timestamp"),
expression[MakeInterval]("make_interval"),
expression[MakeDTInterval]("make_dt_interval"),
expression[MakeYMInterval]("make_ym_interval"),
expression[DatePart]("date_part"),
expression[Extract]("extract"),

View file

@ -346,6 +346,83 @@ case class MakeInterval(
)
}
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(days, hours, mins, secs) - Make DayTimeIntervalType duration from days, hours, mins and secs.",
arguments = """
Arguments:
* days - the number of days, positive or negative
* hours - the number of hours, positive or negative
* mins - the number of minutes, positive or negative
* secs - the number of seconds with the fractional part in microsecond precision.
""",
examples = """
Examples:
> SELECT _FUNC_(1, 12, 30, 01.001001);
1 12:30:01.001001000
> SELECT _FUNC_(100, null, 3);
NULL
""",
since = "3.2.0",
group = "datetime_funcs")
// scalastyle:on line.size.limit
/**
 * Constructs a day-time interval (a [[DayTimeIntervalType]] value, i.e. a duration in
 * microseconds) from the given `days`, `hours`, `mins` and `secs` components.
 *
 * Extends `NullIntolerant`, so a NULL in any argument produces a NULL result
 * (see the second example above).
 */
case class MakeDTInterval(
days: Expression,
hours: Expression,
mins: Expression,
secs: Expression)
extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant {
// Auxiliary constructors: allow the function to be invoked with 1 to 3 arguments,
// defaulting the omitted components to zero. `secs` defaults to a zero Decimal with
// scale 6 so it already matches the declared input type.
def this(
days: Expression,
hours: Expression,
mins: Expression) = {
this(days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)))
}
def this(days: Expression, hours: Expression) = this(days, hours, Literal(0))
def this(days: Expression) = this(days, Literal(0))
def this() = this(Literal(0))
override def first: Expression = days
override def second: Expression = hours
override def third: Expression = mins
override def fourth: Expression = secs
// Accept `secs` as DecimalType to avoid losing precision of microseconds when converting
// them to the fractional part of `secs`.
override def inputTypes: Seq[AbstractDataType] = Seq(
IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6))
override def dataType: DataType = DayTimeIntervalType()
// Interpreted path: delegate the arithmetic (and overflow checking) to IntervalUtils.
override def nullSafeEval(
day: Any,
hour: Any,
min: Any,
sec: Any): Any = {
IntervalUtils.makeDayTimeInterval(
day.asInstanceOf[Int],
hour.asInstanceOf[Int],
min.asInstanceOf[Int],
sec.asInstanceOf[Decimal])
}
// Codegen path: emit a direct call to the same IntervalUtils helper so interpreted
// and generated code share one implementation.
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
defineCodeGen(ctx, ev, (day, hour, min, sec) => {
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
s"$iu.makeDayTimeInterval($day, $hour, $min, $sec)"
})
}
override def prettyName: String = "make_dt_interval"
// Rebuilds this expression with new children (used by Catalyst tree transformations).
override protected def withNewChildrenInternal(
days: Expression,
hours: Expression,
mins: Expression,
secs: Expression): MakeDTInterval =
copy(days, hours, mins, secs)
}
@ExpressionDescription(
usage = "_FUNC_(years, months) - Make year-month interval from years, months.",
arguments = """

View file

@ -862,6 +862,19 @@ object IntervalUtils {
new CalendarInterval(totalMonths, totalDays, micros)
}
/**
 * Converts the given day-time components into a single duration expressed in
 * microseconds, throwing `ArithmeticException` on Long overflow.
 *
 * @param days  number of days, positive or negative
 * @param hours number of hours, positive or negative
 * @param mins  number of minutes, positive or negative
 * @param secs  seconds with the fraction in microsecond precision (scale must be 6,
 *              so its unscaled value is already a microsecond count)
 * @return total duration in microseconds
 */
def makeDayTimeInterval(
    days: Int,
    hours: Int,
    mins: Int,
    secs: Decimal): Long = {
  assert(secs.scale == 6, "Seconds fractional must have 6 digits for microseconds")
  // Widen each component to microseconds with overflow-checked arithmetic.
  val dayMicros = Math.multiplyExact(days.toLong, MICROS_PER_DAY)
  val hourMicros = Math.multiplyExact(hours.toLong, MICROS_PER_HOUR)
  val minMicros = Math.multiplyExact(mins.toLong, MICROS_PER_MINUTE)
  // Same accumulation order as the checked sum: seconds, then days, hours, minutes.
  Math.addExact(Math.addExact(Math.addExact(secs.toUnscaledLong, dayMicros), hourMicros), minMicros)
}
// The amount of seconds that can cause overflow in the conversion to microseconds
private final val minDurationSeconds = Math.floorDiv(Long.MinValue, MICROS_PER_SECOND)

View file

@ -280,6 +280,51 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
test("SPARK-35130: make day time interval") {
  // Builds a MakeDTInterval expression; the sub-minute part is passed as a
  // Decimal literal carrying the microsecond count at scale 6.
  def buildExpr(days: Int, hours: Int, minutes: Int, secFrac: Long): MakeDTInterval = {
    val secsLit = Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))
    MakeDTInterval(Literal(days), Literal(hours), Literal(minutes), secsLit)
  }

  // Evaluates the expression and compares against the total duration in microseconds.
  def check(
      days: Int = 0,
      hours: Int = 0,
      minutes: Int = 0,
      seconds: Int = 0,
      millis: Int = 0,
      micros: Int = 0): Unit = {
    val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros)
    val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR +
      days * MICROS_PER_DAY
    checkEvaluation(buildExpr(days, hours, minutes, secFrac), totalMicros)
  }

  // Expects evaluation to fail with an ArithmeticException (Long overflow).
  def checkException(
      days: Int = 0,
      hours: Int = 0,
      minutes: Int = 0,
      seconds: Int = 0,
      millis: Int = 0,
      micros: Int = 0): Unit = {
    val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros)
    checkExceptionInExpression[ArithmeticException](
      buildExpr(days, hours, minutes, secFrac), EmptyRow, "")
  }

  check(millis = -123)
  check(31, 23, 59, 59, 999, 999)
  check(31, 123, 159, 159, 1999, 1999)
  check(days = 10000, micros = -1)
  check(-31, -23, -59, -59, -999, -999)
  check(days = -10000, micros = 1)
  check(
    hours = Int.MaxValue,
    minutes = Int.MaxValue,
    seconds = Int.MaxValue,
    millis = Int.MaxValue,
    micros = Int.MaxValue)
  checkException(days = Int.MaxValue)
}
// TODO(SPARK-35778): Check multiply/divide of year-month intervals of any fields by numeric
test("SPARK-34824: multiply year-month interval by numeric") {
Seq(

View file

@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 354
- Number of queries: 355
- Number of expressions that missing example: 13
- Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window
## Schema of Built-in Functions
@ -169,6 +169,7 @@
| org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct<LOG(10, 100):double> |
| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.MakeDTInterval | make_dt_interval | SELECT make_dt_interval(1, 12, 30, 01.001001) | struct<make_dt_interval(1, 12, 30, 1.001001):interval day to second> |
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
@ -359,4 +360,4 @@
| org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |

View file

@ -38,6 +38,14 @@ select make_interval(1, 2, 3, 4, 5, 6, 7.008009);
select make_interval(1, 2, 3, 4, 0, 0, 123456789012.123456);
select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789);
-- make_dt_interval
select make_dt_interval(1);
select make_dt_interval(1, 2);
select make_dt_interval(1, 2, 3);
select make_dt_interval(1, 2, 3, 4.005006);
select make_dt_interval(1, 0, 0, 123456789012.123456);
select make_dt_interval(2147483647);
-- make_ym_interval
select make_ym_interval(1);
select make_ym_interval(1, 2);

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 141
-- Number of queries: 147
-- !query
@ -247,6 +247,53 @@ struct<>
org.apache.spark.sql.AnalysisException
cannot resolve 'make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789L)' due to data type mismatch: argument 7 requires decimal(18,6) type, however, '1234567890123456789L' is of bigint type.; line 1 pos 7
-- !query
select make_dt_interval(1)
-- !query schema
struct<make_dt_interval(1, 0, 0, 0.000000):interval day to second>
-- !query output
1 00:00:00.000000000
-- !query
select make_dt_interval(1, 2)
-- !query schema
struct<make_dt_interval(1, 2, 0, 0.000000):interval day to second>
-- !query output
1 02:00:00.000000000
-- !query
select make_dt_interval(1, 2, 3)
-- !query schema
struct<make_dt_interval(1, 2, 3, 0.000000):interval day to second>
-- !query output
1 02:03:00.000000000
-- !query
select make_dt_interval(1, 2, 3, 4.005006)
-- !query schema
struct<make_dt_interval(1, 2, 3, 4.005006):interval day to second>
-- !query output
1 02:03:04.005006000
-- !query
select make_dt_interval(1, 0, 0, 123456789012.123456)
-- !query schema
struct<make_dt_interval(1, 0, 0, 123456789012.123456):interval day to second>
-- !query output
1428899 00:30:12.123456000
-- !query
select make_dt_interval(2147483647)
-- !query schema
struct<>
-- !query output
java.lang.ArithmeticException
long overflow
-- !query
select make_ym_interval(1)

View file

@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 141
-- Number of queries: 147
-- !query
@ -242,6 +242,54 @@ struct<make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789):interval>
NULL
-- !query
select make_dt_interval(1)
-- !query schema
struct<make_dt_interval(1, 0, 0, 0.000000):interval day to second>
-- !query output
1 00:00:00.000000000
-- !query
select make_dt_interval(1, 2)
-- !query schema
struct<make_dt_interval(1, 2, 0, 0.000000):interval day to second>
-- !query output
1 02:00:00.000000000
-- !query
select make_dt_interval(1, 2, 3)
-- !query schema
struct<make_dt_interval(1, 2, 3, 0.000000):interval day to second>
-- !query output
1 02:03:00.000000000
-- !query
select make_dt_interval(1, 2, 3, 4.005006)
-- !query schema
struct<make_dt_interval(1, 2, 3, 4.005006):interval day to second>
-- !query output
1 02:03:04.005006000
-- !query
select make_dt_interval(1, 0, 0, 123456789012.123456)
-- !query schema
struct<make_dt_interval(1, 0, 0, 123456789012.123456):interval day to second>
-- !query output
1428899 00:30:12.123456000
-- !query
select make_dt_interval(2147483647)
-- !query schema
struct<>
-- !query output
java.lang.ArithmeticException
long overflow
-- !query
select make_ym_interval(1)
-- !query schema