[SPARK-34695][SQL] Fix long overflow in conversion of minimum duration to microseconds
### What changes were proposed in this pull request? In the PR, I propose to especially handle the amount of seconds `-9223372036855` in `IntervalUtils. durationToMicros()`. Starting from the amount (any durations with the second field < `-9223372036855`), input durations cannot fit to `Long` in the conversion to microseconds. For example, the amount of microseconds = `Long.MinValue = -9223372036854775808` can be represented in two forms: 1. seconds = -9223372036854, nanoAdjustment = -775808, or 2. seconds = -9223372036855, nanoAdjustment = +224192 And the method `Duration.ofSeconds()` produces the last form but such form causes overflow while converting `-9223372036855` seconds to microseconds. In the PR, I propose to convert the second form to the first one if the second field of input duration is equal to `-9223372036855`. ### Why are the changes needed? The changes fix the issue demonstrated by the code: ```scala scala> durationToMicros(microsToDuration(Long.MinValue)) java.lang.ArithmeticException: long overflow at java.lang.Math.multiplyExact(Math.java:892) at org.apache.spark.sql.catalyst.util.IntervalUtils$.durationToMicros(IntervalUtils.scala:782) ... 49 elided ``` The `durationToMicros()` method cannot handle valid output of `microsToDuration()`. ### Does this PR introduce _any_ user-facing change? Should not since new interval types has not been released yet. ### How was this patch tested? By running new UT from `IntervalUtilsSuite`. Closes #31799 from MaxGekk/fix-min-duration. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
744a73df9e
commit
d7bb327aee
|
@ -765,6 +765,9 @@ object IntervalUtils {
|
|||
new CalendarInterval(totalMonths, totalDays, micros)
|
||||
}
|
||||
|
||||
// The amount of seconds that can cause overflow in the conversion to microseconds
|
||||
private final val minDurationSeconds = Math.floorDiv(Long.MinValue, MICROS_PER_SECOND)
|
||||
|
||||
/**
|
||||
* Converts this duration to the total length in microseconds.
|
||||
* <p>
|
||||
|
@ -779,9 +782,18 @@ object IntervalUtils {
|
|||
* @throws ArithmeticException If numeric overflow occurs
|
||||
*/
|
||||
def durationToMicros(duration: Duration): Long = {
|
||||
val us = Math.multiplyExact(duration.getSeconds, MICROS_PER_SECOND)
|
||||
val result = Math.addExact(us, duration.getNano / NANOS_PER_MICROS)
|
||||
result
|
||||
val seconds = duration.getSeconds
|
||||
if (seconds == minDurationSeconds) {
|
||||
val microsInSeconds = (minDurationSeconds + 1) * MICROS_PER_SECOND
|
||||
val nanoAdjustment = duration.getNano
|
||||
assert(0 <= nanoAdjustment && nanoAdjustment < NANOS_PER_SECOND,
|
||||
"Duration.getNano() must return the adjustment to the seconds field " +
|
||||
"in the range from 0 to 999999999 nanoseconds, inclusive.")
|
||||
Math.addExact(microsInSeconds, (nanoAdjustment - NANOS_PER_SECOND) / NANOS_PER_MICROS)
|
||||
} else {
|
||||
val microsInSeconds = Math.multiplyExact(seconds, MICROS_PER_SECOND)
|
||||
Math.addExact(microsInSeconds, duration.getNano / NANOS_PER_MICROS)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -425,4 +425,20 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
|
|||
assert(monthsToPeriod(Int.MaxValue) === Period.ofYears(178956970).withMonths(7))
|
||||
assert(monthsToPeriod(Int.MinValue) === Period.ofYears(-178956970).withMonths(-8))
|
||||
}
|
||||
|
||||
test("SPARK-34695: round trip conversion of micros -> duration -> micros") {
|
||||
Seq(
|
||||
0,
|
||||
MICROS_PER_SECOND - 1,
|
||||
-MICROS_PER_SECOND + 1,
|
||||
MICROS_PER_SECOND,
|
||||
-MICROS_PER_SECOND,
|
||||
Long.MaxValue - MICROS_PER_SECOND,
|
||||
Long.MinValue + MICROS_PER_SECOND,
|
||||
Long.MaxValue,
|
||||
Long.MinValue).foreach { micros =>
|
||||
val duration = microsToDuration(micros)
|
||||
assert(durationToMicros(duration) === micros)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue