[SPARK-29651][SQL] Fix parsing of interval seconds fraction

### What changes were proposed in this pull request?
In this PR, I propose to extract the parsing of the seconds interval unit into the private method `parseNanos` in `IntervalUtils`, and to modify the code so that it correctly parses the fractional part of the seconds unit of intervals in the following cases:
- When the fractional part has fewer than 9 digits
- When the seconds unit is negative

### Why are the changes needed?
The changes are needed to fix the issues:
```sql
spark-sql> select interval '10.123456 seconds';
interval 10 seconds 123 microseconds
```
The correct result must be `interval 10 seconds 123 milliseconds 456 microseconds`
```sql
spark-sql> select interval '-10.123456789 seconds';
interval -9 seconds -876 milliseconds -544 microseconds
```
but the whole interval should be negated, and the result must be `interval -10 seconds -123 milliseconds -456 microseconds`, taking into account the truncation to microseconds.

### Does this PR introduce any user-facing change?
Yes. After the changes:
```sql
spark-sql> select interval '10.123456 seconds';
interval 10 seconds 123 milliseconds 456 microseconds
spark-sql> select interval '-10.123456789 seconds';
interval -10 seconds -123 milliseconds -456 microseconds
```

### How was this patch tested?
By existing and new tests in `ExpressionParserSuite`.

Closes #26313 from MaxGekk/fix-interval-nanos-parsing.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Maxim Gekk 2019-10-31 09:20:46 +08:00 committed by Wenchen Fan
parent cfc80d0eb1
commit 3206a99870
3 changed files with 36 additions and 18 deletions

View file

@ -218,27 +218,22 @@ object IntervalUtils {
minutes = toLongWithRange("second", m.group(7), 0, 59)
}
// Hive allow nanosecond precision interval
val nanoStr = if (m.group(9) == null) {
null
} else {
(m.group(9) + "000000000").substring(0, 9)
}
var nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L)
var secondsFraction = parseNanos(m.group(9), seconds < 0)
to match {
case "hour" =>
minutes = 0
seconds = 0
nanos = 0
secondsFraction = 0
case "minute" =>
seconds = 0
nanos = 0
secondsFraction = 0
case "second" =>
// No-op
case _ =>
throw new IllegalArgumentException(
s"Cannot support (interval '$input' $from to $to) expression")
}
var micros = nanos / DateTimeUtils.NANOS_PER_MICROS
var micros = secondsFraction
micros = Math.addExact(micros, Math.multiplyExact(days, DateTimeUtils.MICROS_PER_DAY))
micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR))
micros = Math.addExact(micros, Math.multiplyExact(minutes, MICROS_PER_MINUTE))
@ -292,6 +287,21 @@ object IntervalUtils {
new CalendarInterval(months, microseconds)
}
/**
 * Converts the fractional-seconds digits of an interval string to microseconds.
 *
 * Inputs shorter than 9 characters are right-padded with zeros, so that "123456" is
 * read as 123456000 nanoseconds rather than 123456. Inputs of 9 or more characters are
 * handed to `toLongWithRange` unchanged, with the accepted range 0 to 999999999.
 * The nanosecond value is then divided by `DateTimeUtils.NANOS_PER_MICROS`, dropping
 * the sub-microsecond remainder.
 *
 * @param nanosStr the digits after the decimal point; may be null, meaning "no fraction"
 * @param isNegative whether the returned microseconds must be negated
 * @return the fraction in microseconds, negated when `isNegative` is set; 0 for null input
 */
private def parseNanos(nanosStr: String, isNegative: Boolean): Long = {
  Option(nanosStr).fold(0L) { digits =>
    val nanosWidth = 9
    // Pad on the right only: the digits are a decimal fraction, so trailing zeros
    // keep the value while fixing the scale to nanoseconds.
    val padded =
      if (digits.length >= nanosWidth) digits
      else digits + "0" * (nanosWidth - digits.length)
    val nanoseconds = toLongWithRange("nanosecond", padded, 0L, 999999999L)
    val microseconds = nanoseconds / DateTimeUtils.NANOS_PER_MICROS
    if (isNegative) -microseconds else microseconds
  }
}
/**
* Parse second_nano string in ss.nnnnnnnnn format to microseconds
*/
@ -303,15 +313,13 @@ object IntervalUtils {
Long.MinValue / DateTimeUtils.MICROS_PER_SECOND,
Long.MaxValue / DateTimeUtils.MICROS_PER_SECOND) * DateTimeUtils.MICROS_PER_SECOND
}
def parseNanos(nanosStr: String): Long = {
toLongWithRange("nanosecond", nanosStr, 0L, 999999999L) / DateTimeUtils.NANOS_PER_MICROS
}
secondNano.split("\\.") match {
case Array(secondsStr) => parseSeconds(secondsStr)
case Array("", nanosStr) => parseNanos(nanosStr)
case Array("", nanosStr) => parseNanos(nanosStr, false)
case Array(secondsStr, nanosStr) =>
Math.addExact(parseSeconds(secondsStr), parseNanos(nanosStr))
val seconds = parseSeconds(secondsStr)
Math.addExact(seconds, parseNanos(nanosStr, seconds < 0))
case _ =>
throw new IllegalArgumentException(
"Interval string does not match second-nano format of ss.nnnnnnnnn")

View file

@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils}
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, IntervalUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
@ -628,7 +628,17 @@ class ExpressionParserSuite extends AnalysisTest {
// Hive nanosecond notation.
checkIntervals("13.123456789 seconds", intervalLiteral("second", "13.123456789"))
checkIntervals("-13.123456789 second", intervalLiteral("second", "-13.123456789"))
checkIntervals(
"-13.123456789 second",
Literal(new CalendarInterval(
0,
-13 * DateTimeUtils.MICROS_PER_SECOND - 123 * DateTimeUtils.MICROS_PER_MILLIS - 456)))
checkIntervals(
"13.123456 second",
Literal(new CalendarInterval(
0,
13 * DateTimeUtils.MICROS_PER_SECOND + 123 * DateTimeUtils.MICROS_PER_MILLIS + 456)))
checkIntervals("1.001 second", Literal(IntervalUtils.fromString("1 second 1 millisecond")))
// Non Existing unit
intercept("interval 10 nanoseconds",

View file

@ -323,9 +323,9 @@ select timestamp '2016-33-11 20:54:00.000'
-- !query 34
select interval 13.123456789 seconds, interval -13.123456789 second
-- !query 34 schema
struct<interval 13 seconds 123 milliseconds 456 microseconds:interval,interval -12 seconds -876 milliseconds -544 microseconds:interval>
struct<interval 13 seconds 123 milliseconds 456 microseconds:interval,interval -13 seconds -123 milliseconds -456 microseconds:interval>
-- !query 34 output
interval 13 seconds 123 milliseconds 456 microseconds interval -12 seconds -876 milliseconds -544 microseconds
interval 13 seconds 123 milliseconds 456 microseconds interval -13 seconds -123 milliseconds -456 microseconds
-- !query 35