[SPARK-29651][SQL] Fix parsing of interval seconds fraction
### What changes were proposed in this pull request? In the PR, I propose to extract parsing of the seconds interval units to the private method `parseNanos` in `IntervalUtils` and modify the code to correctly parse the fractional part of the seconds unit of intervals in the cases: - When the fractional part has less than 9 digits - The seconds unit is negative ### Why are the changes needed? The changes are needed to fix the issues: ```sql spark-sql> select interval '10.123456 seconds'; interval 10 seconds 123 microseconds ``` The correct result must be `interval 10 seconds 123 milliseconds 456 microseconds` ```sql spark-sql> select interval '-10.123456789 seconds'; interval -9 seconds -876 milliseconds -544 microseconds ``` but the whole interval should be negated, and the result must be `interval -10 seconds -123 milliseconds -456 microseconds`, taking into account the truncation to microseconds. ### Does this PR introduce any user-facing change? Yes. After changes: ```sql spark-sql> select interval '10.123456 seconds'; interval 10 seconds 123 milliseconds 456 microseconds spark-sql> select interval '-10.123456789 seconds'; interval -10 seconds -123 milliseconds -456 microseconds ``` ### How was this patch tested? By existing and new tests in `ExpressionParserSuite`. Closes #26313 from MaxGekk/fix-interval-nanos-parsing. Authored-by: Maxim Gekk <max.gekk@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
cfc80d0eb1
commit
3206a99870
|
@ -218,27 +218,22 @@ object IntervalUtils {
|
|||
minutes = toLongWithRange("second", m.group(7), 0, 59)
|
||||
}
|
||||
// Hive allows nanosecond-precision intervals
|
||||
val nanoStr = if (m.group(9) == null) {
|
||||
null
|
||||
} else {
|
||||
(m.group(9) + "000000000").substring(0, 9)
|
||||
}
|
||||
var nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L)
|
||||
var secondsFraction = parseNanos(m.group(9), seconds < 0)
|
||||
to match {
|
||||
case "hour" =>
|
||||
minutes = 0
|
||||
seconds = 0
|
||||
nanos = 0
|
||||
secondsFraction = 0
|
||||
case "minute" =>
|
||||
seconds = 0
|
||||
nanos = 0
|
||||
secondsFraction = 0
|
||||
case "second" =>
|
||||
// No-op
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Cannot support (interval '$input' $from to $to) expression")
|
||||
}
|
||||
var micros = nanos / DateTimeUtils.NANOS_PER_MICROS
|
||||
var micros = secondsFraction
|
||||
micros = Math.addExact(micros, Math.multiplyExact(days, DateTimeUtils.MICROS_PER_DAY))
|
||||
micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR))
|
||||
micros = Math.addExact(micros, Math.multiplyExact(minutes, MICROS_PER_MINUTE))
|
||||
|
@ -292,6 +287,21 @@ object IntervalUtils {
|
|||
new CalendarInterval(months, microseconds)
|
||||
}
|
||||
|
||||
/**
 * Converts the fractional-seconds digits of an interval into microseconds.
 *
 * The digits are interpreted as a nanosecond count: input shorter than nine digits is
 * right-padded with zeros, the value is checked by `toLongWithRange` against
 * [0, 999999999], and the result is truncated (integer division) to microseconds.
 *
 * @param nanosStr the digits following the decimal point, or `null` when there is no fraction
 * @param isNegative when `true`, the returned microseconds are negated
 * @return the fraction expressed in microseconds, or `0` when `nanosStr` is `null`
 */
private def parseNanos(nanosStr: String, isNegative: Boolean): Long = {
  Option(nanosStr).fold(0L) { digits =>
    val nanosWidth = 9
    // Pad on the right so that e.g. "123456" is read as 123456000 ns, not 123456 ns.
    val padded =
      if (digits.length >= nanosWidth) digits
      else digits + ("0" * (nanosWidth - digits.length))
    val nanoseconds = toLongWithRange("nanosecond", padded, 0L, 999999999L)
    val microseconds = nanoseconds / DateTimeUtils.NANOS_PER_MICROS
    if (isNegative) -microseconds else microseconds
  }
}
|
||||
|
||||
/**
|
||||
* Parse second_nano string in ss.nnnnnnnnn format to microseconds
|
||||
*/
|
||||
|
@ -303,15 +313,13 @@ object IntervalUtils {
|
|||
Long.MinValue / DateTimeUtils.MICROS_PER_SECOND,
|
||||
Long.MaxValue / DateTimeUtils.MICROS_PER_SECOND) * DateTimeUtils.MICROS_PER_SECOND
|
||||
}
|
||||
// Reads `nanosStr` as a nanosecond count (checked against [0, 999999999]) and
// truncates it to microseconds via integer division.
def parseNanos(nanosStr: String): Long = {
  val nanoseconds = toLongWithRange("nanosecond", nanosStr, 0L, 999999999L)
  nanoseconds / DateTimeUtils.NANOS_PER_MICROS
}
|
||||
|
||||
secondNano.split("\\.") match {
|
||||
case Array(secondsStr) => parseSeconds(secondsStr)
|
||||
case Array("", nanosStr) => parseNanos(nanosStr)
|
||||
case Array("", nanosStr) => parseNanos(nanosStr, false)
|
||||
case Array(secondsStr, nanosStr) =>
|
||||
Math.addExact(parseSeconds(secondsStr), parseNanos(nanosStr))
|
||||
val seconds = parseSeconds(secondsStr)
|
||||
Math.addExact(seconds, parseNanos(nanosStr, seconds < 0))
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
"Interval string does not match second-nano format of ss.nnnnnnnnn")
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier
|
|||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
|
||||
import org.apache.spark.sql.catalyst.expressions._
|
||||
import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
|
||||
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils}
|
||||
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, IntervalUtils}
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types._
|
||||
import org.apache.spark.unsafe.types.CalendarInterval
|
||||
|
@ -628,7 +628,17 @@ class ExpressionParserSuite extends AnalysisTest {
|
|||
|
||||
// Hive nanosecond notation.
|
||||
checkIntervals("13.123456789 seconds", intervalLiteral("second", "13.123456789"))
|
||||
checkIntervals("-13.123456789 second", intervalLiteral("second", "-13.123456789"))
|
||||
checkIntervals(
|
||||
"-13.123456789 second",
|
||||
Literal(new CalendarInterval(
|
||||
0,
|
||||
-13 * DateTimeUtils.MICROS_PER_SECOND - 123 * DateTimeUtils.MICROS_PER_MILLIS - 456)))
|
||||
checkIntervals(
|
||||
"13.123456 second",
|
||||
Literal(new CalendarInterval(
|
||||
0,
|
||||
13 * DateTimeUtils.MICROS_PER_SECOND + 123 * DateTimeUtils.MICROS_PER_MILLIS + 456)))
|
||||
checkIntervals("1.001 second", Literal(IntervalUtils.fromString("1 second 1 millisecond")))
|
||||
|
||||
// Non Existing unit
|
||||
intercept("interval 10 nanoseconds",
|
||||
|
|
|
@ -323,9 +323,9 @@ select timestamp '2016-33-11 20:54:00.000'
|
|||
-- !query 34
|
||||
select interval 13.123456789 seconds, interval -13.123456789 second
|
||||
-- !query 34 schema
|
||||
struct<interval 13 seconds 123 milliseconds 456 microseconds:interval,interval -12 seconds -876 milliseconds -544 microseconds:interval>
|
||||
struct<interval 13 seconds 123 milliseconds 456 microseconds:interval,interval -13 seconds -123 milliseconds -456 microseconds:interval>
|
||||
-- !query 34 output
|
||||
interval 13 seconds 123 milliseconds 456 microseconds interval -12 seconds -876 milliseconds -544 microseconds
|
||||
interval 13 seconds 123 milliseconds 456 microseconds interval -13 seconds -123 milliseconds -456 microseconds
|
||||
|
||||
|
||||
-- !query 35
|
||||
|
|
Loading…
Reference in a new issue