[SPARK-36049][SQL] Remove IntervalUnit

### What changes were proposed in this pull request?
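Remove the `IntervalUnit` enumeration from `IntervalUtils`. Field names are now passed to `toLongWithRange` as the `UTF8String` unit constants (`yearStr`, `monthStr`, `dayStr`, ...), which are built directly from string literals instead of from enumeration values.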

### Why are the changes needed?
Code cleanup.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No new tests are needed.

Closes #33265 from AngersZhuuuu/SPARK-36049.

Lead-authored-by: Angerszhuuuu <angers.zhu@gmail.com>
Co-authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
Angerszhuuuu 2021-07-08 23:02:21 +03:00 committed by Max Gekk
parent 382b66e267
commit fef7e1703c
2 changed files with 58 additions and 95 deletions

View file

@ -41,22 +41,6 @@ object IntervalStringStyles extends Enumeration {
object IntervalUtils { object IntervalUtils {
object IntervalUnit extends Enumeration {
type IntervalUnit = Value
val NANOSECOND = Value(0, "nanosecond")
val MICROSECOND = Value(1, "microsecond")
val MILLISECOND = Value(2, "millisecond")
val SECOND = Value(3, "second")
val MINUTE = Value(4, "minute")
val HOUR = Value(5, "hour")
val DAY = Value(6, "day")
val WEEK = Value(7, "week")
val MONTH = Value(8, "month")
val YEAR = Value(9, "year")
}
import IntervalUnit._
private val MAX_DAY = Long.MaxValue / MICROS_PER_DAY private val MAX_DAY = Long.MaxValue / MICROS_PER_DAY
private val MAX_HOUR = Long.MaxValue / MICROS_PER_HOUR private val MAX_HOUR = Long.MaxValue / MICROS_PER_HOUR
private val MAX_MINUTE = Long.MaxValue / MICROS_PER_MINUTE private val MAX_MINUTE = Long.MaxValue / MICROS_PER_MINUTE
@ -97,7 +81,7 @@ object IntervalUtils {
def getSeconds(interval: CalendarInterval): Decimal = getSeconds(interval.microseconds) def getSeconds(interval: CalendarInterval): Decimal = getSeconds(interval.microseconds)
private def toLongWithRange( private def toLongWithRange(
fieldName: IntervalUnit, fieldName: UTF8String,
s: String, s: String,
minValue: Long, minValue: Long,
maxValue: Long): Long = { maxValue: Long): Long = {
@ -250,10 +234,11 @@ object IntervalUtils {
} }
} }
private def toYMInterval(yearStr: String, monthStr: String, sign: Int): Int = { private def toYMInterval(year: String, month: String, sign: Int): Int = {
safeToInterval("year-month") { safeToInterval("year-month") {
val years = toLongWithRange(YEAR, yearStr, 0, Integer.MAX_VALUE / MONTHS_PER_YEAR) val years = toLongWithRange(yearStr, year, 0, Integer.MAX_VALUE / MONTHS_PER_YEAR)
val totalMonths = sign * (years * MONTHS_PER_YEAR + toLongWithRange(MONTH, monthStr, 0, 11)) val totalMonths =
sign * (years * MONTHS_PER_YEAR + toLongWithRange(monthStr, month, 0, 11))
Math.toIntExact(totalMonths) Math.toIntExact(totalMonths)
} }
} }
@ -402,45 +387,33 @@ object IntervalUtils {
} }
} }
def toDTInterval( def toDTInterval(day: String, hour: String, minute: String, second: String, sign: Int): Long = {
dayStr: String,
hourStr: String,
minuteStr: String,
secondStr: String,
sign: Int): Long = {
var micros = 0L var micros = 0L
val days = toLongWithRange(DAY, dayStr, 0, MAX_DAY).toInt val days = toLongWithRange(dayStr, day, 0, MAX_DAY).toInt
micros = Math.addExact(micros, sign * days * MICROS_PER_DAY) micros = Math.addExact(micros, sign * days * MICROS_PER_DAY)
val hours = toLongWithRange(HOUR, hourStr, 0, 23) val hours = toLongWithRange(hourStr, hour, 0, 23)
micros = Math.addExact(micros, sign * hours * MICROS_PER_HOUR) micros = Math.addExact(micros, sign * hours * MICROS_PER_HOUR)
val minutes = toLongWithRange(MINUTE, minuteStr, 0, 59) val minutes = toLongWithRange(minuteStr, minute, 0, 59)
micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE) micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE)
micros = Math.addExact(micros, sign * parseSecondNano(secondStr)) micros = Math.addExact(micros, sign * parseSecondNano(second))
micros micros
} }
def toDTInterval( def toDTInterval(hour: String, minute: String, second: String, sign: Int): Long = {
hourStr: String,
minuteStr: String,
secondStr: String,
sign: Int): Long = {
var micros = 0L var micros = 0L
val hours = toLongWithRange(HOUR, hourStr, 0, MAX_HOUR) val hours = toLongWithRange(hourStr, hour, 0, MAX_HOUR)
micros = Math.addExact(micros, sign * hours * MICROS_PER_HOUR) micros = Math.addExact(micros, sign * hours * MICROS_PER_HOUR)
val minutes = toLongWithRange(MINUTE, minuteStr, 0, 59) val minutes = toLongWithRange(minuteStr, minute, 0, 59)
micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE) micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE)
micros = Math.addExact(micros, sign * parseSecondNano(secondStr)) micros = Math.addExact(micros, sign * parseSecondNano(second))
micros micros
} }
def toDTInterval( def toDTInterval(minute: String, second: String, sign: Int): Long = {
minuteStr: String,
secondStr: String,
sign: Int): Long = {
var micros = 0L var micros = 0L
val minutes = toLongWithRange(MINUTE, minuteStr, 0, MAX_MINUTE) val minutes = toLongWithRange(minuteStr, minute, 0, MAX_MINUTE)
micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE) micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE)
micros = Math.addExact(micros, sign * parseSecondNano(secondStr)) micros = Math.addExact(micros, sign * parseSecondNano(second))
micros micros
} }
@ -511,21 +484,21 @@ object IntervalUtils {
val days = if (m.group(2) == null) { val days = if (m.group(2) == null) {
0 0
} else { } else {
toLongWithRange(DAY, m.group(3), 0, Integer.MAX_VALUE).toInt toLongWithRange(dayStr, m.group(3), 0, Integer.MAX_VALUE).toInt
} }
var hours: Long = 0L var hours: Long = 0L
var minutes: Long = 0L var minutes: Long = 0L
var seconds: Long = 0L var seconds: Long = 0L
if (m.group(5) != null || from == DT.MINUTE) { // 'HH:mm:ss' or 'mm:ss minute' if (m.group(5) != null || from == DT.MINUTE) { // 'HH:mm:ss' or 'mm:ss minute'
hours = toLongWithRange(HOUR, m.group(5), 0, 23) hours = toLongWithRange(hourStr, m.group(5), 0, 23)
minutes = toLongWithRange(MINUTE, m.group(6), 0, 59) minutes = toLongWithRange(minuteStr, m.group(6), 0, 59)
seconds = toLongWithRange(SECOND, m.group(7), 0, 59) seconds = toLongWithRange(secondStr, m.group(7), 0, 59)
} else if (m.group(8) != null) { // 'mm:ss.nn' } else if (m.group(8) != null) { // 'mm:ss.nn'
minutes = toLongWithRange(MINUTE, m.group(6), 0, 59) minutes = toLongWithRange(minuteStr, m.group(6), 0, 59)
seconds = toLongWithRange(SECOND, m.group(7), 0, 59) seconds = toLongWithRange(secondStr, m.group(7), 0, 59)
} else { // 'HH:mm' } else { // 'HH:mm'
hours = toLongWithRange(HOUR, m.group(6), 0, 23) hours = toLongWithRange(hourStr, m.group(6), 0, 23)
minutes = toLongWithRange(SECOND, m.group(7), 0, 59) minutes = toLongWithRange(secondStr, m.group(7), 0, 59)
} }
// Hive allow nanosecond precision interval // Hive allow nanosecond precision interval
var secondsFraction = parseNanos(m.group(9), seconds < 0) var secondsFraction = parseNanos(m.group(9), seconds < 0)
@ -555,15 +528,15 @@ object IntervalUtils {
} }
// Parses a string with nanoseconds, truncates the result and returns microseconds // Parses a string with nanoseconds, truncates the result and returns microseconds
private def parseNanos(nanosStr: String, isNegative: Boolean): Long = { private def parseNanos(nanos: String, isNegative: Boolean): Long = {
if (nanosStr != null) { if (nanos != null) {
val maxNanosLen = 9 val maxNanosLen = 9
val alignedStr = if (nanosStr.length < maxNanosLen) { val alignedStr = if (nanos.length < maxNanosLen) {
(nanosStr + "000000000").substring(0, maxNanosLen) (nanos + "000000000").substring(0, maxNanosLen)
} else nanosStr } else nanos
val nanos = toLongWithRange(NANOSECOND, alignedStr, 0L, 999999999L) val nanoSecond = toLongWithRange(nanosStr, alignedStr, 0L, 999999999L)
val micros = nanos / NANOS_PER_MICROS val microSecond = nanoSecond / NANOS_PER_MICROS
if (isNegative) -micros else micros if (isNegative) -microSecond else microSecond
} else { } else {
0L 0L
} }
@ -574,7 +547,7 @@ object IntervalUtils {
*/ */
private def parseSecondNano(secondNano: String): Long = { private def parseSecondNano(secondNano: String): Long = {
def parseSeconds(secondsStr: String): Long = { def parseSeconds(secondsStr: String): Long = {
toLongWithRange(SECOND, secondsStr, MIN_SECOND, MAX_SECOND) * MICROS_PER_SECOND toLongWithRange(secondStr, secondsStr, MIN_SECOND, MAX_SECOND) * MICROS_PER_SECOND
} }
secondNano.split("\\.") match { secondNano.split("\\.") match {
@ -774,19 +747,20 @@ object IntervalUtils {
UNIT_SUFFIX, UNIT_SUFFIX,
UNIT_END = Value UNIT_END = Value
} }
private final val intervalStr = UTF8String.fromString("interval") private final val intervalStr = unitToUtf8("interval")
private def unitToUtf8(unit: IntervalUnit): UTF8String = { private def unitToUtf8(unit: String): UTF8String = {
UTF8String.fromString(unit.toString) UTF8String.fromString(unit)
} }
private final val yearStr = unitToUtf8(YEAR) private final val yearStr = unitToUtf8("year")
private final val monthStr = unitToUtf8(MONTH) private final val monthStr = unitToUtf8("month")
private final val weekStr = unitToUtf8(WEEK) private final val weekStr = unitToUtf8("week")
private final val dayStr = unitToUtf8(DAY) private final val dayStr = unitToUtf8("day")
private final val hourStr = unitToUtf8(HOUR) private final val hourStr = unitToUtf8("hour")
private final val minuteStr = unitToUtf8(MINUTE) private final val minuteStr = unitToUtf8("minute")
private final val secondStr = unitToUtf8(SECOND) private final val secondStr = unitToUtf8("second")
private final val millisStr = unitToUtf8(MILLISECOND) private final val millisStr = unitToUtf8("millisecond")
private final val microsStr = unitToUtf8(MICROSECOND) private final val microsStr = unitToUtf8("microsecond")
private final val nanosStr = unitToUtf8("nanosecond")
/** /**
* A safe version of `stringToInterval`. It returns null for invalid input string. * A safe version of `stringToInterval`. It returns null for invalid input string.

View file

@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils}
import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit._
import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.TimestampTypes import org.apache.spark.sql.internal.SQLConf.TimestampTypes
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
@ -679,18 +678,7 @@ class ExpressionParserSuite extends AnalysisTest {
} }
} }
val intervalUnits = Seq( def intervalLiteral(u: UTF8String, s: String): Literal = {
YEAR,
MONTH,
WEEK,
DAY,
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND)
def intervalLiteral(u: IntervalUnit, s: String): Literal = {
Literal(IntervalUtils.stringToInterval(s + " " + u.toString)) Literal(IntervalUtils.stringToInterval(s + " " + u.toString))
} }
@ -710,18 +698,19 @@ class ExpressionParserSuite extends AnalysisTest {
// Single Intervals. // Single Intervals.
val forms = Seq("", "s") val forms = Seq("", "s")
val values = Seq("0", "10", "-7", "21") val values = Seq("0", "10", "-7", "21")
intervalUnits.foreach { unit => Seq("year", "month", "week", "day", "hour", "minute", "second", "millisecond", "microsecond")
forms.foreach { form => .foreach { unit =>
values.foreach { value => forms.foreach { form =>
val expected = intervalLiteral(unit, value) values.foreach { value =>
checkIntervals(s"$value $unit$form", expected) val expected = intervalLiteral(unit, value)
checkIntervals(s"'$value' $unit$form", expected) checkIntervals(s"$value $unit$form", expected)
} checkIntervals(s"'$value' $unit$form", expected)
}
}
} }
}
// Hive nanosecond notation. // Hive nanosecond notation.
checkIntervals("13.123456789 seconds", intervalLiteral(SECOND, "13.123456789")) checkIntervals("13.123456789 seconds", intervalLiteral("second", "13.123456789"))
checkIntervals( checkIntervals(
"-13.123456789 second", "-13.123456789 second",
Literal(new CalendarInterval( Literal(new CalendarInterval(