[SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.
This fixes https://issues.apache.org/jira/browse/SPARK-9794 by using a real ISO 8601 parser (courtesy of the XML component of the standard Java library). cc: angelini. Author: Kevin Cox <kevincox@kevincox.ca>. Closes #8396 from kevincox/kevincox-sql-time-parsing.
This commit is contained in:
parent
896edb51ab
commit
d39f15ea2b
|
@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
|
|||
import java.sql.{Date, Timestamp}
|
||||
import java.text.{DateFormat, SimpleDateFormat}
|
||||
import java.util.{TimeZone, Calendar}
|
||||
import javax.xml.bind.DatatypeConverter;
|
||||
|
||||
import org.apache.spark.unsafe.types.UTF8String
|
||||
|
||||
|
@ -109,30 +110,22 @@ object DateTimeUtils {
|
|||
}
|
||||
|
||||
/**
 * Parses a textual date or timestamp into a `java.util.Date`.
 *
 * Three input shapes are recognised, checked in this order:
 *  - a string containing "GMT" (e.g. `2000-01-01T00:00:00GMT+01:00`): the first "GMT" marker
 *    is removed and the remainder is parsed again, i.e. as `2000-01-01T00:00:00+01:00`;
 *  - a string without a 'T' is treated as a JDBC escape string and is parsed with
 *    `Timestamp.valueOf` when it contains a space, or with `Date.valueOf` otherwise;
 *  - anything else is handed to `DatatypeConverter.parseDateTime`.
 *
 * @param s the date/time text to parse
 * @return the parsed value; for JDBC escape strings this is the `java.sql.Timestamp` or
 *         `java.sql.Date` produced by the corresponding `valueOf` call
 */
def stringToTime(s: String): java.util.Date = {
  val indexOfGMT = s.indexOf("GMT")
  if (indexOfGMT != -1) {
    // ISO8601 with a non-standard zone prefix (2000-01-01T00:00:00GMT+01:00):
    // drop the "GMT" marker and re-parse what is left (2000-01-01T00:00:00+01:00).
    val beforeMarker = s.substring(0, indexOfGMT)
    val afterMarker = s.substring(indexOfGMT + "GMT".length)
    stringToTime(beforeMarker + afterMarker)
  } else if (!s.contains('T')) {
    // JDBC escape string: a space separates the date part from the time part.
    if (s.contains(' ')) {
      Timestamp.valueOf(s)
    } else {
      Date.valueOf(s)
    }
  } else {
    // Remaining inputs contain a 'T' and no "GMT"; delegate to the XML date-time parser.
    DatatypeConverter.parseDateTime(s).getTime()
  }
}
|
||||
|
||||
|
|
|
@ -136,6 +136,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
|
|||
assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
|
||||
}
|
||||
|
||||
test("string to time") {
  // Builds a calendar in `zone` set to the given date at `hour`:00:00 with the millisecond
  // field zeroed. A fresh calendar is used for every expectation so that no assertion
  // depends on state left behind by an earlier one. `month` is zero-based, as in Calendar.
  def calendarAt(zone: TimeZone, year: Int, month: Int, day: Int, hour: Int): Calendar = {
    val cal = Calendar.getInstance(zone)
    cal.set(Calendar.MILLISECOND, 0)
    cal.set(year, month, day, hour, 0, 0)
    cal
  }

  // Tests with UTC.
  val utc = TimeZone.getTimeZone("UTC")
  assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === calendarAt(utc, 1900, 0, 1, 0).getTime())
  assert(stringToTime("2000-12-30T10:00:00Z") === calendarAt(utc, 2000, 11, 30, 10).getTime())

  // Tests with set time zone.
  val minusFour = TimeZone.getTimeZone("GMT-04:00")
  assert(stringToTime("1900-01-01T00:00:00-04:00") ===
    calendarAt(minusFour, 1900, 0, 1, 0).getTime())
  assert(stringToTime("1900-01-01T00:00:00GMT-04:00") ===
    calendarAt(minusFour, 1900, 0, 1, 0).getTime())

  // Tests with local time zone: JDBC escape strings carry no zone of their own.
  val local = TimeZone.getDefault()
  assert(stringToTime("2000-12-30") ===
    new Date(calendarAt(local, 2000, 11, 30, 0).getTimeInMillis()))
  assert(stringToTime("2000-12-30 10:00:00") ===
    new Timestamp(calendarAt(local, 2000, 11, 30, 10).getTimeInMillis()))
}
|
||||
|
||||
test("string to timestamp") {
|
||||
var c = Calendar.getInstance()
|
||||
c.set(1969, 11, 31, 16, 0, 0)
|
||||
|
|
Loading…
Reference in a new issue