[SPARK-33498][SQL] Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid
### What changes were proposed in this pull request? Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid, when ANSI mode is enabled. This patch should update GetTimeStamp, UnixTimeStamp, ToUnixTimeStamp and Cast. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30442 from leanken/leanken-SPARK-33498. Authored-by: xuewei.linxuewei <xuewei.linxuewei@alibaba-inc.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
e43255051c
commit
b9f2f78de5
|
@ -136,12 +136,17 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
|
|||
- `element_at`: This function throws `NoSuchElementException` if key does not exist in map.
|
||||
- `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
|
||||
- `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
|
||||
- `to_date`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid.
|
||||
- `to_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid.
|
||||
- `unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid.
|
||||
- `to_unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid.
|
||||
|
||||
### SQL Operators
|
||||
|
||||
The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`).
|
||||
- `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices.
|
||||
- `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map.
|
||||
- `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed.
|
||||
|
||||
### SQL Keywords
|
||||
|
||||
|
|
|
@ -448,7 +448,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
// TimestampConverter
|
||||
private[this] def castToTimestamp(from: DataType): Any => Any = from match {
|
||||
case StringType =>
|
||||
buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull)
|
||||
buildCast[UTF8String](_, utfs => {
|
||||
if (ansiEnabled) {
|
||||
DateTimeUtils.stringToTimestampAnsi(utfs, zoneId)
|
||||
} else {
|
||||
DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull
|
||||
}
|
||||
})
|
||||
case BooleanType =>
|
||||
buildCast[Boolean](_, b => if (b) 1L else 0)
|
||||
case LongType =>
|
||||
|
@ -1250,15 +1256,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
|
|||
zoneIdClass)
|
||||
val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
|
||||
(c, evPrim, evNull) =>
|
||||
code"""
|
||||
scala.Option<Long> $longOpt =
|
||||
org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid);
|
||||
if ($longOpt.isDefined()) {
|
||||
$evPrim = ((Long) $longOpt.get()).longValue();
|
||||
} else {
|
||||
$evNull = true;
|
||||
}
|
||||
"""
|
||||
if (ansiEnabled) {
|
||||
code"""
|
||||
$evPrim =
|
||||
org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestampAnsi($c, $zid);
|
||||
"""
|
||||
} else {
|
||||
code"""
|
||||
scala.Option<Long> $longOpt =
|
||||
org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid);
|
||||
if ($longOpt.isDefined()) {
|
||||
$evPrim = ((Long) $longOpt.get()).longValue();
|
||||
} else {
|
||||
$evNull = true;
|
||||
}
|
||||
"""
|
||||
}
|
||||
case BooleanType =>
|
||||
(c, evPrim, evNull) => code"$evPrim = $c ? 1L : 0L;"
|
||||
case _: IntegralType =>
|
||||
|
|
|
@ -720,10 +720,12 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
|
|||
case class ToUnixTimestamp(
|
||||
timeExp: Expression,
|
||||
format: Expression,
|
||||
timeZoneId: Option[String] = None)
|
||||
timeZoneId: Option[String] = None,
|
||||
failOnError: Boolean = SQLConf.get.ansiEnabled)
|
||||
extends UnixTime {
|
||||
|
||||
def this(timeExp: Expression, format: Expression) = this(timeExp, format, None)
|
||||
def this(timeExp: Expression, format: Expression) =
|
||||
this(timeExp, format, None, SQLConf.get.ansiEnabled)
|
||||
|
||||
override def left: Expression = timeExp
|
||||
override def right: Expression = format
|
||||
|
@ -767,10 +769,15 @@ case class ToUnixTimestamp(
|
|||
group = "datetime_funcs",
|
||||
since = "1.5.0")
|
||||
// scalastyle:on line.size.limit
|
||||
case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Option[String] = None)
|
||||
case class UnixTimestamp(
|
||||
timeExp: Expression,
|
||||
format: Expression,
|
||||
timeZoneId: Option[String] = None,
|
||||
failOnError: Boolean = SQLConf.get.ansiEnabled)
|
||||
extends UnixTime {
|
||||
|
||||
def this(timeExp: Expression, format: Expression) = this(timeExp, format, None)
|
||||
def this(timeExp: Expression, format: Expression) =
|
||||
this(timeExp, format, None, SQLConf.get.ansiEnabled)
|
||||
|
||||
override def left: Expression = timeExp
|
||||
override def right: Expression = format
|
||||
|
@ -792,6 +799,8 @@ case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Op
|
|||
abstract class ToTimestamp
|
||||
extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes {
|
||||
|
||||
def failOnError: Boolean
|
||||
|
||||
// The result of the conversion to timestamp is microseconds divided by this factor.
|
||||
// For example if the factor is 1000000, the result of the expression is in seconds.
|
||||
protected def downScaleFactor: Long
|
||||
|
@ -803,7 +812,14 @@ abstract class ToTimestamp
|
|||
Seq(TypeCollection(StringType, DateType, TimestampType), StringType)
|
||||
|
||||
override def dataType: DataType = LongType
|
||||
override def nullable: Boolean = true
|
||||
override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true
|
||||
|
||||
/**
 * Tells whether `e` is one of the exception types this expression treats as a
 * datetime parse failure: `DateTimeParseException`, `DateTimeException` or
 * `ParseException`. Any other throwable yields `false`.
 */
private def isParseError(e: Throwable): Boolean = e match {
  case _: DateTimeParseException => true
  case _: DateTimeException => true
  case _: ParseException => true
  case _ => false
}
|
||||
|
||||
override def eval(input: InternalRow): Any = {
|
||||
val t = left.eval(input)
|
||||
|
@ -824,9 +840,12 @@ abstract class ToTimestamp
|
|||
try {
|
||||
formatter.parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor
|
||||
} catch {
|
||||
case _: DateTimeParseException |
|
||||
_: DateTimeException |
|
||||
_: ParseException => null
|
||||
case e if isParseError(e) =>
|
||||
if (failOnError) {
|
||||
throw e
|
||||
} else {
|
||||
null
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -835,6 +854,7 @@ abstract class ToTimestamp
|
|||
|
||||
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
|
||||
val javaType = CodeGenerator.javaType(dataType)
|
||||
val parseErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;"
|
||||
left.dataType match {
|
||||
case StringType => formatterOption.map { fmt =>
|
||||
val df = classOf[TimestampFormatter].getName
|
||||
|
@ -844,11 +864,11 @@ abstract class ToTimestamp
|
|||
|try {
|
||||
| ${ev.value} = $formatterName.parse($datetimeStr.toString()) / $downScaleFactor;
|
||||
|} catch (java.time.DateTimeException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|} catch (java.time.format.DateTimeParseException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|} catch (java.text.ParseException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|}
|
||||
|""".stripMargin)
|
||||
}.getOrElse {
|
||||
|
@ -866,11 +886,11 @@ abstract class ToTimestamp
|
|||
|try {
|
||||
| ${ev.value} = $timestampFormatter.parse($string.toString()) / $downScaleFactor;
|
||||
|} catch (java.time.format.DateTimeParseException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|} catch (java.time.DateTimeException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|} catch (java.text.ParseException e) {
|
||||
| ${ev.isNull} = true;
|
||||
| $parseErrorBranch
|
||||
|}
|
||||
|""".stripMargin)
|
||||
}
|
||||
|
@ -1737,7 +1757,8 @@ case class DateDiff(endDate: Expression, startDate: Expression)
|
|||
private case class GetTimestamp(
|
||||
left: Expression,
|
||||
right: Expression,
|
||||
timeZoneId: Option[String] = None)
|
||||
timeZoneId: Option[String] = None,
|
||||
failOnError: Boolean = SQLConf.get.ansiEnabled)
|
||||
extends ToTimestamp {
|
||||
|
||||
override val downScaleFactor = 1
|
||||
|
|
|
@ -364,6 +364,15 @@ object DateTimeUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Strict counterpart of `stringToTimestamp`: returns the parsed value directly
 * instead of wrapping it in an `Option`.
 *
 * @param s the string to parse
 * @param timeZoneId the zone passed through to `stringToTimestamp`
 * @return the value produced by `stringToTimestamp` when it is defined
 * @throws DateTimeException with message "Cannot cast <s> to TimestampType."
 *                           when `stringToTimestamp` yields no result
 */
def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = {
  stringToTimestamp(s, timeZoneId).getOrElse {
    throw new DateTimeException(s"Cannot cast $s to TimestampType.")
  }
}
|
||||
|
||||
/**
|
||||
* Gets the number of microseconds since the epoch of 1970-01-01 00:00:00Z from the given
|
||||
* instance of `java.time.Instant`. The epoch microsecond count is a simple incrementing count of
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.spark.sql.catalyst.expressions
|
||||
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.time.DateTimeException
|
||||
import java.util.{Calendar, TimeZone}
|
||||
|
||||
import scala.collection.parallel.immutable.ParVector
|
||||
|
@ -106,8 +107,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkEvaluation(cast(Literal(str), TimestampType, Option(zid.getId)), expected)
|
||||
}
|
||||
|
||||
checkCastStringToTimestamp("123", null)
|
||||
|
||||
val tz = TimeZone.getTimeZone(zid)
|
||||
var c = Calendar.getInstance(tz)
|
||||
c.set(2015, 0, 1, 0, 0, 0)
|
||||
|
@ -184,15 +183,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
c.set(2015, 2, 18, 12, 3, 17)
|
||||
c.set(Calendar.MILLISECOND, 123)
|
||||
checkCastStringToTimestamp("2015-03-18T12:03:17.123+7:3", new Timestamp(c.getTimeInMillis))
|
||||
|
||||
checkCastStringToTimestamp("2015-03-18 123142", null)
|
||||
checkCastStringToTimestamp("2015-03-18T123123", null)
|
||||
checkCastStringToTimestamp("2015-03-18X", null)
|
||||
checkCastStringToTimestamp("2015/03/18", null)
|
||||
checkCastStringToTimestamp("2015.03.18", null)
|
||||
checkCastStringToTimestamp("20150318", null)
|
||||
checkCastStringToTimestamp("2015-031-8", null)
|
||||
checkCastStringToTimestamp("2015-03-18T12:03:17-0:70", null)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -302,7 +292,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
|
|||
}
|
||||
|
||||
checkEvaluation(cast("abdef", StringType), "abdef")
|
||||
checkEvaluation(cast("abdef", TimestampType, UTC_OPT), null)
|
||||
checkEvaluation(cast("12.65", DecimalType.SYSTEM_DEFAULT), Decimal(12.65))
|
||||
|
||||
checkEvaluation(cast(cast(sd, DateType), StringType), sd)
|
||||
|
@ -962,6 +951,34 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
|
|||
cast("abcd", DecimalType(38, 1)),
|
||||
"invalid input syntax for type numeric")
|
||||
}
|
||||
|
||||
test("ANSI mode: cast string to timestamp with parse error") {
  // Captured before entering the parallel section and re-installed inside it;
  // presumably the worker threads would not otherwise see this suite's conf.
  val suiteConf = conf

  // Strings that are expected to be rejected when cast to TimestampType.
  val malformedInputs = Seq(
    "123",
    "2015-03-18 123142",
    "2015-03-18T123123",
    "2015-03-18X",
    "2015/03/18",
    "2015.03.18",
    "20150318",
    "2015-031-8",
    "2015-03-18T12:03:17-0:70")

  new ParVector(ALL_TIMEZONES.toVector).foreach { zid =>
    val tzId = Option(zid.getId)

    SQLConf.withExistingConf(suiteConf) {
      // Each malformed literal must raise a DateTimeException naming the input.
      malformedInputs.foreach { text =>
        checkExceptionInExpression[DateTimeException](
          cast(Literal(text), TimestampType, tzId),
          s"Cannot cast $text to TimestampType.")
      }

      // Same expectation when the raw string (not a Literal) is handed to `cast`.
      val rawInput = "abdef"
      checkExceptionInExpression[DateTimeException](
        cast(rawInput, TimestampType, tzId),
        s"Cannot cast $rawInput to TimestampType.")
    }
  }
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,8 +18,9 @@
|
|||
package org.apache.spark.sql.catalyst.expressions
|
||||
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.text.SimpleDateFormat
|
||||
import java.text.{ParseException, SimpleDateFormat}
|
||||
import java.time.{Instant, LocalDate, ZoneId}
|
||||
import java.time.format.DateTimeParseException
|
||||
import java.util.{Calendar, Locale, TimeZone}
|
||||
import java.util.concurrent.TimeUnit._
|
||||
|
||||
|
@ -1286,4 +1287,58 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
testIntegralFunc(Long.MaxValue)
|
||||
testIntegralFunc(Long.MinValue)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") {
  for {
    ansiEnabled <- Seq(true, false)
    policy <- Seq("LEGACY", "CORRECTED", "EXCEPTION")
  } {
    withSQLConf(
      SQLConf.LEGACY_TIME_PARSER_POLICY.key -> policy,
      SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) {

      // The expressions are built inside withSQLConf on purpose: their
      // failOnError parameter defaults to SQLConf.get.ansiEnabled.
      val mismatched = Seq[Expression](
        GetTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")),
        GetTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")),
        UnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")),
        UnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")),
        ToUnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")),
        ToUnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS"))
      )

      // Without ANSI a parse failure evaluates to null; with ANSI the
      // exception type depends on the parser policy in effect.
      (ansiEnabled, policy) match {
        case (false, _) =>
          mismatched.foreach(checkEvaluation(_, null))
        case (true, "LEGACY") =>
          mismatched.foreach(checkExceptionInExpression[ParseException](_, "Unparseable"))
        case _ =>
          mismatched.foreach(
            checkExceptionInExpression[DateTimeParseException](_, "could not be parsed"))
      }

      // Inputs with trailing characters after the pattern: LEGACY evaluates them,
      // EXCEPTION raises SparkUpgradeException, and CORRECTED fails to parse
      // (an exception under ANSI, null otherwise).
      val trailingGarbage = Seq[(Expression, Long)](
        (GetTimestamp(Literal("2020-01-27T20:06:11.847!!!"),
          Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371847000L),
        (UnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"),
          Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L),
        (ToUnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"),
          Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L)
      )

      policy match {
        case "LEGACY" =>
          trailingGarbage.foreach { case (expr, expected) =>
            checkEvaluation(expr, expected)
          }
        case "EXCEPTION" =>
          trailingGarbage.foreach { case (expr, _) =>
            checkExceptionInExpression[SparkUpgradeException](
              expr,
              "You may get a different result due to the upgrading of Spark 3.0")
          }
        case _ if ansiEnabled =>
          trailingGarbage.foreach { case (expr, _) =>
            checkExceptionInExpression[DateTimeParseException](expr, "could not be parsed")
          }
        case _ =>
          trailingGarbage.foreach { case (expr, _) =>
            checkEvaluation(expr, null)
          }
      }
    }
  }
}
|
||||
}
|
||||
|
|
|
@ -153,3 +153,14 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat'
|
|||
select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy'));
|
||||
select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy'));
|
||||
select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy'));
|
||||
|
||||
-- Timestamp type parse error
|
||||
select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS");
|
||||
select cast("Unparseable" as timestamp)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 108
|
||||
-- Number of queries: 117
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -301,9 +301,10 @@ struct<CAST(2011-11-11 11:11:11 - INTERVAL '2 seconds' AS STRING):string>
|
|||
-- !query
|
||||
select '1' - interval '2' second
|
||||
-- !query schema
|
||||
struct<CAST(1 - INTERVAL '2 seconds' AS STRING):string>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.DateTimeException
|
||||
Cannot cast 1 to TimestampType.
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -600,9 +601,10 @@ struct<subtractdates(DATE '2001-10-01', DATE '2001-09-28'):interval>
|
|||
-- !query
|
||||
select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
|
||||
-- !query schema
|
||||
struct<to_timestamp(2019-10-06 10:11:12., yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2019-10-06 10:11:12.' could not be parsed at index 20
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -664,9 +666,10 @@ struct<to_timestamp(2019-10-06 10:11:12.123456PST, yyyy-MM-dd HH:mm:ss.SSSSSS[zz
|
|||
-- !query
|
||||
select to_timestamp('2019-10-06 10:11:12.1234567PST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
|
||||
-- !query schema
|
||||
struct<to_timestamp(2019-10-06 10:11:12.1234567PST, yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2019-10-06 10:11:12.1234567PST' could not be parsed, unparsed text found at index 26
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -680,9 +683,10 @@ struct<to_timestamp(123456 2019-10-06 10:11:12.123456PST, SSSSSS yyyy-MM-dd HH:m
|
|||
-- !query
|
||||
select to_timestamp('223456 2019-10-06 10:11:12.123456PST', 'SSSSSS yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
|
||||
-- !query schema
|
||||
struct<to_timestamp(223456 2019-10-06 10:11:12.123456PST, SSSSSS yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.format.DateTimeParseException
|
||||
Text '223456 2019-10-06 10:11:12.123456PST' could not be parsed at index 27
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -744,17 +748,19 @@ struct<to_timestamp(12.12342019-10-06S10:11, ss.SSSSyyyy-MM-dd'S'HH:mm):timestam
|
|||
-- !query
|
||||
select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyyyy-MM-dd'S'HH:mm")
|
||||
-- !query schema
|
||||
struct<to_timestamp(12.1232019-10-06S10:11, ss.SSSSyyyy-MM-dd'S'HH:mm):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.format.DateTimeParseException
|
||||
Text '12.1232019-10-06S10:11' could not be parsed at index 7
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyy-MM-dd'S'HH:mm")
|
||||
-- !query schema
|
||||
struct<to_timestamp(12.1232019-10-06S10:11, ss.SSSSyy-MM-dd'S'HH:mm):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.format.DateTimeParseException
|
||||
Text '12.1232019-10-06S10:11' could not be parsed at index 9
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -824,9 +830,10 @@ struct<to_timestamp(16, dd):timestamp>
|
|||
-- !query
|
||||
select to_timestamp("02-29", "MM-dd")
|
||||
-- !query schema
|
||||
struct<to_timestamp(02-29, MM-dd):timestamp>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.DateTimeException
|
||||
Invalid date 'February 29' as '1970' is not a leap year
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -840,9 +847,10 @@ struct<to_date(16, dd):date>
|
|||
-- !query
|
||||
select to_date("02-29", "MM-dd")
|
||||
-- !query schema
|
||||
struct<to_date(02-29, MM-dd):date>
|
||||
struct<>
|
||||
-- !query output
|
||||
NULL
|
||||
java.time.DateTimeException
|
||||
Invalid date 'February 29' as '1970' is not a leap year
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -931,3 +939,84 @@ struct<>
|
|||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2020-01-27T20:06:11.847' could not be parsed at index 10
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text 'Unparseable' could not be parsed at index 0
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2020-01-27T20:06:11.847' could not be parsed at index 10
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text 'Unparseable' could not be parsed at index 0
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2020-01-27T20:06:11.847' could not be parsed at index 10
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text 'Unparseable' could not be parsed at index 0
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text '2020-01-27T20:06:11.847' could not be parsed at index 10
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.format.DateTimeParseException
|
||||
Text 'Unparseable' could not be parsed at index 0
|
||||
|
||||
|
||||
-- !query
|
||||
select cast("Unparseable" as timestamp)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
java.time.DateTimeException
|
||||
Cannot cast Unparseable to TimestampType.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 108
|
||||
-- Number of queries: 117
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -901,3 +901,75 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy'))
|
|||
struct<from_csv(26/October/2015):struct<d:date>>
|
||||
-- !query output
|
||||
{"d":2015-10-26}
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_date(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):date>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_date(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):date>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<unix_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<unix_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_unix_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_unix_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select cast("Unparseable" as timestamp)
|
||||
-- !query schema
|
||||
struct<CAST(Unparseable AS TIMESTAMP):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 108
|
||||
-- Number of queries: 117
|
||||
|
||||
|
||||
-- !query
|
||||
|
@ -909,3 +909,75 @@ struct<>
|
|||
-- !query output
|
||||
org.apache.spark.SparkUpgradeException
|
||||
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_date(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):date>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_date(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):date>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<unix_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<unix_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_unix_timestamp(2020-01-27T20:06:11.847, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS")
|
||||
-- !query schema
|
||||
struct<to_unix_timestamp(Unparseable, yyyy-MM-dd HH:mm:ss.SSS):bigint>
|
||||
-- !query output
|
||||
NULL
|
||||
|
||||
|
||||
-- !query
|
||||
select cast("Unparseable" as timestamp)
|
||||
-- !query schema
|
||||
struct<CAST(Unparseable AS TIMESTAMP):timestamp>
|
||||
-- !query output
|
||||
NULL
|
||||
|
|
|
@ -71,7 +71,8 @@ insert into datetimes values
|
|||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
|
||||
org.apache.spark.sql.AnalysisException
|
||||
failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType.; line 1 pos 22
|
||||
|
||||
|
||||
-- !query
|
||||
|
|
Loading…
Reference in a new issue