[SPARK-3598][SQL] Cast to timestamp should behave the same as in Hive

This patch fixes the handling of timestamps smaller than 0 (i.e. before the epoch) and the cast from an integer to a timestamp.

select cast(1000 as timestamp) from src limit 1;

should return 1970-01-01 00:00:01 (the integer is interpreted as milliseconds, as Hive does), but we currently treat it as 1000 seconds.
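Also, the current implementation has a bug when the time is before 1970-01-01 00:00:00: the seconds part is truncated toward zero instead of being floored, so negative timestamps convert to the wrong second.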
rxin marmbrus chenghao-intel

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #2458 from adrian-wang/timestamp and squashes the following commits:

4274b1d [Daoyuan Wang] set test not related to timezone
1234f66 [Daoyuan Wang] fix timestamp smaller than 0 and cast int as timestamp
This commit is contained in:
Daoyuan Wang 2014-09-23 11:45:44 -07:00 committed by Michael Armbrust
parent 11c10df825
commit 66bc0f2d67
11 changed files with 50 additions and 15 deletions

View file

@ -86,15 +86,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
try Timestamp.valueOf(n) catch { case _: java.lang.IllegalArgumentException => null }
})
case BooleanType =>
buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0) * 1000))
buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0)))
case LongType =>
buildCast[Long](_, l => new Timestamp(l * 1000))
buildCast[Long](_, l => new Timestamp(l))
case IntegerType =>
buildCast[Int](_, i => new Timestamp(i * 1000))
buildCast[Int](_, i => new Timestamp(i))
case ShortType =>
buildCast[Short](_, s => new Timestamp(s * 1000))
buildCast[Short](_, s => new Timestamp(s))
case ByteType =>
buildCast[Byte](_, b => new Timestamp(b * 1000))
buildCast[Byte](_, b => new Timestamp(b))
// TimestampWritable.decimalToTimestamp
case DecimalType =>
buildCast[BigDecimal](_, d => decimalToTimestamp(d))
@ -107,11 +107,10 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
private[this] def decimalToTimestamp(d: BigDecimal) = {
val seconds = d.longValue()
val seconds = Math.floor(d.toDouble).toLong
val bd = (d - seconds) * 1000000000
val nanos = bd.intValue()
// Convert to millis
val millis = seconds * 1000
val t = new Timestamp(millis)
@ -121,11 +120,11 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
// Timestamp to long, converting milliseconds to seconds
private[this] def timestampToLong(ts: Timestamp) = ts.getTime / 1000
private[this] def timestampToLong(ts: Timestamp) = Math.floor(ts.getTime / 1000.0).toLong
private[this] def timestampToDouble(ts: Timestamp) = {
// First part is the seconds since the beginning of time, followed by nanosecs.
ts.getTime / 1000 + ts.getNanos.toDouble / 1000000000
Math.floor(ts.getTime / 1000.0).toLong + ts.getNanos.toDouble / 1000000000
}
// Converts Timestamp to string according to Hive TimestampWritable convention

View file

@ -231,7 +231,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
checkEvaluation(Literal(1) cast LongType, 1)
checkEvaluation(Cast(Literal(1) cast TimestampType, LongType), 1)
checkEvaluation(Cast(Literal(1000) cast TimestampType, LongType), 1.toLong)
checkEvaluation(Cast(Literal(-1200) cast TimestampType, LongType), -2.toLong)
checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
@ -242,11 +244,11 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 0)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast TimestampType, ByteType), DecimalType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 5)
Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 0)
checkEvaluation(Literal(true) cast IntegerType, 1)
checkEvaluation(Literal(false) cast IntegerType, 0)
checkEvaluation(Cast(Literal(1) cast BooleanType, IntegerType), 1)
@ -293,16 +295,18 @@ class ExpressionEvaluationSuite extends FunSuite {
test("timestamp casting") {
val millis = 15 * 1000 + 2
val seconds = millis * 1000 + 2
val ts = new Timestamp(millis)
val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
val tss = new Timestamp(seconds)
checkEvaluation(Cast(ts, ShortType), 15)
checkEvaluation(Cast(ts, IntegerType), 15)
checkEvaluation(Cast(ts, LongType), 15)
checkEvaluation(Cast(ts, FloatType), 15.002f)
checkEvaluation(Cast(ts, DoubleType), 15.002)
checkEvaluation(Cast(Cast(ts, ShortType), TimestampType), ts1)
checkEvaluation(Cast(Cast(ts, IntegerType), TimestampType), ts1)
checkEvaluation(Cast(Cast(ts, LongType), TimestampType), ts1)
checkEvaluation(Cast(Cast(tss, ShortType), TimestampType), ts)
checkEvaluation(Cast(Cast(tss, IntegerType), TimestampType), ts)
checkEvaluation(Cast(Cast(tss, LongType), TimestampType), ts)
checkEvaluation(Cast(Cast(millis.toFloat / 1000, TimestampType), FloatType),
millis.toFloat / 1000)
checkEvaluation(Cast(Cast(millis.toDouble / 1000, TimestampType), DoubleType),

View file

@ -0,0 +1 @@
-0.0010000000000000009

View file

@ -303,6 +303,30 @@ class HiveQuerySuite extends HiveComparisonTest {
createQueryTest("case statements WITHOUT key #4",
"SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 ELSE 0 END) FROM src WHERE key < 15")
createQueryTest("timestamp cast #1",
"SELECT CAST(CAST(1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
createQueryTest("timestamp cast #2",
"SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
createQueryTest("timestamp cast #3",
"SELECT CAST(CAST(1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
createQueryTest("timestamp cast #4",
"SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
createQueryTest("timestamp cast #5",
"SELECT CAST(CAST(-1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
createQueryTest("timestamp cast #6",
"SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
createQueryTest("timestamp cast #7",
"SELECT CAST(CAST(-1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
createQueryTest("timestamp cast #8",
"SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
test("implement identity function using case statement") {
val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src")
.map { case Row(i: Int) => i }