[SPARK-30292][SQL][FOLLOWUP] ANSI cast from strings to integral numbers (byte/short/int/long) should fail on fractional input

### What changes were proposed in this pull request?

This is a followup of https://github.com/apache/spark/pull/26933

A fractional string such as "1.23" is not a valid integral format, so casting it to an integral type should fail under ANSI mode.

### Why are the changes needed?

To correct the ANSI cast behavior from string to integral types: previously a fractional string was accepted and truncated instead of being rejected.

### Does this PR introduce any user-facing change?

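Yes, under ANSI mode: casting a fractional string such as "1.23" to an integral type now fails with a NumberFormatException. ANSI mode is off by default.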

### How was this patch tested?

A new test case in AnsiCastSuite.

Closes #27957 from cloud-fan/ansi.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
Wenchen Fan 2020-03-20 00:52:09 +09:00 committed by Takeshi Yamamuro
parent a1776288f4
commit ac262cb272
2 changed files with 16 additions and 10 deletions

View file

@ -1105,6 +1105,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
* @return true if the parsing was successful else false
*/
public boolean toLong(LongWrapper toLongResult) {
// Delegates to the two-argument overload with allowDecimal = true, so a decimal
// separator in the input is accepted and the value is truncated to its integral part.
return toLong(toLongResult, true);
}
private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) {
int offset = 0;
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
if (offset == this.numBytes) return false;
@ -1129,7 +1133,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (offset <= end) {
b = getByte(offset);
offset++;
if (b == separator) {
if (b == separator && allowDecimal) {
// We allow decimals and will return a truncated integral in that case.
// Therefore we won't throw an exception here (checking the fractional
// part happens below.)
@ -1198,6 +1202,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
* @return true if the parsing was successful else false
*/
public boolean toInt(IntWrapper intWrapper) {
// Delegates to the two-argument overload with allowDecimal = true, so a decimal
// separator in the input is accepted and the value is truncated to its integral part.
return toInt(intWrapper, true);
}
private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) {
int offset = 0;
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
if (offset == this.numBytes) return false;
@ -1222,7 +1230,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (offset <= end) {
b = getByte(offset);
offset++;
if (b == separator) {
if (b == separator && allowDecimal) {
// We allow decimals and will return a truncated integral in that case.
// Therefore we won't throw an exception here (checking the fractional
// part happens below.)
@ -1276,9 +1284,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (toInt(intWrapper)) {
int intValue = intWrapper.value;
short result = (short) intValue;
if (result == intValue) {
return true;
}
return result == intValue;
}
return false;
}
@ -1287,9 +1293,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (toInt(intWrapper)) {
int intValue = intWrapper.value;
byte result = (byte) intValue;
if (result == intValue) {
return true;
}
return result == intValue;
}
return false;
}
@ -1302,7 +1306,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
*/
public long toLongExact() {
LongWrapper result = new LongWrapper();
if (toLong(result)) {
if (toLong(result, false)) {
return result.value;
}
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
@ -1316,7 +1320,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
*/
public int toIntExact() {
IntWrapper result = new IntWrapper();
if (toInt(result)) {
if (toInt(result, false)) {
return result.value;
}
throw new NumberFormatException("invalid input syntax for type numeric: " + this);

View file

@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase {
cast("123-string", dataType), "invalid input")
checkExceptionInExpression[NumberFormatException](
cast("2020-07-19", dataType), "invalid input")
checkExceptionInExpression[NumberFormatException](
cast("1.23", dataType), "invalid input")
}
Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType =>