[SPARK-30292][SQL][FOLLOWUP] ANSI cast from strings to integral numbers (byte/short/int/long) should fail on fractional input
### What changes were proposed in this pull request?

This is a followup of https://github.com/apache/spark/pull/26933.

A fractional string like "1.23" is definitely not a valid integral format, so casting it to an integral type should fail under ANSI mode.

### Why are the changes needed?

To correct the ANSI cast behavior from string to integral types.

### Does this PR introduce any user-facing change?

Yes, under ANSI mode, but ANSI mode is off by default.

### How was this patch tested?

New test.

Closes #27957 from cloud-fan/ansi.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
parent
a1776288f4
commit
ac262cb272
|
@ -1105,6 +1105,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
* @return true if the parsing was successful else false
|
||||
*/
|
||||
public boolean toLong(LongWrapper toLongResult) {
|
||||
return toLong(toLongResult, true);
|
||||
}
|
||||
|
||||
private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) {
|
||||
int offset = 0;
|
||||
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
|
||||
if (offset == this.numBytes) return false;
|
||||
|
@ -1129,7 +1133,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
while (offset <= end) {
|
||||
b = getByte(offset);
|
||||
offset++;
|
||||
if (b == separator) {
|
||||
if (b == separator && allowDecimal) {
|
||||
// We allow decimals and will return a truncated integral in that case.
|
||||
// Therefore we won't throw an exception here (checking the fractional
|
||||
// part happens below.)
|
||||
|
@ -1198,6 +1202,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
* @return true if the parsing was successful else false
|
||||
*/
|
||||
public boolean toInt(IntWrapper intWrapper) {
|
||||
return toInt(intWrapper, true);
|
||||
}
|
||||
|
||||
private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) {
|
||||
int offset = 0;
|
||||
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
|
||||
if (offset == this.numBytes) return false;
|
||||
|
@ -1222,7 +1230,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
while (offset <= end) {
|
||||
b = getByte(offset);
|
||||
offset++;
|
||||
if (b == separator) {
|
||||
if (b == separator && allowDecimal) {
|
||||
// We allow decimals and will return a truncated integral in that case.
|
||||
// Therefore we won't throw an exception here (checking the fractional
|
||||
// part happens below.)
|
||||
|
@ -1276,9 +1284,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
if (toInt(intWrapper)) {
|
||||
int intValue = intWrapper.value;
|
||||
short result = (short) intValue;
|
||||
if (result == intValue) {
|
||||
return true;
|
||||
}
|
||||
return result == intValue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1287,9 +1293,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
if (toInt(intWrapper)) {
|
||||
int intValue = intWrapper.value;
|
||||
byte result = (byte) intValue;
|
||||
if (result == intValue) {
|
||||
return true;
|
||||
}
|
||||
return result == intValue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1302,7 +1306,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
*/
|
||||
public long toLongExact() {
|
||||
LongWrapper result = new LongWrapper();
|
||||
if (toLong(result)) {
|
||||
if (toLong(result, false)) {
|
||||
return result.value;
|
||||
}
|
||||
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
|
||||
|
@ -1316,7 +1320,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
*/
|
||||
public int toIntExact() {
|
||||
IntWrapper result = new IntWrapper();
|
||||
if (toInt(result)) {
|
||||
if (toInt(result, false)) {
|
||||
return result.value;
|
||||
}
|
||||
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
|
||||
|
|
|
@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase {
|
|||
cast("123-string", dataType), "invalid input")
|
||||
checkExceptionInExpression[NumberFormatException](
|
||||
cast("2020-07-19", dataType), "invalid input")
|
||||
checkExceptionInExpression[NumberFormatException](
|
||||
cast("1.23", dataType), "invalid input")
|
||||
}
|
||||
|
||||
Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType =>
|
||||
|
|
Loading…
Reference in a new issue