[SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
### What changes were proposed in this pull request? Bug fix for the integer overflow case in `UTF8String.substringSQL`. ### Why are the changes needed? The SQL query `SELECT SUBSTRING("abc", -1207959552, -1207959552)` incorrectly returns `"abc"` instead of the expected output `""`, because the `int` sum `start + length` overflows and wraps around to a positive end index. For a query that does not overflow, such as `SUBSTRING("abc", -100, -100)`, we get the correct output `""`. ### Does this PR introduce _any_ user-facing change? Yes, bug fix for the overflow case. ### How was this patch tested? New UT. Closes #28937 from xuanyuanking/SPARK-32115. Authored-by: Yuanjian Li <xyliyuanjian@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
parent
8c44d74463
commit
6484c14c57
|
@ -341,8 +341,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
// to the -ith element before the end of the sequence. If a start index i is 0, it
|
||||
// refers to the first element.
|
||||
int len = numChars();
|
||||
// `len + pos` does not overflow as `len >= 0`.
|
||||
int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
|
||||
int end = (length == Integer.MAX_VALUE) ? len : start + length;
|
||||
|
||||
int end;
|
||||
if ((long) start + length > Integer.MAX_VALUE) {
|
||||
end = Integer.MAX_VALUE;
|
||||
} else if ((long) start + length < Integer.MIN_VALUE) {
|
||||
end = Integer.MIN_VALUE;
|
||||
} else {
|
||||
end = start + length;
|
||||
}
|
||||
return substring(start, end);
|
||||
}
|
||||
|
||||
|
|
|
@ -390,6 +390,10 @@ public class UTF8StringSuite {
|
|||
assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE));
|
||||
assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE));
|
||||
assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE));
|
||||
assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100));
|
||||
assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552));
|
||||
assertEquals(fromString("pl"), e.substringSQL(-3, 2));
|
||||
assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -236,6 +236,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
"xample",
|
||||
row)
|
||||
|
||||
// Substring from a negative position with a negative length
|
||||
checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType),
|
||||
Literal.create(-1207959552, IntegerType)), "", row)
|
||||
|
||||
val s_notNull = 'a.string.notNull.at(0)
|
||||
|
||||
assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable)
|
||||
|
|
Loading…
Reference in a new issue