[SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows

### What changes were proposed in this pull request?
Bug fix for overflow case in `UTF8String.substringSQL`.

### Why are the changes needed?
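SQL query `SELECT SUBSTRING("abc", -1207959552, -1207959552)` incorrectly returns `"abc"` instead of the expected output `""`, because `start + length` falls below `Integer.MIN_VALUE` and wraps around to a large positive end index. For comparison, the non-overflowing query `SUBSTRING("abc", -100, -100)` correctly returns `""`.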

### Does this PR introduce _any_ user-facing change?
Yes, bug fix for the overflow case.

### How was this patch tested?
New unit tests in `UTF8StringSuite` and `StringExpressionsSuite`.

Closes #28937 from xuanyuanking/SPARK-32115.

Authored-by: Yuanjian Li <xyliyuanjian@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
This commit is contained in:
Yuanjian Li 2020-06-28 12:22:44 -07:00 committed by Dongjoon Hyun
parent 8c44d74463
commit 6484c14c57
3 changed files with 18 additions and 1 deletions

View file

@ -341,8 +341,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
// to the -ith element before the end of the sequence. If a start index i is 0, it
// refers to the first element.
int len = numChars();
// `len + pos` does not overflow as `len >= 0`.
int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
int end = (length == Integer.MAX_VALUE) ? len : start + length;
int end;
if ((long) start + length > Integer.MAX_VALUE) {
end = Integer.MAX_VALUE;
} else if ((long) start + length < Integer.MIN_VALUE) {
end = Integer.MIN_VALUE;
} else {
end = start + length;
}
return substring(start, end);
}

View file

@ -390,6 +390,10 @@ public class UTF8StringSuite {
assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE));
assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE));
assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE));
assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100));
assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552));
assertEquals(fromString("pl"), e.substringSQL(-3, 2));
assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6));
}
@Test

View file

@ -236,6 +236,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
"xample",
row)
// Substring from a negative position with a negative length (overflow case)
checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType),
Literal.create(-1207959552, IntegerType)), "", row)
val s_notNull = 'a.string.notNull.at(0)
assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable)