[SPARK-18126][SPARK-CORE] getIteratorZipWithIndex accepts negative value as index
## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) `Utils.getIteratorZipWithIndex` was added to deal with number of records > 2147483647 in one partition. method `getIteratorZipWithIndex` accepts `startIndex` < 0, which leads to negative index. This PR just adds a defensive check on `startIndex` to make sure it is >= 0. ## How was this patch tested? Add a new unit test. Author: Miao Wang <miaowang@Miaos-MacBook-Pro.local> Closes #15639 from wangmiao1981/zip.
This commit is contained in:
parent
29cea8f332
commit
a76846cfb1
|
@ -1765,6 +1765,7 @@ private[spark] object Utils extends Logging {
|
||||||
*/
|
*/
|
||||||
def getIteratorZipWithIndex[T](iterator: Iterator[T], startIndex: Long): Iterator[(T, Long)] = {
|
def getIteratorZipWithIndex[T](iterator: Iterator[T], startIndex: Long): Iterator[(T, Long)] = {
|
||||||
new Iterator[(T, Long)] {
|
new Iterator[(T, Long)] {
|
||||||
|
require(startIndex >= 0, "startIndex should be >= 0.")
|
||||||
var index: Long = startIndex - 1L
|
var index: Long = startIndex - 1L
|
||||||
def hasNext: Boolean = iterator.hasNext
|
def hasNext: Boolean = iterator.hasNext
|
||||||
def next(): (T, Long) = {
|
def next(): (T, Long) = {
|
||||||
|
|
|
@ -401,6 +401,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
|
||||||
assert(iterator.toArray === Array(
|
assert(iterator.toArray === Array(
|
||||||
(0, -1L + Int.MaxValue), (1, 0L + Int.MaxValue), (2, 1L + Int.MaxValue)
|
(0, -1L + Int.MaxValue), (1, 0L + Int.MaxValue), (2, 1L + Int.MaxValue)
|
||||||
))
|
))
|
||||||
|
intercept[IllegalArgumentException] {
|
||||||
|
Utils.getIteratorZipWithIndex(Iterator(0, 1, 2), -1L)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test("doesDirectoryContainFilesNewerThan") {
|
test("doesDirectoryContainFilesNewerThan") {
|
||||||
|
|
Loading…
Reference in a new issue