[SPARK-27586][SQL] Improve binary comparison: replace Scala's for-comprehension if statements with while loop

## What changes were proposed in this pull request?

This PR replaces the for-comprehension and its `if` guard in `TypeUtils.compareBinary` with a `while` loop to gain better performance.

## How was this patch tested?

Added a unit test to check that the old version and the new version produce the same comparison results.

Closes #24494 from woudygao/opt_binary_compare.

Authored-by: gaoweikang <gaoweikang@bytedance.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
gaoweikang 2019-05-02 20:33:27 -07:00 committed by Dongjoon Hyun
parent 375cfa3d89
commit 3859ca37d9
2 changed files with 23 additions and 4 deletions

View file

@ -73,11 +73,12 @@ object TypeUtils {
}
// Compares two byte arrays position by position. Returns the difference of the first pair of
// bytes that differ (each masked with 0xff before subtracting), or `x.length - y.length` when
// no differing pair is found in the range both arrays cover.
// NOTE(review): this listing appears to be a diff whose +/- markers were lost, so lines from the
// old for-comprehension and the new while loop are shown together. Presumably the `for` line
// and the `v1` / `v2` / `val res = v1 - v2` lines are the removed version; confirm against the
// commit before treating this as a single compilable body.
def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
val v1 = x(i) & 0xff
val v2 = y(i) & 0xff
val res = v1 - v2
// Only indices present in both arrays are compared, so stop at the shorter length.
val limit = if (x.length <= y.length) x.length else y.length
var i = 0
while (i < limit) {
// Masking with 0xff reads each signed Byte as a value in 0..255 before subtracting.
val res = (x(i) & 0xff) - (y(i) & 0xff)
// The first differing position decides the result.
if (res != 0) return res
i += 1
}
// No difference within the compared range: fall back to the length difference.
x.length - y.length
}

View file

@ -43,4 +43,22 @@ class TypeUtilsSuite extends SparkFunSuite {
typeCheckPass(ArrayType(StringType, containsNull = true) ::
ArrayType(StringType, containsNull = false) :: Nil)
}
test("compareBinary") {
  // Builds a byte array from Int literals; values above 127 wrap around via toByte.
  def bytes(values: Int*): Array[Byte] = values.map(_.toByte).toArray

  // An empty left side against a non-empty right side compares as less.
  assert(TypeUtils.compareBinary(Array.empty[Byte], bytes(1, 2, 3)) < 0)

  // First position differs (200 vs 100): the left side compares as greater.
  assert(TypeUtils.compareBinary(bytes(200, 100), bytes(100, 100)) > 0)

  // Same leading bytes, but the left side is longer: it compares as greater.
  assert(TypeUtils.compareBinary(bytes(100, 200, 12), bytes(100, 200)) > 0)

  // Identical contents compare as equal.
  assert(TypeUtils.compareBinary(bytes(100, 200), bytes(100, 200)) == 0)
}
}