diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index 3ced2094f5..7ced13d357 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java @@ -44,7 +44,7 @@ public final class ByteArray { final int minLen = Math.min(bytes.length, 8); long p = 0; for (int i = 0; i < minLen; ++i) { - p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i)) + p |= ((long) Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i) & 0xff) << (56 - 8 * i); } return p; diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala index 5180c58a56..73546ef1b7 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala @@ -63,7 +63,9 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks { def compareBinary(x: Array[Byte], y: Array[Byte]): Int = { for (i <- 0 until x.length; if i < y.length) { - val res = x(i).compare(y(i)) + val v1 = x(i) & 0xff + val v2 = y(i) & 0xff + val res = v1 - v2 if (res != 0) return res } x.length - y.length @@ -80,6 +82,19 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks { (prefixComparisonResult > 0 && compareBinary(x, y) > 0)) } + val binaryRegressionTests = Seq( + (Array[Byte](1), Array[Byte](-1)), + (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)), + (Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1)), + (Array[Byte](1), Array[Byte](1, 1, 1, 1)), + (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1)), + (Array[Byte](-1), Array[Byte](-1, -1, -1, -1)), + (Array[Byte](-1, -1, -1, -1, -1), Array[Byte](-1, -1, -1, -1, -1, -1, -1, -1, -1)) + ) + binaryRegressionTests.foreach { case (b1, b2) => + testPrefixComparison(b1, b2) + } + // scalastyle:off val regressionTests = Table( ("s1", "s2"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala index 7101ca5a17..45225779bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala @@ -70,7 +70,9 @@ object TypeUtils { def compareBinary(x: Array[Byte], y: Array[Byte]): Int = { for (i <- 0 until x.length; if i < y.length) { - val res = x(i).compareTo(y(i)) + val v1 = x(i) & 0xff + val v2 = y(i) & 0xff + val res = v1 - v2 if (res != 0) return res } x.length - y.length diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 190fab5d24..aa61ba2bff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -137,4 +137,23 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { // verify that we can support up to 5000 ordering comparisons, which should be sufficient GenerateOrdering.generate(Array.fill(5000)(sortOrder)) } + + test("SPARK-21344: BinaryType comparison does signed byte array comparison") { + val data = Seq( + (Array[Byte](1), Array[Byte](-1)), + (Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)), + (Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1)) + ) + data.foreach { case (b1, b2) => + val rowOrdering = InterpretedOrdering.forSchema(Seq(BinaryType)) + val genOrdering = GenerateOrdering.generate( + BoundReference(0, BinaryType, nullable = true).asc :: Nil) + val rowType = StructType(StructField("b", BinaryType, nullable = true) :: Nil) + val toCatalyst = CatalystTypeConverters.createToCatalystConverter(rowType) + val rowB1 = toCatalyst(Row(b1)).asInstanceOf[InternalRow] + val rowB2 = toCatalyst(Row(b2)).asInstanceOf[InternalRow] + assert(rowOrdering.compare(rowB1, rowB2) < 0) + assert(genOrdering.compare(rowB1, rowB2) < 0) + } + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/comparator.sql b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql new file mode 100644 index 0000000000..3e2447723e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql @@ -0,0 +1,3 @@ +-- binary type +select x'00' < x'0f'; +select x'00' < x'ff'; diff --git a/sql/core/src/test/resources/sql-tests/results/comparator.sql.out b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out new file mode 100644 index 0000000000..afc7b5448b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out @@ -0,0 +1,18 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query 0 +select x'00' < x'0f' +-- !query 0 schema +struct<(X'00' < X'0F'):boolean> +-- !query 0 output +true + + +-- !query 1 +select x'00' < x'ff' +-- !query 1 schema +struct<(X'00' < X'FF'):boolean> +-- !query 1 output +true