From 2f7d0b68a29de9755fc9fafd9a52c048981ad880 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 16 Jan 2016 00:38:17 -0800 Subject: [PATCH] [SPARK-12856] [SQL] speed up hashCode of unsafe array Previously we iterated over the bytes to calculate hashCode, but now we have `Murmur3_x86_32.hashUnsafeBytes`, which doesn't require the bytes to be word aligned, so we should use that instead. A simple benchmark shows it's about 3x faster; benchmark code: https://gist.github.com/cloud-fan/fa77713ccebf0823b2ab#file-arrayhashbenchmark-scala Author: Wenchen Fan Closes #10784 from cloud-fan/array-hashcode. --- .../spark/sql/catalyst/expressions/UnsafeArrayData.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 3d80df2271..648625b2cc 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.hash.Murmur3_x86_32; import org.apache.spark.unsafe.types.CalendarInterval; import org.apache.spark.unsafe.types.UTF8String; @@ -299,11 +300,7 @@ public class UnsafeArrayData extends ArrayData { @Override public int hashCode() { - int result = 37; - for (int i = 0; i < sizeInBytes; i++) { - result = 37 * result + Platform.getByte(baseObject, baseOffset + i); - } - return result; + return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); } @Override