[SPARK-11737] [SQL] Fix serialization of UTF8String with Kyro

The default implementation of serialization UTF8String with Kyro may be not correct (BYTE_ARRAY_OFFSET could be different across JVM)

Author: Davies Liu <davies@databricks.com>

Closes #9704 from davies/kyro_string.
This commit is contained in:
Davies Liu 2015-11-17 19:50:02 -08:00 committed by Davies Liu
parent e33053ee00
commit 98be8169f0
2 changed files with 26 additions and 2 deletions

View file

@ -36,6 +36,10 @@
</properties>
<dependencies>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>chill_${scala.binary.version}</artifactId>
</dependency>
<!-- Core dependencies -->
<dependency>

View file

@ -24,6 +24,11 @@ import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Map;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;
@ -38,9 +43,9 @@ import static org.apache.spark.unsafe.Platform.*;
* <p>
* Note: This is not designed for general use cases, should not be used outside SQL.
*/
public final class UTF8String implements Comparable<UTF8String>, Externalizable {
public final class UTF8String implements Comparable<UTF8String>, Externalizable, KryoSerializable {
// These are only updated by readExternal()
// These are only updated by readExternal() or read()
@Nonnull
private Object base;
private long offset;
@ -1003,4 +1008,19 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable
in.readFully((byte[]) base);
}
@Override
public void write(Kryo kryo, Output out) {
byte[] bytes = getBytes();
out.writeInt(bytes.length);
out.write(bytes);
}
@Override
public void read(Kryo kryo, Input in) {
this.offset = BYTE_ARRAY_OFFSET;
this.numBytes = in.readInt();
this.base = new byte[numBytes];
in.read((byte[]) base);
}
}