diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
index a625b32895..34d36655a6 100644
--- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
+++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
@@ -48,7 +48,7 @@ private[spark] class ChunkedByteBufferOutputStream(
    * This can also never be 0.
    */
   private[this] var position = chunkSize
-  private[this] var _size = 0
+  private[this] var _size = 0L
   private[this] var closed: Boolean = false
 
   def size: Long = _size
@@ -120,4 +120,5 @@ private[spark] class ChunkedByteBufferOutputStream(
       new ChunkedByteBuffer(ret)
     }
   }
+
 }
diff --git a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
index 8696174567..29443e275f 100644
--- a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
@@ -119,4 +119,14 @@ class ChunkedByteBufferOutputStreamSuite extends SparkFunSuite {
     assert(arrays(1).toSeq === ref.slice(10, 20))
     assert(arrays(2).toSeq === ref.slice(20, 30))
   }
+
+  test("SPARK-36464: size returns correct positive number even with over 2GB data") {
+    val ref = new Array[Byte](1024 * 1024 * 1024)
+    val o = new ChunkedByteBufferOutputStream(1024 * 1024, ByteBuffer.allocate)
+    o.write(ref)
+    o.write(ref)
+    o.close()
+    assert(o.size > 0L) // make sure it is not overflowing
+    assert(o.size == ref.length.toLong * 2)
+  }
 }
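
Note (not part of the patch): the root cause is that `_size` was initialized with the
literal `0`, so Scala inferred `Int` for the field; once more than Int.MaxValue (~2 GiB)
bytes were written, the counter wrapped negative, even though `def size: Long` widened
the value on read. Initializing with `0L` makes the field a `Long`. Below is a minimal
standalone sketch of the wraparound; the names (SizeOverflowDemo, oneGiB, sizeInt,
sizeLong) are illustrative only and do not appear in the Spark codebase.

// Illustrative sketch only (not Spark code): shows why `0` vs `0L` matters.
object SizeOverflowDemo {
  def main(args: Array[String]): Unit = {
    var sizeInt  = 0   // inferred as Int, like the old `_size = 0`
    var sizeLong = 0L  // inferred as Long, like the fixed `_size = 0L`
    val oneGiB = 1024 * 1024 * 1024
    for (_ <- 1 to 3) { // simulate writing 3 GiB in 1 GiB chunks
      sizeInt += oneGiB
      sizeLong += oneGiB
    }
    println(sizeInt)  // -1073741824: wrapped past Int.MaxValue
    println(sizeLong) // 3221225472: the correct byte count
  }
}

The new test exercises the same path end to end: it writes two 1 GiB arrays through a
stream with 1 MiB chunks, then asserts that `size` stays positive and equals exactly
twice the array length.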