[SPARK-25704][CORE] Allocate a bit less than Int.MaxValue
JVMs don't let you allocate arrays of length exactly Int.MaxValue, so leave a little extra room. This is necessary when reading blocks >2GB off the network (for remote reads or for cache replication).

Tested via unit tests on Jenkins, plus a manual test with blocks over 2GB on a cluster.

Closes #22705 from squito/SPARK-25704.

Authored-by: Imran Rashid <irashid@cloudera.com>
Signed-off-by: Imran Rashid <irashid@cloudera.com>
parent 130121711c
commit 43717dee57
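Background on the limit this patch works around: most JVMs reserve a few header words per array, so allocating an array of exactly Int.MaxValue elements fails with "Requested array size exceeds VM limit" even when the heap is large enough. Spark encodes the safe ceiling as ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH (Int.MaxValue - 15). A minimal standalone sketch (the demo object is illustrative, not part of the patch):

// Sketch: why chunk sizes must stay strictly below Int.MaxValue.
object MaxArrayLimitDemo {
  // Mirrors org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.
  val SafeMaxArrayLength: Int = Int.MaxValue - 15

  def main(args: Array[String]): Unit = {
    println(s"Safe max array length: $SafeMaxArrayLength")
    // Even with ample heap (e.g. -Xmx8g), the following would throw
    // java.lang.OutOfMemoryError: Requested array size exceeds VM limit
    // val tooBig = new Array[Byte](Int.MaxValue)
  }
}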
core/src/main/scala/org/apache/spark/storage/BlockManager.scala

@@ -133,8 +133,6 @@ private[spark] class BlockManager(
   private[spark] val externalShuffleServiceEnabled =
     conf.get(config.SHUFFLE_SERVICE_ENABLED)
-  private val chunkSize =
-    conf.getSizeAsBytes("spark.storage.memoryMapLimitForTests", Int.MaxValue.toString).toInt
   private val remoteReadNioBufferConversion =
     conf.getBoolean("spark.network.remoteReadNioBufferConversion", false)
@@ -451,7 +449,7 @@ private[spark] class BlockManager(
           new EncryptedBlockData(tmpFile, blockSize, conf, key).toChunkedByteBuffer(allocator)

         case None =>
-          ChunkedByteBuffer.fromFile(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt)
+          ChunkedByteBuffer.fromFile(tmpFile)
       }
       putBytes(blockId, buffer, level)(classTag)
       tmpFile.delete()
@@ -797,7 +795,7 @@ private[spark] class BlockManager(
       if (remoteReadNioBufferConversion) {
         return Some(new ChunkedByteBuffer(data.nioByteBuffer()))
       } else {
-        return Some(ChunkedByteBuffer.fromManagedBuffer(data, chunkSize))
+        return Some(ChunkedByteBuffer.fromManagedBuffer(data))
       }
     }
     logDebug(s"The value of block $blockId is null")
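(Reading the hunk above: with the fallback flag spark.network.remoteReadNioBufferConversion set, remote reads still go through a single nioByteBuffer(), which cannot exceed 2GB; on the default path the ManagedBuffer is now chunked internally, so the chunkSize field removed in the first hunk no longer needs to be threaded through.)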
core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala

@@ -30,6 +30,7 @@ import org.apache.spark.internal.config
 import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
 import org.apache.spark.network.util.{ByteArrayWritableChannel, LimitedInputStream}
 import org.apache.spark.storage.StorageUtils
+import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.Utils

 /**
@@ -169,24 +170,25 @@ private[spark] class ChunkedByteBuffer(var chunks: Array[ByteBuffer]) {
 }

-object ChunkedByteBuffer {
+private[spark] object ChunkedByteBuffer {
+

   // TODO eliminate this method if we switch BlockManager to getting InputStreams
-  def fromManagedBuffer(data: ManagedBuffer, maxChunkSize: Int): ChunkedByteBuffer = {
+  def fromManagedBuffer(data: ManagedBuffer): ChunkedByteBuffer = {
     data match {
       case f: FileSegmentManagedBuffer =>
-        fromFile(f.getFile, maxChunkSize, f.getOffset, f.getLength)
+        fromFile(f.getFile, f.getOffset, f.getLength)
       case other =>
         new ChunkedByteBuffer(other.nioByteBuffer())
     }
   }

-  def fromFile(file: File, maxChunkSize: Int): ChunkedByteBuffer = {
-    fromFile(file, maxChunkSize, 0, file.length())
+  def fromFile(file: File): ChunkedByteBuffer = {
+    fromFile(file, 0, file.length())
   }

   private def fromFile(
       file: File,
-      maxChunkSize: Int,
       offset: Long,
       length: Long): ChunkedByteBuffer = {
     // We do *not* memory map the file, because we may end up putting this into the memory store,
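In caller terms the API change reads roughly like this (shape only; `file` and `buf` are illustrative names, and the object is now private[spark], so these calls only compile inside Spark):

// Before: every caller picked a chunk size.
//   val cbb = ChunkedByteBuffer.fromFile(file, maxChunkSize)
//   val cbb = ChunkedByteBuffer.fromManagedBuffer(buf, maxChunkSize)
// After: the cap is applied internally, below the JVM array-length limit.
//   val cbb = ChunkedByteBuffer.fromFile(file)
//   val cbb = ChunkedByteBuffer.fromManagedBuffer(buf)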
@@ -195,7 +197,7 @@ object ChunkedByteBuffer {
     val is = new FileInputStream(file)
     ByteStreams.skipFully(is, offset)
     val in = new LimitedInputStream(is, length)
-    val chunkSize = math.min(maxChunkSize, length).toInt
+    val chunkSize = math.min(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH, length).toInt
     val out = new ChunkedByteBufferOutputStream(chunkSize, ByteBuffer.allocate _)
     Utils.tryWithSafeFinally {
       IOUtils.copy(in, out)
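To illustrate the arithmetic in the new chunkSize line, a standalone sketch (the object name and the 5GiB figure are illustrative): a block larger than 2GB simply spans several ByteBuffers, each within the safe array-length cap.

// Sketch of the chunking arithmetic; not Spark code.
object ChunkMath {
  def main(args: Array[String]): Unit = {
    val maxChunk: Long = Int.MaxValue - 15          // ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
    val blockLength: Long = 5L * 1024 * 1024 * 1024 // a hypothetical 5 GiB block

    // For a small block, math.min(maxChunk, length) keeps a single chunk;
    // for a >2GB block, the copy loop fills ceil(length / maxChunk) chunks.
    val numChunks = ((blockLength + maxChunk - 1) / maxChunk).toInt
    val lastChunk = blockLength - (numChunks - 1) * maxChunk
    println(s"chunks: $numChunks, last chunk: $lastChunk bytes") // chunks: 3
  }
}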