[SPARK-2953] Allow using short names for io compression codecs
Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if Spark just accepts "lz4", "lzf", "snappy". Author: Reynold Xin <rxin@apache.org> Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits: 9f50962 [Reynold Xin] Specify short-form compression codec names first. 63f78ee [Reynold Xin] Updated configuration documentation. 47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs
This commit is contained in:
parent
c235b83e27
commit
676f98289d
|
@@ -46,17 +46,24 @@ trait CompressionCodec {
|
|||
|
||||
|
||||
private[spark] object CompressionCodec {

  /**
   * Short, user-friendly names for the built-in codecs, mapped to their fully
   * qualified class names. Keys are lowercase so lookups can be case-insensitive.
   */
  private val shortCompressionCodecNames = Map(
    "lz4" -> classOf[LZ4CompressionCodec].getName,
    "lzf" -> classOf[LZFCompressionCodec].getName,
    "snappy" -> classOf[SnappyCompressionCodec].getName)

  /**
   * Creates the codec configured via `spark.io.compression.codec`, falling back
   * to [[DEFAULT_COMPRESSION_CODEC]] when the setting is absent.
   */
  def createCodec(conf: SparkConf): CompressionCodec = {
    createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
  }

  /**
   * Creates a codec from either a short name ("lz4", "lzf", "snappy") or a fully
   * qualified class name of a [[CompressionCodec]] implementation that has a
   * single-argument `SparkConf` constructor.
   *
   * @param codecName short codec name or fully qualified class name
   * @throws ClassNotFoundException if `codecName` resolves to no loadable class
   */
  def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
    // Resolve a short form first; anything unrecognized is treated as a class name.
    // NOTE(review): bare `toLowerCase` is default-locale sensitive (e.g. Turkish "I");
    // keys here are ASCII so it works, but `toLowerCase(Locale.ROOT)` would be safer.
    val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName)
    // Use the context/Spark classloader so user-provided codec classes are found.
    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
      .getConstructor(classOf[SparkConf])
    ctor.newInstance(conf).asInstanceOf[CompressionCodec]
  }

  // Default is the short form so the rendered config docs stay readable.
  val DEFAULT_COMPRESSION_CODEC = "snappy"
}
|
||||
|
||||
|
||||
|
|
|
@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
|
|||
testCodec(codec)
|
||||
}
|
||||
|
||||
test("lz4 compression codec short form") {
|
||||
val codec = CompressionCodec.createCodec(conf, "lz4")
|
||||
assert(codec.getClass === classOf[LZ4CompressionCodec])
|
||||
testCodec(codec)
|
||||
}
|
||||
|
||||
test("lzf compression codec") {
|
||||
val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
|
||||
assert(codec.getClass === classOf[LZFCompressionCodec])
|
||||
testCodec(codec)
|
||||
}
|
||||
|
||||
test("lzf compression codec short form") {
|
||||
val codec = CompressionCodec.createCodec(conf, "lzf")
|
||||
assert(codec.getClass === classOf[LZFCompressionCodec])
|
||||
testCodec(codec)
|
||||
}
|
||||
|
||||
test("snappy compression codec") {
|
||||
val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
|
||||
assert(codec.getClass === classOf[SnappyCompressionCodec])
|
||||
testCodec(codec)
|
||||
}
|
||||
|
||||
test("snappy compression codec short form") {
|
||||
val codec = CompressionCodec.createCodec(conf, "snappy")
|
||||
assert(codec.getClass === classOf[SnappyCompressionCodec])
|
||||
testCodec(codec)
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful
|
|||
</tr>
|
||||
<tr>
|
||||
<td><code>spark.io.compression.codec</code></td>
|
||||
<td>org.apache.spark.io.<br />SnappyCompressionCodec</td>
|
||||
<td>snappy</td>
|
||||
<td>
|
||||
The codec used to compress internal data such as RDD partitions and shuffle outputs.
|
||||
By default, Spark provides three codecs: <code>org.apache.spark.io.LZ4CompressionCodec</code>,
|
||||
The codec used to compress internal data such as RDD partitions and shuffle outputs. By default,
|
||||
Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You
|
||||
can also use fully qualified class names to specify the codec, e.g.
|
||||
<code>org.apache.spark.io.LZ4CompressionCodec</code>,
|
||||
<code>org.apache.spark.io.LZFCompressionCodec</code>,
|
||||
and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
|
||||
</td>
|
||||
|
|
Loading…
Reference in a new issue