[SPARK-36820][3.2][SQL] Disable tests related to LZ4 for Hadoop 2.7 profile

### What changes were proposed in this pull request?

Disable tests related to LZ4 in `FileSourceCodecSuite` and `FileSuite` when using `hadoop-2.7` profile.
### Why are the changes needed?

At the moment, parquet-mr uses the LZ4 compression codec provided by Hadoop, and only since HADOOP-17292 (in 3.3.1/3.4.0) did the latter add `lz4-java` to remove the restriction that the codec can only be run with the native library. As a consequence, the tests will fail when using the `hadoop-2.7` profile.

### Does this PR introduce _any_ user-facing change?

No, this is a test-only change.

### How was this patch tested?

Existing test

Closes #34066 from sunchao/SpARK-36820-3.2.

Authored-by: Chao Sun <sunchao@apple.com>
Signed-off-by: Liang-Chi Hsieh <viirya@gmail.com>
This commit is contained in:
Chao Sun 2021-09-22 00:14:45 -07:00 committed by Liang-Chi Hsieh
parent 4543ac62bc
commit a28d8d9b0e
3 changed files with 20 additions and 6 deletions

View file

@ -17,6 +17,8 @@
package org.apache.spark.util
import org.apache.hadoop.util.VersionInfo
/**
* Utilities for working with Spark version strings
*/
@ -26,6 +28,11 @@ private[spark] object VersionUtils {
private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r
private val majorMinorPatchRegex = """^(\d+)(?:\.(\d+)(?:\.(\d+)(?:[.-].*)?)?)?$""".r
/**
* Whether the Hadoop version used by Spark is 3.x
*/
def isHadoop3: Boolean = majorVersion(VersionInfo.getVersion) == 3
/**
* Given a Spark version string, return the major version number.
* E.g., for 2.0.1-SNAPSHOT, return 2.

View file

@ -38,7 +38,7 @@ import org.apache.spark.internal.config._
import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.Utils
import org.apache.spark.util.{Utils, VersionUtils}
class FileSuite extends SparkFunSuite with LocalSparkContext {
var tempDir: File = _
@ -137,9 +137,11 @@ class FileSuite extends SparkFunSuite with LocalSparkContext {
// Hadoop "gzip" and "zstd" codecs require native library installed for sequence files
// "snappy" codec does not work due to SPARK-36681.
Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2"), (new Lz4Codec(), "lz4"))
.foreach { case (codec, codecName) =>
runSequenceFileCodecTest(codec, codecName)
// "lz4" is only exercised under the hadoop-3 profile: Hadoop's Lz4Codec needs
// the lz4-java support added by HADOOP-17292 (3.3.1/3.4.0) to run without the
// native library, so it is excluded for the hadoop-2.7 profile.
val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2")) ++ {
if (VersionUtils.isHadoop3) Seq((new Lz4Codec(), "lz4")) else Seq()
}
codecs.foreach { case (codec, codecName) =>
runSequenceFileCodecTest(codec, codecName)
}
test("SequenceFile with writable key") {

View file

@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
import org.apache.spark.util.VersionUtils
trait FileSourceCodecSuite extends QueryTest with SQLTestUtils with SharedSparkSession {
@ -58,8 +59,12 @@ class ParquetCodecSuite extends FileSourceCodecSuite {
// Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
// Exclude "brotli" because the com.github.rdblue:brotli-codec dependency is not available
// on Maven Central.
override protected def availableCodecs: Seq[String] =
Seq("none", "uncompressed", "snappy", "lz4", "gzip", "zstd")
override protected def availableCodecs: Seq[String] = {
  // Codecs always available regardless of the Hadoop profile.
  val baseCodecs = Seq("none", "uncompressed", "snappy", "gzip", "zstd")
  // "lz4" is excluded for the Hadoop 2.x profile since the lz4-java support
  // only exists in Hadoop 3.x.
  if (VersionUtils.isHadoop3) baseCodecs :+ "lz4" else baseCodecs
}
}
class OrcCodecSuite extends FileSourceCodecSuite {