From 9c0af746e5dda9f05e64f0a16a3dbe11a23024de Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 16 Mar 2019 19:42:05 -0500 Subject: [PATCH] [SPARK-27175][BUILD] Upgrade hadoop-3 to 3.2.0 ## What changes were proposed in this pull request? This PR upgrades `hadoop-3` to `3.2.0` to work around [HADOOP-16086](https://issues.apache.org/jira/browse/HADOOP-16086). Otherwise, some test cases will throw IllegalArgumentException: ```java 02:44:34.707 ERROR org.apache.hadoop.hive.ql.exec.Task: Job Submission failed with exception 'java.io.IOException(Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.)' java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses. at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:116) at org.apache.hadoop.mapreduce.Cluster.(Cluster.java:109) at org.apache.hadoop.mapreduce.Cluster.(Cluster.java:102) at org.apache.hadoop.mapred.JobClient.init(JobClient.java:475) at org.apache.hadoop.mapred.JobClient.(JobClient.java:454) at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:369) at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:151) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:730) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:283) at 
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:221) at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:220) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:266) at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:719) at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:709) at org.apache.spark.sql.hive.StatisticsSuite.createNonPartitionedTable(StatisticsSuite.scala:719) at org.apache.spark.sql.hive.StatisticsSuite.$anonfun$testAlterTableProperties$2(StatisticsSuite.scala:822) ``` ## How was this patch tested? manual tests Closes #24106 from wangyum/SPARK-27175. Authored-by: Yuming Wang Signed-off-by: Sean Owen --- ...-deps-hadoop-3.1 => spark-deps-hadoop-3.2} | 42 ++++++++++--------- dev/run-tests-jenkins.py | 4 +- dev/run-tests.py | 2 +- dev/test-dependencies.sh | 2 +- hadoop-cloud/pom.xml | 2 +- pom.xml | 8 ++-- 6 files changed, 31 insertions(+), 29 deletions(-) rename dev/deps/{spark-deps-hadoop-3.1 => spark-deps-hadoop-3.2} (87%) diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.2 similarity index 87% rename from dev/deps/spark-deps-hadoop-3.1 rename to dev/deps/spark-deps-hadoop-3.2 index 1f95c67d7a..6f3bbce3e7 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.2 @@ -15,6 +15,7 @@ arpack_combined_all-0.1.jar arrow-format-0.12.0.jar arrow-memory-0.12.0.jar arrow-vector-0.12.0.jar +audience-annotations-0.5.0.jar automaton-1.11-8.jar avro-1.8.2.jar avro-ipc-1.8.2.jar @@ -42,11 +43,12 @@ commons-logging-1.1.3.jar commons-math3-3.4.1.jar commons-net-3.1.jar commons-pool-1.5.4.jar +commons-text-1.6.jar compress-lzf-1.0.3.jar core-1.1.2.jar -curator-client-2.12.0.jar -curator-framework-2.12.0.jar -curator-recipes-2.12.0.jar +curator-client-2.13.0.jar +curator-framework-2.13.0.jar +curator-recipes-2.13.0.jar datanucleus-api-jdo-3.2.6.jar 
datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar @@ -60,20 +62,20 @@ gson-2.2.4.jar guava-14.0.1.jar guice-4.0.jar guice-servlet-4.0.jar -hadoop-annotations-3.1.0.jar -hadoop-auth-3.1.0.jar -hadoop-client-3.1.0.jar -hadoop-common-3.1.0.jar -hadoop-hdfs-client-3.1.0.jar -hadoop-mapreduce-client-common-3.1.0.jar -hadoop-mapreduce-client-core-3.1.0.jar -hadoop-mapreduce-client-jobclient-3.1.0.jar -hadoop-yarn-api-3.1.0.jar -hadoop-yarn-client-3.1.0.jar -hadoop-yarn-common-3.1.0.jar -hadoop-yarn-registry-3.1.0.jar -hadoop-yarn-server-common-3.1.0.jar -hadoop-yarn-server-web-proxy-3.1.0.jar +hadoop-annotations-3.2.0.jar +hadoop-auth-3.2.0.jar +hadoop-client-3.2.0.jar +hadoop-common-3.2.0.jar +hadoop-hdfs-client-3.2.0.jar +hadoop-mapreduce-client-common-3.2.0.jar +hadoop-mapreduce-client-core-3.2.0.jar +hadoop-mapreduce-client-jobclient-3.2.0.jar +hadoop-yarn-api-3.2.0.jar +hadoop-yarn-client-3.2.0.jar +hadoop-yarn-common-3.2.0.jar +hadoop-yarn-registry-3.2.0.jar +hadoop-yarn-server-common-3.2.0.jar +hadoop-yarn-server-web-proxy-3.2.0.jar hk2-api-2.4.0-b34.jar hk2-locator-2.4.0-b34.jar hk2-utils-2.4.0-b34.jar @@ -88,8 +90,8 @@ jackson-core-2.9.8.jar jackson-core-asl-1.9.13.jar jackson-databind-2.9.8.jar jackson-dataformat-yaml-2.9.8.jar -jackson-jaxrs-base-2.7.8.jar -jackson-jaxrs-json-provider-2.7.8.jar +jackson-jaxrs-base-2.9.5.jar +jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations-2.9.8.jar jackson-module-paranamer-2.9.8.jar @@ -215,5 +217,5 @@ woodstox-core-5.0.3.jar xbean-asm7-shaded-4.12.jar xz-1.5.jar zjsonpatch-0.3.0.jar -zookeeper-3.4.9.jar +zookeeper-3.4.13.jar zstd-jni-1.3.2-2.jar diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index e01bcbef8f..fdc4f2b486 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -173,8 +173,8 @@ def main(): os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.6" if "test-hadoop2.7" in ghprb_pull_title: 
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7" - if "test-hadoop3.1" in ghprb_pull_title: - os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.1" + if "test-hadoop3.2" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2" build_display_name = os.environ["BUILD_DISPLAY_NAME"] build_url = os.environ["BUILD_URL"] diff --git a/dev/run-tests.py b/dev/run-tests.py index 535c8775fd..dfad299107 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -280,7 +280,7 @@ def get_hadoop_profiles(hadoop_version): sbt_maven_hadoop_profiles = { "hadoop2.7": ["-Phadoop-2.7"], - "hadoop3.1": ["-Phadoop-3.1"], + "hadoop3.2": ["-Phadoop-3.2"], } if hadoop_version in sbt_maven_hadoop_profiles: diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 63e01e1085..54574f6097 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -33,7 +33,7 @@ HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkubernetes -Pyarn -Phive" MVN="build/mvn" HADOOP_PROFILES=( hadoop-2.7 - hadoop-3.1 + hadoop-3.2 ) # We'll switch the version to a temp. one, publish POMs using that new version, then switch back to diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 2e5b04622c..68d3d04a9a 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -199,7 +199,7 @@ enables store-specific committers. --> - hadoop-3.1 + hadoop-3.2