From 02a0cdea13a5eebd27649a60d981de35156ba52c Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 23 Aug 2019 21:34:30 -0700 Subject: [PATCH] [SPARK-28723][SQL] Upgrade to Hive 2.3.6 for HiveMetastore Client and Hadoop-3.2 profile ### What changes were proposed in this pull request? This PR upgrades the built-in Hive to 2.3.6 for `hadoop-3.2`. Hive 2.3.6 release notes: - [HIVE-22096](https://issues.apache.org/jira/browse/HIVE-22096): Backport [HIVE-21584](https://issues.apache.org/jira/browse/HIVE-21584) (Java 11 preparation: system class loader is not URLClassLoader) - [HIVE-21859](https://issues.apache.org/jira/browse/HIVE-21859): Backport [HIVE-17466](https://issues.apache.org/jira/browse/HIVE-17466) (Metastore API to list unique partition-key-value combinations) - [HIVE-21786](https://issues.apache.org/jira/browse/HIVE-21786): Update repo URLs in poms branch 2.3 version ### Why are the changes needed? To make Spark support JDK 11: Hive 2.3.6 includes HIVE-22096, which removes the assumption that the system class loader is a URLClassLoader. ### Does this PR introduce any user-facing change? Yes. Please see [SPARK-28684](https://issues.apache.org/jira/browse/SPARK-28684) and [SPARK-24417](https://issues.apache.org/jira/browse/SPARK-24417) for more details. ### How was this patch tested? Existing unit tests and manual testing. Closes #25443 from wangyum/test-on-jenkins. 
Lead-authored-by: Yuming Wang Co-authored-by: HyukjinKwon Co-authored-by: Hyukjin Kwon Signed-off-by: Dongjoon Hyun --- docs/building-spark.md | 4 ++-- docs/sql-data-sources-hive-tables.md | 2 +- docs/sql-migration-guide-hive-compatibility.md | 2 +- pom.xml | 2 +- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 3 +-- .../hive/thriftserver/HiveThriftServer2Suites.scala | 4 ++-- .../hive/thriftserver/ThriftserverShimUtils.scala | 7 +++++++ .../hive/thriftserver/ThriftserverShimUtils.scala | 12 ++++++++++++ .../scala/org/apache/spark/sql/hive/HiveUtils.scala | 2 +- .../spark/sql/hive/client/IsolatedClientLoader.scala | 2 +- .../org/apache/spark/sql/hive/client/package.scala | 2 +- 11 files changed, 30 insertions(+), 12 deletions(-) diff --git a/docs/building-spark.md b/docs/building-spark.md index fe7b4be20a..1f8e51fe32 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -83,12 +83,12 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `Phive-thriftserver` profiles to your existing build options. -By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.5 with the `hadoop-3.2` profile. +By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.6 with the `hadoop-3.2` profile. # With Hive 1.2.1 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package - # With Hive 2.3.5 support + # With Hive 2.3.6 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -Phadoop-3.2 -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 5688011514..8e4b8329d5 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -130,7 +130,7 @@ The following options can be used to configure the version of Hive that is used 1.2.1 Version of the Hive metastore. 
Available - options are 0.12.0 through 2.3.5 and 3.0.0 through 3.1.1. + options are 0.12.0 through 2.3.6 and 3.0.0 through 3.1.1. diff --git a/docs/sql-migration-guide-hive-compatibility.md b/docs/sql-migration-guide-hive-compatibility.md index f955e31d49..c410d60e86 100644 --- a/docs/sql-migration-guide-hive-compatibility.md +++ b/docs/sql-migration-guide-hive-compatibility.md @@ -25,7 +25,7 @@ license: | Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on Hive 1.2.1, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.5 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 0.12.0 to 2.3.6 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). 
#### Deploying in Existing Hive Warehouses diff --git a/pom.xml b/pom.xml index de35234224..6a8424cc13 100644 --- a/pom.xml +++ b/pom.xml @@ -132,7 +132,7 @@ 1.2.1.spark2 - 2.3.5 + 2.3.6 1.2.1 diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index bd58c8b6ee..b9614d49ea 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.cli.{CliDriver, CliSessionState, OptionsProcessor} import org.apache.hadoop.hive.common.HiveInterruptUtils import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.Driver -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} @@ -143,7 +142,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { var loader = conf.getClassLoader val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) if (StringUtils.isNotBlank(auxJars)) { - loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ",")) + loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ",")) } conf.setClassLoader(loader) Thread.currentThread().setContextClassLoader(loader) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 9c53e90186..b7185db2f2 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -537,7 +537,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5")) + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6")) } else { assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) } @@ -554,7 +554,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5")) + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6")) } else { assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) } diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 4eb5f5da8f..87c0f8f6a5 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.commons.logging.LogFactory +import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} import org.apache.hive.service.cli.thrift.TProtocolVersion._ @@ -50,6 +51,12 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType + private[thriftserver] def addToClassPath( + loader: ClassLoader, + auxJars: Array[String]): ClassLoader = { + Utilities.addToClassPath(loader, auxJars) + } + private[thriftserver] val testedProtocolVersions = 
Seq( HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index d586c0e1b6..124c9937c0 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -17,6 +17,11 @@ package org.apache.spark.sql.hive.thriftserver +import java.security.AccessController + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.hive.ql.exec.AddToClassPathAction import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.thrift.Type import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema} @@ -51,6 +56,13 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType + private[thriftserver] def addToClassPath( + loader: ClassLoader, + auxJars: Array[String]): ClassLoader = { + val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava) + AccessController.doPrivileged(addAction) + } + private[thriftserver] val testedProtocolVersions = Seq( HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 2e6811d5f2..7574898056 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -63,7 +63,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. 
Available options are " + - "0.12.0 through 2.3.5 and " + + "0.12.0 through 2.3.6 and " + "3.0.0 through 3.1.1.") .stringConf .createWithDefault(builtinHiveVersion) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 32178800a8..752ed9ac33 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -101,7 +101,7 @@ private[hive] object IsolatedClientLoader extends Logging { case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" => hive.v2_3 + case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" => hive.v2_3 case "3.0" | "3.0.0" => hive.v3_0 case "3.1" | "3.1.0" | "3.1.1" => hive.v3_1 case version => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 31a060fc8f..4082b4a7b7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -89,7 +89,7 @@ package object client { // Since HIVE-14496, Hive materialized view need calcite-core. // For spark, only VersionsSuite currently creates a hive materialized view for testing. - case object v2_3 extends HiveVersion("2.3.5", + case object v2_3 extends HiveVersion("2.3.6", exclusions = Seq("org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", "org.apache.curator:*",