[SPARK-28723][SQL] Upgrade to Hive 2.3.6 for HiveMetastore Client and Hadoop-3.2 profile
### What changes were proposed in this pull request? This PR upgrades the built-in Hive to 2.3.6 for `hadoop-3.2`. Hive 2.3.6 release notes: - [HIVE-22096](https://issues.apache.org/jira/browse/HIVE-22096): Backport [HIVE-21584](https://issues.apache.org/jira/browse/HIVE-21584) (Java 11 preparation: system class loader is not URLClassLoader) - [HIVE-21859](https://issues.apache.org/jira/browse/HIVE-21859): Backport [HIVE-17466](https://issues.apache.org/jira/browse/HIVE-17466) (Metastore API to list unique partition-key-value combinations) - [HIVE-21786](https://issues.apache.org/jira/browse/HIVE-21786): Update repo URLs in poms branch 2.3 version ### Why are the changes needed? Make Spark support JDK 11. ### Does this PR introduce any user-facing change? Yes. Please see [SPARK-28684](https://issues.apache.org/jira/browse/SPARK-28684) and [SPARK-24417](https://issues.apache.org/jira/browse/SPARK-24417) for more details. ### How was this patch tested? Existing unit tests and manual tests. Closes #25443 from wangyum/test-on-jenkins. Lead-authored-by: Yuming Wang <yumwang@ebay.com> Co-authored-by: HyukjinKwon <gurwls223@apache.org> Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
f17f1d01e2
commit
02a0cdea13
|
@ -83,12 +83,12 @@ Example:
|
|||
|
||||
To enable Hive integration for Spark SQL along with its JDBC server and CLI,
|
||||
add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options.
|
||||
By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.5 with the `hadoop-3.2` profile.
|
||||
By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.6 with the `hadoop-3.2` profile.
|
||||
|
||||
# With Hive 1.2.1 support
|
||||
./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package
|
||||
|
||||
# With Hive 2.3.5 support
|
||||
# With Hive 2.3.6 support
|
||||
./build/mvn -Pyarn -Phive -Phive-thriftserver -Phadoop-3.2 -DskipTests clean package
|
||||
|
||||
## Packaging without Hadoop Dependencies for YARN
|
||||
|
|
|
@ -130,7 +130,7 @@ The following options can be used to configure the version of Hive that is used
|
|||
<td><code>1.2.1</code></td>
|
||||
<td>
|
||||
Version of the Hive metastore. Available
|
||||
options are <code>0.12.0</code> through <code>2.3.5</code> and <code>3.0.0</code> through <code>3.1.1</code>.
|
||||
options are <code>0.12.0</code> through <code>2.3.6</code> and <code>3.0.0</code> through <code>3.1.1</code>.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
|
|
@ -25,7 +25,7 @@ license: |
|
|||
Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs.
|
||||
Currently, Hive SerDes and UDFs are based on Hive 1.2.1,
|
||||
and Spark SQL can be connected to different versions of Hive Metastore
|
||||
(from 0.12.0 to 2.3.5 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
|
||||
(from 0.12.0 to 2.3.6 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
|
||||
|
||||
#### Deploying in Existing Hive Warehouses
|
||||
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -132,7 +132,7 @@
|
|||
<hive.classifier></hive.classifier>
|
||||
<!-- Version used in Maven Hive dependency -->
|
||||
<hive.version>1.2.1.spark2</hive.version>
|
||||
<hive23.version>2.3.5</hive23.version>
|
||||
<hive23.version>2.3.6</hive23.version>
|
||||
<!-- Version used for internal directory structure -->
|
||||
<hive.version.short>1.2.1</hive.version.short>
|
||||
<!-- note that this should be compatible with Kafka brokers version 0.10 and up -->
|
||||
|
|
|
@ -32,7 +32,6 @@ import org.apache.hadoop.hive.cli.{CliDriver, CliSessionState, OptionsProcessor}
|
|||
import org.apache.hadoop.hive.common.HiveInterruptUtils
|
||||
import org.apache.hadoop.hive.conf.HiveConf
|
||||
import org.apache.hadoop.hive.ql.Driver
|
||||
import org.apache.hadoop.hive.ql.exec.Utilities
|
||||
import org.apache.hadoop.hive.ql.processors._
|
||||
import org.apache.hadoop.hive.ql.session.SessionState
|
||||
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
|
||||
|
@ -143,7 +142,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
|
|||
var loader = conf.getClassLoader
|
||||
val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS)
|
||||
if (StringUtils.isNotBlank(auxJars)) {
|
||||
loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","))
|
||||
loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ","))
|
||||
}
|
||||
conf.setClassLoader(loader)
|
||||
Thread.currentThread().setContextClassLoader(loader)
|
||||
|
|
|
@ -537,7 +537,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
|
|||
}
|
||||
|
||||
if (HiveUtils.isHive23) {
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5"))
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
|
||||
} else {
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
|
||||
}
|
||||
|
@ -554,7 +554,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
|
|||
}
|
||||
|
||||
if (HiveUtils.isHive23) {
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5"))
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
|
||||
} else {
|
||||
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.spark.sql.hive.thriftserver
|
||||
|
||||
import org.apache.commons.logging.LogFactory
|
||||
import org.apache.hadoop.hive.ql.exec.Utilities
|
||||
import org.apache.hadoop.hive.ql.session.SessionState
|
||||
import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type}
|
||||
import org.apache.hive.service.cli.thrift.TProtocolVersion._
|
||||
|
@ -50,6 +51,12 @@ private[thriftserver] object ThriftserverShimUtils {
|
|||
|
||||
private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
|
||||
|
||||
/**
 * Adds the given auxiliary jars to the class path by delegating directly to
 * Hive's `Utilities.addToClassPath`, and returns whatever class loader that
 * call produces.
 *
 * @param loader  the class loader handed to `Utilities.addToClassPath`
 * @param auxJars the auxiliary jar entries to add
 * @return the class loader returned by `Utilities.addToClassPath`
 */
private[thriftserver] def addToClassPath(
|
||||
loader: ClassLoader,
|
||||
auxJars: Array[String]): ClassLoader = {
|
||||
Utilities.addToClassPath(loader, auxJars)
|
||||
}
|
||||
|
||||
private[thriftserver] val testedProtocolVersions = Seq(
|
||||
HIVE_CLI_SERVICE_PROTOCOL_V1,
|
||||
HIVE_CLI_SERVICE_PROTOCOL_V2,
|
||||
|
|
|
@ -17,6 +17,11 @@
|
|||
|
||||
package org.apache.spark.sql.hive.thriftserver
|
||||
|
||||
import java.security.AccessController
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
import org.apache.hadoop.hive.ql.exec.AddToClassPathAction
|
||||
import org.apache.hadoop.hive.ql.session.SessionState
|
||||
import org.apache.hadoop.hive.serde2.thrift.Type
|
||||
import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema}
|
||||
|
@ -51,6 +56,13 @@ private[thriftserver] object ThriftserverShimUtils {
|
|||
|
||||
private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
|
||||
|
||||
/**
 * Adds the given auxiliary jars to the class path using Hive's
 * `AddToClassPathAction`, executed through `AccessController.doPrivileged`,
 * and returns the class loader that the action yields.
 *
 * @param loader  the class loader handed to `AddToClassPathAction`
 * @param auxJars the auxiliary jar entries to add; converted to a Java list
 *                before being passed to the action
 * @return the class loader returned by running the action
 */
private[thriftserver] def addToClassPath(
|
||||
loader: ClassLoader,
|
||||
auxJars: Array[String]): ClassLoader = {
|
||||
val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava)
|
||||
AccessController.doPrivileged(addAction)
|
||||
}
|
||||
|
||||
private[thriftserver] val testedProtocolVersions = Seq(
|
||||
HIVE_CLI_SERVICE_PROTOCOL_V1,
|
||||
HIVE_CLI_SERVICE_PROTOCOL_V2,
|
||||
|
|
|
@ -63,7 +63,7 @@ private[spark] object HiveUtils extends Logging {
|
|||
|
||||
val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version")
|
||||
.doc("Version of the Hive metastore. Available options are " +
|
||||
"<code>0.12.0</code> through <code>2.3.5</code> and " +
|
||||
"<code>0.12.0</code> through <code>2.3.6</code> and " +
|
||||
"<code>3.0.0</code> through <code>3.1.1</code>.")
|
||||
.stringConf
|
||||
.createWithDefault(builtinHiveVersion)
|
||||
|
|
|
@ -101,7 +101,7 @@ private[hive] object IsolatedClientLoader extends Logging {
|
|||
case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
|
||||
case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
|
||||
case "2.2" | "2.2.0" => hive.v2_2
|
||||
case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" => hive.v2_3
|
||||
case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" => hive.v2_3
|
||||
case "3.0" | "3.0.0" => hive.v3_0
|
||||
case "3.1" | "3.1.0" | "3.1.1" => hive.v3_1
|
||||
case version =>
|
||||
|
|
|
@ -89,7 +89,7 @@ package object client {
|
|||
|
||||
// Since HIVE-14496, Hive materialized view need calcite-core.
|
||||
// For spark, only VersionsSuite currently creates a hive materialized view for testing.
|
||||
case object v2_3 extends HiveVersion("2.3.5",
|
||||
case object v2_3 extends HiveVersion("2.3.6",
|
||||
exclusions = Seq("org.apache.calcite:calcite-druid",
|
||||
"org.apache.calcite.avatica:avatica",
|
||||
"org.apache.curator:*",
|
||||
|
|
Loading…
Reference in a new issue