diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 86d2c3ab2b..1a89da42d6 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -81,21 +81,21 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hive-beeline/2.3.7//hive-beeline-2.3.7.jar -hive-cli/2.3.7//hive-cli-2.3.7.jar -hive-common/2.3.7//hive-common-2.3.7.jar -hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar -hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar -hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar -hive-metastore/2.3.7//hive-metastore-2.3.7.jar -hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-beeline/2.3.8//hive-beeline-2.3.8.jar +hive-cli/2.3.8//hive-cli-2.3.8.jar +hive-common/2.3.8//hive-common-2.3.8.jar +hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar +hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar +hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar +hive-metastore/2.3.8//hive-metastore-2.3.8.jar +hive-serde/2.3.8//hive-serde-2.3.8.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar -hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar -hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar -hive-shims/2.3.7//hive-shims-2.3.7.jar +hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar +hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar +hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar +hive-shims/2.3.8//hive-shims-2.3.8.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar +hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 
b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 7d3876c347..562a436425 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -58,21 +58,21 @@ gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar -hive-beeline/2.3.7//hive-beeline-2.3.7.jar -hive-cli/2.3.7//hive-cli-2.3.7.jar -hive-common/2.3.7//hive-common-2.3.7.jar -hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar -hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar -hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar -hive-metastore/2.3.7//hive-metastore-2.3.7.jar -hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-beeline/2.3.8//hive-beeline-2.3.8.jar +hive-cli/2.3.8//hive-cli-2.3.8.jar +hive-common/2.3.8//hive-common-2.3.8.jar +hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar +hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar +hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar +hive-metastore/2.3.8//hive-metastore-2.3.8.jar +hive-serde/2.3.8//hive-serde-2.3.8.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar -hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar -hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar -hive-shims/2.3.7//hive-shims-2.3.7.jar +hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar +hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar +hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar +hive-shims/2.3.8//hive-shims-2.3.8.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar +hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index 5106f2abd4..f9599b642d 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -83,9 +83,9 @@ Example: To enable Hive 
integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.7. +By default Spark will build with Hive 2.3.8. - # With Hive 2.3.7 support + # With Hive 2.3.8 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 376c2042d4..723236e866 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used Property NameDefaultMeaningSince Version spark.sql.hive.metastore.version - 2.3.7 + 2.3.8 Version of the Hive metastore. Available - options are 0.12.0 through 2.3.7 and 3.0.0 through 3.1.2. + options are 0.12.0 through 2.3.8 and 3.0.0 through 3.1.2. 1.4.0 @@ -142,9 +142,9 @@ The following options can be used to configure the version of Hive that is used property can be one of four options:
  1. builtin
  2. - Use Hive 2.3.7, which is bundled with the Spark assembly when -Phive is + Use Hive 2.3.8, which is bundled with the Spark assembly when -Phive is enabled. When this option is chosen, spark.sql.hive.metastore.version must be - either 2.3.7 or not defined. + either 2.3.8 or not defined.
  3. maven
  4. Use Hive jars of specified version downloaded from Maven repositories. This configuration is not generally recommended for production deployments. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index dc21ec3dc1..909fc38313 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -863,7 +863,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.7 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 0.12.0 to 2.3.8 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/pom.xml b/pom.xml index 64b09b4582..3a20427966 100644 --- a/pom.xml +++ b/pom.xml @@ -128,8 +128,8 @@ org.apache.hive core - 2.3.7 - 2.3.7 + 2.3.8 + 2.3.8 2.3 @@ -1891,6 +1891,22 @@ org.apache.logging.log4j * + + net.hydromatic + eigenbase-properties + + + org.codehaus.janino + commons-compiler + + + org.codehaus.janino + janino + + + org.pentaho + pentaho-aggdesigner-algorithm + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index d46db8f995..d738d3c13a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3724,20 +3724,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark test("SPARK-33084: Add jar support Ivy URI in SQL") { val sc = spark.sparkContext + val hiveVersion = "2.3.8" // default 
transitive=false, only download specified jar - sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion") assert(sc.listJars() - .exists(_.contains("org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar"))) + .exists(_.contains(s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar"))) // test download ivy URL jar return multiple jars sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0?transitive=true") assert(sc.listJars().exists(_.contains("scalajs-library_2.12"))) assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12"))) - sql("ADD JAR ivy://org.apache.hive:hive-contrib:2.3.7" + + sql(s"ADD JAR ivy://org.apache.hive:hive-contrib:$hiveVersion" + "?exclude=org.pentaho:pentaho-aggdesigner-algorithm&transitive=true") - assert(sc.listJars().exists(_.contains("org.apache.hive_hive-contrib-2.3.7.jar"))) - assert(sc.listJars().exists(_.contains("org.apache.hive_hive-exec-2.3.7.jar"))) + assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-contrib-$hiveVersion.jar"))) + assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-exec-$hiveVersion.jar"))) assert(!sc.listJars().exists(_.contains("org.pentaho.pentaho_aggdesigner-algorithm"))) } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index bd0db743b8..d9003aa404 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -546,7 +546,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { conf += resultSet.getString(1) -> resultSet.getString(2) } - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) + 
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8")) } } @@ -559,7 +559,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { conf += resultSet.getString(1) -> resultSet.getString(2) } - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8")) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index ac199f7496..3ec738c989 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -60,7 +60,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + - "0.12.0 through 2.3.7 and " + + "0.12.0 through 2.3.8 and " + "3.0.0 through 3.1.2.") .version("1.4.0") .stringConf diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 4e5e58dc08..58ca476e6a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -98,8 +98,8 @@ private[hive] object IsolatedClientLoader extends Logging { case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" => - hive.v2_3 + case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | + "2.3.8" => hive.v2_3 case "3.0" | "3.0.0" => hive.v3_0 case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 case version => diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 27ba3eca81..bb78944fb1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -100,11 +100,13 @@ package object client { "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) - // Since HIVE-14496, Hive materialized view need calcite-core. + // Since HIVE-23980, calcite-core is included in the Hive package jar. // For spark, only VersionsSuite currently creates a hive materialized view for testing. - case object v2_3 extends HiveVersion("2.3.7", - exclusions = Seq("org.apache.calcite:calcite-druid", + case object v2_3 extends HiveVersion("2.3.8", + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", + "com.fasterxml.jackson.core:*", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) @@ -114,7 +116,6 @@ package object client { extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), exclusions = Seq("org.apache.calcite:calcite-druid", - "org.apache.calcite.avatica:avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) @@ -124,7 +125,6 @@ package object client { extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), exclusions = Seq("org.apache.calcite:calcite-druid", - "org.apache.calcite.avatica:avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 37287fc394..a78385ff38 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -60,7 +60,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { - "2.3.7" + "2.3.8" } else { "1.2.1" } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index dc2ff26a8a..d4bcba4128 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec +import org.apache.spark.sql.hive.HiveUtils.{builtinHiveVersion => hiveVersion} import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf @@ -1223,17 +1224,17 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("SPARK-33084: Add jar support Ivy URI in SQL") { val testData = TestHive.getHiveFile("data/files/sample.json").toURI withTable("t") { - sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion") sql( """CREATE TABLE t(a string, b string) |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin) sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t""") sql("SELECT * FROM src JOIN t on src.key = t.a") assert(sql("LIST 
JARS").filter(_.getString(0).contains( - "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")).count() > 0) assert(sql("LIST JAR"). filter(_.getString(0).contains( - "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar")).count() > 0) } } }