[SPARK-27831][SQL][TEST] Move Hive test jars to maven dependency

## What changes were proposed in this pull request?

This PR moves the Hive test jars (`hive-contrib-0.13.1.jar`, `hive-hcatalog-core-0.13.1.jar`, `hive-contrib-2.3.5.jar` and `hive-hcatalog-core-2.3.5.jar`) out of the source tree and resolves them as Maven test dependencies instead.
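Instead of hard-coding relative paths to jars checked into `src/test/resources`, tests now locate each jar on the test classpath by asking where a class shipped in that jar was loaded from (see the new `HiveTestUtils` in the diff below). A minimal sketch of the technique — `JarLocator` is an illustrative name, while `JsonSerDe` really does ship in `hive-hcatalog-core`:

```scala
import java.io.File

import org.apache.hive.hcatalog.data.JsonSerDe

// Illustrative helper: CodeSource.getLocation points at the classpath entry
// (here, the hive-hcatalog-core jar resolved by Maven/SBT) that provided the
// class, so the path automatically tracks whatever version the build pulls in.
object JarLocator {
  val hiveHcatalogCoreJar: File =
    new File(classOf[JsonSerDe].getProtectionDomain.getCodeSource.getLocation.getPath)
}
```

This is why the test code no longer needs version-pinned jar file names.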

## How was this patch tested?

Existing tests.

Please note that this PR needs to be tested with both `maven` and `sbt`.

Closes #24751 from wangyum/SPARK-27831.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
Yuming Wang 2019-06-02 20:23:08 -07:00 committed by Dongjoon Hyun
parent 2a88fffacb
commit d53b61c311
14 changed files with 142 additions and 34 deletions

pom.xml

@@ -1941,6 +1941,75 @@
         </exclusions>
       </dependency>
+      <dependency>
+        <groupId>${hive.group}</groupId>
+        <artifactId>hive-contrib</artifactId>
+        <version>${hive.version}</version>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-exec</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-serde</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-shims</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>${hive.group}.hcatalog</groupId>
+        <artifactId>hive-hcatalog-core</artifactId>
+        <version>${hive.version}</version>
+        <scope>test</scope>
+        <exclusions>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-exec</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-metastore</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-cli</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>${hive.group}</groupId>
+            <artifactId>hive-common</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.codehaus.jackson</groupId>
+            <artifactId>jackson-mapper-asl</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>*</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
       <dependency>
         <groupId>org.apache.orc</groupId>
         <artifactId>orc-core</artifactId>

sql/hive-thriftserver/pom.xml

@@ -47,6 +47,13 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
@@ -63,6 +70,15 @@
       <groupId>${hive.group}</groupId>
       <artifactId>hive-beeline</artifactId>
     </dependency>
+    <!-- Explicitly list hive-contrib and hive-hcatalog-core; otherwise the Maven tests fail. -->
+    <dependency>
+      <groupId>${hive.group}</groupId>
+      <artifactId>hive-contrib</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${hive.group}.hcatalog</groupId>
+      <artifactId>hive-hcatalog-core</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

@@ -31,6 +31,7 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.hive.test.HiveTestUtils
 import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer
 import org.apache.spark.util.{ThreadUtils, Utils}
@@ -200,10 +201,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
   }
 
   test("Commands using SerDe provided in --jars") {
-    val jarFile =
-      "../hive/src/test/resources/hive-hcatalog-core-0.13.1.jar"
-        .split("/")
-        .mkString(File.separator)
+    val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
 
     val dataFilePath =
       Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt")

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala

@@ -44,6 +44,7 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.hive.HiveUtils
+import org.apache.spark.sql.hive.test.HiveTestUtils
 import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer
 import org.apache.spark.util.{ThreadUtils, Utils}
@@ -484,10 +485,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
     withMultipleConnectionJdbcStatement("smallKV", "addJar")(
       {
         statement =>
-          val jarFile =
-            "../hive/src/test/resources/hive-hcatalog-core-0.13.1.jar"
-              .split("/")
-              .mkString(File.separator)
+          val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
 
           statement.executeQuery(s"ADD JAR $jarFile")
       },

sql/hive/pom.xml

@@ -103,6 +103,14 @@
       <groupId>${hive.group}</groupId>
       <artifactId>hive-metastore</artifactId>
     </dependency>
+    <dependency>
+      <groupId>${hive.group}</groupId>
+      <artifactId>hive-contrib</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${hive.group}.hcatalog</groupId>
+      <artifactId>hive-hcatalog-core</artifactId>
+    </dependency>
     <!--
     <dependency>
       <groupId>${hive.group}</groupId>

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala

@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.expressions.Window
-import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext}
+import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveContext}
 import org.apache.spark.sql.types.{DecimalType, StructType}
 import org.apache.spark.tags.ExtendedHiveTest
 import org.apache.spark.util.{ResetSystemProperties, Utils}
@@ -108,8 +108,8 @@ class HiveSparkSubmitSuite
     val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
     val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA"))
     val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB"))
-    val jar3 = TestHive.getHiveContribJar().getCanonicalPath
-    val jar4 = TestHive.getHiveHcatalogCoreJar().getCanonicalPath
+    val jar3 = HiveTestUtils.getHiveContribJar.getCanonicalPath
+    val jar4 = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath
     val jarsString = Seq(jar1, jar2, jar3, jar4).map(j => j.toString).mkString(",")
     val args = Seq(
       "--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"),
@@ -315,7 +315,7 @@ class HiveSparkSubmitSuite
       "--master", "local-cluster[2,1,1024]",
       "--conf", "spark.ui.enabled=false",
       "--conf", "spark.master.rest.enabled=false",
-      "--jars", TestHive.getHiveContribJar().getCanonicalPath,
+      "--jars", HiveTestUtils.getHiveContribJar.getCanonicalPath,
       unusedJar.toString)
     runSparkSubmit(argsForCreateTable)
@@ -457,7 +457,7 @@ object TemporaryHiveUDFTest extends Logging {
     // Load a Hive UDF from the jar.
     logInfo("Registering a temporary Hive UDF provided in a jar.")
-    val jar = hiveContext.getHiveContribJar().getCanonicalPath
+    val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
     hiveContext.sql(
       s"""
         |CREATE TEMPORARY FUNCTION example_max
@@ -495,7 +495,7 @@ object PermanentHiveUDFTest1 extends Logging {
     // Load a Hive UDF from the jar.
     logInfo("Registering a permanent Hive UDF provided in a jar.")
-    val jar = hiveContext.getHiveContribJar().getCanonicalPath
+    val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
     hiveContext.sql(
       s"""
         |CREATE FUNCTION example_max
@@ -532,7 +532,7 @@ object PermanentHiveUDFTest2 extends Logging {
     val hiveContext = new TestHiveContext(sc)
     // Load a Hive UDF from the jar.
     logInfo("Write the metadata of a permanent Hive UDF into metastore.")
-    val jar = hiveContext.getHiveContribJar().getCanonicalPath
+    val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath
     val function = CatalogFunction(
       FunctionIdentifier("example_max"),
       "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax",

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.logical.Project
 import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec
 import org.apache.spark.sql.hive._
-import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHive}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
@@ -816,7 +816,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
   test("ADD JAR command 2") {
     // this is a test case from mapjoin_addjar.q
-    val testJar = TestHive.getHiveHcatalogCoreJar().toURI
+    val testJar = HiveTestUtils.getHiveHcatalogCoreJar.toURI
     val testData = TestHive.getHiveFile("data/files/sample.json").toURI
     sql(s"ADD JAR $testJar")
     sql(
@@ -826,9 +826,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
     sql("select * from src join t1 on src.key = t1.a")
     sql("DROP TABLE t1")
     assert(sql("list jars").
-      filter(_.getString(0).contains(TestHive.HIVE_HCATALOG_CORE_JAR)).count() > 0)
+      filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0)
     assert(sql("list jar").
-      filter(_.getString(0).contains(TestHive.HIVE_HCATALOG_CORE_JAR)).count() > 0)
+      filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0)
     val testJar2 = TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath
     sql(s"ADD JAR $testJar2")
     assert(sql(s"list jar $testJar").count() == 1)

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.command.LoadDataCommand
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
-import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveSingleton}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
@@ -1105,7 +1105,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       override def run() {
         // To make sure this test works, this jar should not be loaded in another place.
         sql(
-          s"ADD JAR ${hiveContext.getHiveContribJar().getCanonicalPath}")
+          s"ADD JAR ${HiveTestUtils.getHiveContribJar.getCanonicalPath}")
         try {
           sql(
             """

sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala (new file)

@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.test
+
+import java.io.File
+
+import org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax
+import org.apache.hive.hcatalog.data.JsonSerDe
+
+object HiveTestUtils {
+
+  val getHiveContribJar: File =
+    new File(classOf[UDAFExampleMax].getProtectionDomain.getCodeSource.getLocation.getPath)
+
+  val getHiveHcatalogCoreJar: File =
+    new File(classOf[JsonSerDe].getProtectionDomain.getCodeSource.getLocation.getPath)
+}
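For reference, a hedged sketch of how the suites above consume these helpers once the jars come from the classpath; `addContribJar` is an illustrative name, and an already-running test `SparkSession` is assumed:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.test.HiveTestUtils

// The resolved canonical path replaces the old hard-coded
// hive-contrib-0.13.1.jar path under src/test/resources.
def addContribJar(spark: SparkSession): Unit = {
  spark.sql(s"ADD JAR ${HiveTestUtils.getHiveContribJar.getCanonicalPath}")
}
```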

sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala

@@ -123,11 +123,6 @@ class TestHiveContext(
     @transient override val sparkSession: TestHiveSparkSession)
   extends SQLContext(sparkSession) {
 
-  val HIVE_CONTRIB_JAR: String =
-    if (HiveUtils.isHive23) "hive-contrib-2.3.5.jar" else "hive-contrib-0.13.1.jar"
-  val HIVE_HCATALOG_CORE_JAR: String =
-    if (HiveUtils.isHive23) "hive-hcatalog-core-2.3.5.jar" else "hive-hcatalog-core-0.13.1.jar"
-
   /**
    * If loadTestTables is false, no test tables are loaded. Note that this flag can only be true
    * when running in the JVM, i.e. it needs to be false when calling from Python.
@@ -154,14 +149,6 @@
     sparkSession.getHiveFile(path)
   }
 
-  def getHiveContribJar(): File = {
-    sparkSession.getHiveFile(HIVE_CONTRIB_JAR)
-  }
-
-  def getHiveHcatalogCoreJar(): File = {
-    sparkSession.getHiveFile(HIVE_HCATALOG_CORE_JAR)
-  }
-
   def loadTestTable(name: String): Unit = {
     sparkSession.loadTestTable(name)
   }