[SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths
## What changes were proposed in this pull request?

The path rewrite in TestHiveSparkSession is pretty hacky. I think we can remove that complexity and just do a string replacement when we read the query files in. This also removes the overloading of runNativeSql in TestHive, which will simplify the removal of Hive-specific variable substitution.

## How was this patch tested?

This is a small test refactoring to simplify test infrastructure.

Author: Reynold Xin <rxin@databricks.com>

Closes #12543 from rxin/SPARK-14775.
This commit is contained in:
parent: f47dbf27fa
commit: 24f338ba7b
@ -182,19 +182,6 @@ private[hive] class TestHiveSparkSession(
|
||||||
Option(System.getenv(envVar)).map(new File(_))
|
Option(System.getenv(envVar)).map(new File(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Replaces relative paths to the parent directory "../" with hiveDevHome since this is how the
|
|
||||||
* hive test cases assume the system is set up.
|
|
||||||
*/
|
|
||||||
private[hive] def rewritePaths(cmd: String): String =
|
|
||||||
if (cmd.toUpperCase contains "LOAD DATA") {
|
|
||||||
val testDataLocation =
|
|
||||||
hiveDevHome.map(_.getCanonicalPath).getOrElse(inRepoTests.getCanonicalPath)
|
|
||||||
cmd.replaceAll("\\.\\./\\.\\./", testDataLocation + "/")
|
|
||||||
} else {
|
|
||||||
cmd
|
|
||||||
}
|
|
||||||
|
|
||||||
val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
|
val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
|
||||||
hiveFilesTemp.delete()
|
hiveFilesTemp.delete()
|
||||||
hiveFilesTemp.mkdir()
|
hiveFilesTemp.mkdir()
|
||||||
|
@ -566,11 +553,6 @@ private[hive] class TestHiveSessionState(sparkSession: TestHiveSparkSession)
|
||||||
override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
|
override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
|
||||||
new TestHiveQueryExecution(sparkSession, plan)
|
new TestHiveQueryExecution(sparkSession, plan)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Override so we can intercept relative paths and rewrite them to point at hive.
|
|
||||||
override def runNativeSql(sql: String): Seq[String] = {
|
|
||||||
super.runNativeSql(sparkSession.rewritePaths(substitutor.substitute(hiveconf, sql)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
1
sql/hive/src/test/resources/hive-test-path-helper.txt
Normal file
1
sql/hive/src/test/resources/hive-test-path-helper.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
This file is here so we can match on it and find the path to the current folder.
|
|
@ -47,6 +47,17 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution}
|
||||||
abstract class HiveComparisonTest
|
abstract class HiveComparisonTest
|
||||||
extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen {
|
extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Path to the test datasets. We find this by looking up "hive-test-path-helper.txt" file.
|
||||||
|
*
|
||||||
|
* Before we run the query in Spark, we replace "../../data" with this path.
|
||||||
|
*/
|
||||||
|
private val testDataPath: String = {
|
||||||
|
Thread.currentThread.getContextClassLoader
|
||||||
|
.getResource("hive-test-path-helper.txt")
|
||||||
|
.getPath.replace("/hive-test-path-helper.txt", "/data")
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When set, any cache files that result in test failures will be deleted. Used when the test
|
* When set, any cache files that result in test failures will be deleted. Used when the test
|
||||||
* harness or hive have been updated thus requiring new golden answers to be computed for some
|
* harness or hive have been updated thus requiring new golden answers to be computed for some
|
||||||
|
@ -386,7 +397,8 @@ abstract class HiveComparisonTest
|
||||||
var query: TestHiveQueryExecution = null
|
var query: TestHiveQueryExecution = null
|
||||||
try {
|
try {
|
||||||
query = {
|
query = {
|
||||||
val originalQuery = new TestHiveQueryExecution(queryString)
|
val originalQuery = new TestHiveQueryExecution(
|
||||||
|
queryString.replace("../../data", testDataPath))
|
||||||
val containsCommands = originalQuery.analyzed.collectFirst {
|
val containsCommands = originalQuery.analyzed.collectFirst {
|
||||||
case _: Command => ()
|
case _: Command => ()
|
||||||
case _: LogicalInsertIntoHiveTable => ()
|
case _: LogicalInsertIntoHiveTable => ()
|
||||||
|
|
|
@ -40,14 +40,14 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
|
||||||
|
|
||||||
def testCases: Seq[(String, File)]
|
def testCases: Seq[(String, File)]
|
||||||
|
|
||||||
val runAll =
|
val runAll: Boolean =
|
||||||
!(System.getProperty("spark.hive.alltests") == null) ||
|
!(System.getProperty("spark.hive.alltests") == null) ||
|
||||||
runOnlyDirectories.nonEmpty ||
|
runOnlyDirectories.nonEmpty ||
|
||||||
skipDirectories.nonEmpty
|
skipDirectories.nonEmpty
|
||||||
|
|
||||||
val whiteListProperty = "spark.hive.whitelist"
|
val whiteListProperty: String = "spark.hive.whitelist"
|
||||||
// Allow the whiteList to be overridden by a system property
|
// Allow the whiteList to be overridden by a system property
|
||||||
val realWhiteList =
|
val realWhiteList: Seq[String] =
|
||||||
Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)
|
Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)
|
||||||
|
|
||||||
// Go through all the test cases and add them to scala test.
|
// Go through all the test cases and add them to scala test.
|
||||||
|
|
Loading…
Reference in a new issue