[SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths

## What changes were proposed in this pull request?
The path rewrite in TestHiveSparkSession is pretty hacky. I think we can remove that complexity and just do a string replacement when we read the query files in. This would remove the override of runNativeSql in TestHive, which will simplify the removal of Hive-specific variable substitution.

## How was this patch tested?
This is a small test refactoring to simplify test infrastructure.

Author: Reynold Xin <rxin@databricks.com>

Closes #12543 from rxin/SPARK-14775.
This commit is contained in:
Reynold Xin 2016-04-20 17:56:31 -07:00
parent f47dbf27fa
commit 24f338ba7b
4 changed files with 17 additions and 22 deletions

View file

@ -182,19 +182,6 @@ private[hive] class TestHiveSparkSession(
Option(System.getenv(envVar)).map(new File(_))
}
/**
 * Rewrites the relative prefix "../../" inside LOAD DATA commands so that it points at an
 * absolute test-data location, since this is how the hive test cases assume the system is
 * set up.
 *
 * The location is the canonical path of `hiveDevHome` when it is defined, otherwise the
 * canonical path of `inRepoTests`. Commands that do not contain "LOAD DATA" (compared
 * case-insensitively via upper-casing) are returned unchanged.
 *
 * @param cmd the SQL command text to inspect
 * @return the command with every "../../" replaced, or `cmd` itself when it is not a
 *         LOAD DATA command
 */
private[hive] def rewritePaths(cmd: String): String =
  if (cmd.toUpperCase.contains("LOAD DATA")) {
    val testDataLocation =
      hiveDevHome.map(_.getCanonicalPath).getOrElse(inRepoTests.getCanonicalPath)
    // Literal replacement on purpose: replaceAll would interpret '\' and '$' in the
    // replacement (e.g. a Windows canonical path) as escapes / group references.
    cmd.replace("../../", testDataLocation + "/")
  } else {
    cmd
  }
val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
hiveFilesTemp.delete()
hiveFilesTemp.mkdir()
@ -566,11 +553,6 @@ private[hive] class TestHiveSessionState(sparkSession: TestHiveSparkSession)
override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
new TestHiveQueryExecution(sparkSession, plan)
}
// Intercepts native SQL so that, after variable substitution, relative paths are rewritten
// to point at hive before the statement is handed to the parent implementation.
override def runNativeSql(sql: String): Seq[String] = {
  val substituted = substitutor.substitute(hiveconf, sql)
  val rewritten = sparkSession.rewritePaths(substituted)
  super.runNativeSql(rewritten)
}
}

View file

@ -0,0 +1 @@
This file is here so we can match on it and find the path to the current folder.

View file

@ -47,6 +47,17 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution}
abstract class HiveComparisonTest
extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen {
/**
 * Path to the test datasets. We find this by looking up the "hive-test-path-helper.txt" file
 * through the context class loader and swapping the file name for "data" in its path.
 *
 * Before we run the query in Spark, we replace "../../data" with this path.
 *
 * Fails with an IllegalArgumentException (via `require`) when the helper file cannot be
 * found, instead of an uninformative NullPointerException.
 */
private val testDataPath: String = {
  val helperFileName = "hive-test-path-helper.txt"
  // getResource returns null (it does not throw) when the resource is missing.
  val helperUrl = Thread.currentThread.getContextClassLoader.getResource(helperFileName)
  require(helperUrl != null,
    s"Could not find $helperFileName on the classpath; cannot locate the hive test data")
  helperUrl.getPath.replace("/" + helperFileName, "/data")
}
/**
* When set, any cache files that result in test failures will be deleted. Used when the test
* harness or hive have been updated thus requiring new golden answers to be computed for some
@ -386,7 +397,8 @@ abstract class HiveComparisonTest
var query: TestHiveQueryExecution = null
try {
query = {
val originalQuery = new TestHiveQueryExecution(queryString)
val originalQuery = new TestHiveQueryExecution(
queryString.replace("../../data", testDataPath))
val containsCommands = originalQuery.analyzed.collectFirst {
case _: Command => ()
case _: LogicalInsertIntoHiveTable => ()

View file

@ -40,14 +40,14 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
def testCases: Seq[(String, File)]
val runAll =
val runAll: Boolean =
!(System.getProperty("spark.hive.alltests") == null) ||
runOnlyDirectories.nonEmpty ||
skipDirectories.nonEmpty
val whiteListProperty = "spark.hive.whitelist"
val whiteListProperty: String = "spark.hive.whitelist"
// Allow the whiteList to be overridden by a system property
val realWhiteList =
val realWhiteList: Seq[String] =
Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)
// Go through all the test cases and add them to scala test.