[SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
### What changes were proposed in this pull request?

The Hive built-in `hash` function has not been used by Spark since Spark 2.0. The public interface does not allow users to unregister Spark's built-in functions, so users can never reach Hive's built-in `hash`. The only exception is `TestHiveFunctionRegistry`, which does allow built-in functions to be unregistered, and is therefore the only way Hive's `hash` gets loaded, in test cases. If we disable it there, 10+ test cases fail because their results differ from the Hive golden answer files.

This PR removes `hash` from the list of `hiveFunctions` in `HiveSessionCatalog` and also removes `TestHiveFunctionRegistry`. This removal makes it easier to remove `TestHiveSessionState` in the future.

### How was this patch tested?

N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #14498 from gatorsmile/removeHash.
parent 9db06c442c
commit 57626a5570
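Since the Hive version is now unreachable, any use of `hash` resolves to Spark's native Murmur3-based expression. Below is a minimal sketch — a hypothetical standalone app, not part of this patch; the app name and local master are illustrative — showing the SQL and DataFrame forms that both hit the native implementation:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.hash

object HashResolutionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")                  // illustrative; any deployment works
      .appName("hash-resolution-sketch")
      .getOrCreate()
    import spark.implicits._

    // `hash` resolves to Spark's Murmur3-based expression, not Hive's
    // hashCode-style implementation, so results can differ from Hive's
    // golden answer files (hence the tests excluded in this patch).
    spark.sql("SELECT hash('spark') AS h").show()
    Seq("spark").toDF("s").select(hash($"s").alias("h")).show()

    spark.stop()
  }
}
```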
Changes to `HiveCompatibilitySuite`:

```diff
@@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
     // Enable in-memory partition pruning for testing purposes
     TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
-    // Use Hive hash expression instead of the native one
-    TestHive.sessionState.functionRegistry.unregisterFunction("hash")
     // Ensures that the plans generation use metastore relation and not OrcRelation
     // Was done because SqlBuilder does not work with plans having logical relation
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
@@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
     TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
-    TestHive.sessionState.functionRegistry.restore()
 
     // For debugging dump some statistics about how much time was spent in various optimizer rules
     logWarning(RuleExecutor.dumpTimeSpent())
@@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "auto_join6",
     "auto_join7",
     "auto_join8",
-    "auto_join9"
+    "auto_join9",
+
+    // These tests are based on the Hive's hash function, which is different from Spark
+    "auto_join19",
+    "auto_join22",
+    "auto_join25",
+    "auto_join26",
+    "auto_join27",
+    "auto_join28",
+    "auto_join30",
+    "auto_join31",
+    "auto_join_nulls",
+    "auto_join_reordering_values",
+    "correlationoptimizer1",
+    "correlationoptimizer2",
+    "correlationoptimizer3",
+    "correlationoptimizer4",
+    "multiMapJoin1",
+    "orc_dictionary_threshold",
+    "udf_hash"
   )
 
   /**
@@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "annotate_stats_part",
     "annotate_stats_table",
     "annotate_stats_union",
-    "auto_join19",
-    "auto_join22",
-    "auto_join25",
-    "auto_join26",
-    "auto_join27",
-    "auto_join28",
-    "auto_join30",
-    "auto_join31",
-    "auto_join_nulls",
-    "auto_join_reordering_values",
     "binary_constant",
     "binarysortable_1",
     "cast1",
@@ -623,15 +629,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "compute_stats_long",
     "compute_stats_string",
     "convert_enum_to_string",
-    "correlationoptimizer1",
     "correlationoptimizer10",
     "correlationoptimizer11",
     "correlationoptimizer13",
     "correlationoptimizer14",
     "correlationoptimizer15",
-    "correlationoptimizer2",
-    "correlationoptimizer3",
-    "correlationoptimizer4",
     "correlationoptimizer6",
     "correlationoptimizer7",
     "correlationoptimizer8",
@@ -871,7 +873,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "merge2",
     "merge4",
     "mergejoins",
-    "multiMapJoin1",
     "multiMapJoin2",
     "multi_insert_gby",
     "multi_insert_gby3",
@@ -893,7 +894,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "nullinput2",
     "nullscript",
     "optional_outer",
-    "orc_dictionary_threshold",
     "order",
     "order2",
     "outer_join_ppr",
@@ -1026,7 +1026,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_from_unixtime",
     "udf_greaterthan",
     "udf_greaterthanorequal",
-    "udf_hash",
     "udf_hex",
     "udf_if",
     "udf_index",
```
Changes to `HiveSessionCatalog`:

```diff
@@ -233,7 +233,6 @@ private[sql] class HiveSessionCatalog(
   // in_file, index, matchpath, ngrams, noop, noopstreaming, noopwithmap,
   // noopwithmapstreaming, parse_url_tuple, reflect2, windowingtablefunction.
   private val hiveFunctions = Seq(
-    "hash",
     "histogram_numeric",
     "percentile"
   )
```
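For context on the list touched above: `hiveFunctions` is the allow-list of names for which `HiveSessionCatalog` falls back to Hive's built-in registry when a function is not found natively. A simplified, hypothetical sketch of that lookup order — the structure and names here are invented for illustration and do not mirror the real catalog code:

```scala
object FunctionLookupSketch {
  // Stand-ins for Spark's native registry and the post-patch hiveFunctions list.
  private val nativeRegistry = Set("hash", "abs", "concat")
  private val hiveFunctions = Seq("histogram_numeric", "percentile")

  def lookup(name: String): String =
    if (nativeRegistry.contains(name)) s"native:$name"          // native always wins
    else if (hiveFunctions.contains(name)) s"hive-builtin:$name" // Hive fallback
    else throw new IllegalArgumentException(s"Undefined function: $name")

  def main(args: Array[String]): Unit = {
    println(lookup("hash"))              // native:hash — Hive's version is unreachable
    println(lookup("histogram_numeric")) // hive-builtin:histogram_numeric
  }
}
```

With `"hash"` dropped from the list, even a hypothetical miss in the native registry could no longer reach Hive's implementation.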
Changes to `TestHive` (removal of `TestHiveFunctionRegistry` and of the `functionRegistry` override in `TestHiveSessionState`):

```diff
@@ -492,24 +492,6 @@ private[hive] class TestHiveQueryExecution(
   }
 }
 
-private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry {
-
-  private val removedFunctions =
-    collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))]
-
-  def unregisterFunction(name: String): Unit = synchronized {
-    functionBuilders.remove(name).foreach(f => removedFunctions += name -> f)
-  }
-
-  def restore(): Unit = synchronized {
-    removedFunctions.foreach {
-      case (name, (info, builder)) => registerFunction(name, info, builder)
-    }
-  }
-}
-
-
-
 private[hive] class TestHiveSessionState(
   sparkSession: TestHiveSparkSession)
   extends HiveSessionState(sparkSession) { self =>
@@ -525,16 +507,6 @@ private[hive] class TestHiveSessionState(
   }
 }
 
-  override lazy val functionRegistry: TestHiveFunctionRegistry = {
-    // We use TestHiveFunctionRegistry at here to track functions that have been explicitly
-    // unregistered (through TestHiveFunctionRegistry.unregisterFunction method).
-    val fr = new TestHiveFunctionRegistry
-    org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach {
-      case (name, (info, builder)) => fr.registerFunction(name, info, builder)
-    }
-    fr
-  }
-
   override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
     new TestHiveQueryExecution(sparkSession, plan)
   }
```
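The deleted `TestHiveFunctionRegistry` amounted to a save-and-restore pattern over a mutable name-to-builder map. A self-contained sketch of that pattern, with builders simplified to plain strings and all names hypothetical:

```scala
import scala.collection.mutable

// Sketch of the unregister/restore pattern the deleted registry implemented.
class RestorableRegistry(initial: Map[String, String]) {
  private val builders = mutable.Map(initial.toSeq: _*)
  private val removed = mutable.ArrayBuffer.empty[(String, String)]

  def unregister(name: String): Unit = synchronized {
    // Remember what was removed so it can be re-registered later.
    builders.remove(name).foreach(b => removed += name -> b)
  }

  def restore(): Unit = synchronized {
    removed.foreach { case (name, b) => builders.update(name, b) }
    removed.clear()
  }

  def contains(name: String): Boolean = synchronized(builders.contains(name))
}

object RestorableRegistrySketch extends App {
  val reg = new RestorableRegistry(Map("hash" -> "nativeHashBuilder"))
  reg.unregister("hash")
  assert(!reg.contains("hash")) // a test suite could now swap in Hive's hash
  reg.restore()
  assert(reg.contains("hash"))  // the built-in is back after the suite finishes
}
```

Removing this hook closes the only path by which a built-in could be unregistered, which is exactly what makes the `hash` entry in `hiveFunctions` dead code.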