[SPARK-4908][SQL] Prevent multiple concurrent hive native commands

This is just a quick fix that takes a lock when calling `runHive`. If we can find a way to avoid the error without a global lock, that would be better.

Author: Michael Armbrust <michael@databricks.com>

Closes #3834 from marmbrus/hiveConcurrency and squashes the following commits:

bf25300 [Michael Armbrust] prevent multiple concurrent hive native commands
This commit is contained in:
Michael Armbrust 2014-12-30 11:24:46 -08:00
parent efa80a531e
commit 480bd1d2ed
2 changed files with 8 additions and 1 deletions

View file

@ -284,7 +284,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
* Execute the command using Hive and return the results as a sequence. Each element
* in the sequence is one row.
*/
protected def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = {
protected def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = synchronized {
try {
val cmd_trimmed: String = cmd.trim()
val tokens: Array[String] = cmd_trimmed.split("\\s+")

View file

@ -56,6 +56,13 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
Locale.setDefault(originalLocale)
}
test("SPARK-4908: concurent hive native commands") {
  // Issue the same pair of native commands from a parallel collection so that
  // several invocations can overlap. There is no explicit assertion: the test
  // passes as long as none of the calls throws.
  val attempts = 1 to 100
  attempts.par.foreach { _ =>
    sql("USE default")
    sql("SHOW TABLES")
  }
}
createQueryTest("constant object inspector for generic udf",
"""SELECT named_struct(
lower("AA"), "10",