[SPARK-20594][SQL] The staging directory should be a child directory starts with "." to avoid being deleted if we set hive.exec.stagingdir under the table directory.
JIRA Issue: https://issues.apache.org/jira/browse/SPARK-20594 ## What changes were proposed in this pull request? The staging directory should be a child directory starts with "." to avoid being deleted before moving staging directory to table directory if we set hive.exec.stagingdir under the table directory. ## How was this patch tested? Added unit tests Author: zuotingbing <zuo.tingbing9@zte.com.cn> Closes #17858 from zuotingbing/spark-stagingdir.
This commit is contained in:
parent
0d3a63193c
commit
e3d2022e4b
|
@ -17,7 +17,7 @@
|
|||
|
||||
package org.apache.spark.sql.hive.execution
|
||||
|
||||
import java.io.IOException
|
||||
import java.io.{File, IOException}
|
||||
import java.net.URI
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.{Date, Locale, Random}
|
||||
|
@ -97,12 +97,24 @@ case class InsertIntoHiveTable(
|
|||
val inputPathUri: URI = inputPath.toUri
|
||||
val inputPathName: String = inputPathUri.getPath
|
||||
val fs: FileSystem = inputPath.getFileSystem(hadoopConf)
|
||||
val stagingPathName: String =
|
||||
var stagingPathName: String =
|
||||
if (inputPathName.indexOf(stagingDir) == -1) {
|
||||
new Path(inputPathName, stagingDir).toString
|
||||
} else {
|
||||
inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length)
|
||||
}
|
||||
|
||||
// SPARK-20594: This is a walk-around fix to resolve a Hive bug. Hive requires that the
|
||||
// staging directory needs to avoid being deleted when users set hive.exec.stagingdir
|
||||
// under the table directory.
|
||||
if (FileUtils.isSubDir(new Path(stagingPathName), inputPath, fs) &&
|
||||
!stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) {
|
||||
logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " +
|
||||
"with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " +
|
||||
"directory.")
|
||||
stagingPathName = new Path(inputPathName, ".hive-staging").toString
|
||||
}
|
||||
|
||||
val dir: Path =
|
||||
fs.makeQualified(
|
||||
new Path(stagingPathName + "_" + executionId + "-" + TaskRunner.getTaskRunnerID))
|
||||
|
|
|
@ -494,4 +494,15 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef
|
|||
spark.table("t").write.insertInto(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
test("SPARK-20594: hive.exec.stagingdir was deleted by Hive") {
|
||||
// Set hive.exec.stagingdir under the table directory without start with ".".
|
||||
withSQLConf("hive.exec.stagingdir" -> "./test") {
|
||||
withTable("test_table") {
|
||||
sql("CREATE TABLE test_table (key int)")
|
||||
sql("INSERT OVERWRITE TABLE test_table SELECT 1")
|
||||
checkAnswer(sql("SELECT * FROM test_table"), Row(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue