[SPARK-14499][SQL][TEST] Drop Partition Does Not Delete Data of External Tables
#### What changes were proposed in this pull request? This PR adds a test to ensure that dropping partitions of an external table does not delete the table's data. cc yhuai andrewor14 #### How was this patch tested? N/A Author: gatorsmile <gatorsmile@gmail.com> This patch had conflicts when merged, resolved by Committer: Andrew Or <andrew@databricks.com> Closes #12350 from gatorsmile/testDropPartition.
This commit is contained in:
parent
1d04c86fc5
commit
c971aee40d
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.spark.sql.hive.execution
|
||||
|
||||
import java.io.File
|
||||
|
||||
import org.apache.hadoop.fs.Path
|
||||
|
||||
import org.apache.spark.sql.{AnalysisException, QueryTest, SaveMode}
|
||||
|
@ -126,6 +128,71 @@ class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
}
|
||||
}
|
||||
|
||||
// Verifies that partition- and table-level DDL on an EXTERNAL table only touches catalog
// metadata and never removes files under the table's LOCATION.
//
// Steps: create an external table partitioned by (ds, hr) on a fresh temp directory, insert
// one row into each of the four (ds, hr) partitions, then DROP PARTITION, ADD PARTITION and
// finally DROP TABLE, checking after every step that each tracked directory still has content.
test("add/drop partitions - external table") {
  val catalog = hiveContext.sessionState.catalog
  withTempDir { tmpDir =>
    val basePath = tmpDir.getCanonicalPath
    val partitionPath_1stCol_part1 = new File(basePath + "/ds=2008-04-08")
    val partitionPath_1stCol_part2 = new File(basePath + "/ds=2008-04-09")
    val partitionPath_part1 = new File(basePath + "/ds=2008-04-08/hr=11")
    val partitionPath_part2 = new File(basePath + "/ds=2008-04-09/hr=11")
    val partitionPath_part3 = new File(basePath + "/ds=2008-04-08/hr=12")
    val partitionPath_part4 = new File(basePath + "/ds=2008-04-09/hr=12")
    // Every directory whose content is checked: the table root, both first-level (ds)
    // directories and all four leaf (ds, hr) partition directories.
    val dirSet =
      tmpDir :: partitionPath_1stCol_part1 :: partitionPath_1stCol_part2 ::
        partitionPath_part1 :: partitionPath_part2 :: partitionPath_part3 ::
        partitionPath_part4 :: Nil

    // File.listFiles returns null (not an empty array) when the path is not an existing
    // directory, so wrap it in Option to avoid a NullPointerException inside the assertions.
    def isEmptyOrMissing(dir: File): Boolean = Option(dir.listFiles).forall(_.isEmpty)
    def hasData(dir: File): Boolean = Option(dir.listFiles).exists(_.nonEmpty)

    // Fails with the list of offending directories instead of a bare `false` or an NPE.
    def assertAllDirsHaveData(stage: String): Unit = {
      val dirsWithoutData = dirSet.filterNot(hasData)
      val listing = dirsWithoutData.mkString(", ")
      assert(dirsWithoutData.isEmpty, s"Directories missing or empty $stage: $listing")
    }

    val externalTab = "extTable_with_partitions"
    withTable(externalTab) {
      assert(tmpDir.listFiles.isEmpty)
      sql(
        s"""
           |CREATE EXTERNAL TABLE $externalTab (key INT, value STRING)
           |PARTITIONED BY (ds STRING, hr STRING)
           |LOCATION '$basePath'
         """.stripMargin)

      // Before data insertion, all the directories are empty (or do not exist yet)
      assert(dirSet.forall(isEmptyOrMissing))

      for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
        sql(
          s"""
             |INSERT OVERWRITE TABLE $externalTab
             |partition (ds='$ds',hr='$hr')
             |SELECT 1, 'a'
           """.stripMargin)
      }

      val hiveTable = catalog.getTableMetadata(TableIdentifier(externalTab, Some("default")))
      assert(hiveTable.tableType == CatalogTableType.EXTERNAL_TABLE)
      // After data insertion, none of the directories is empty
      assertAllDirsHaveData("after data insertion")

      // The first spec names only `ds`; the assertion below expects it to remove both `hr`
      // partitions of 2008-04-08, leaving (2008-04-09, 11) as the sole survivor.
      sql(
        s"""
           |ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-08'),
           |PARTITION (ds='2008-04-09', hr='12')
         """.stripMargin)
      assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet ==
        Set(Map("ds" -> "2008-04-09", "hr" -> "11")))
      // drop partition will not delete the data of external table
      assertAllDirsHaveData("after DROP PARTITION")

      sql(s"ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12')")
      assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet ==
        Set(Map("ds" -> "2008-04-08", "hr" -> "12"), Map("ds" -> "2008-04-09", "hr" -> "11")))
      // add partition will not delete the data
      assertAllDirsHaveData("after ADD PARTITION")

      sql(s"DROP TABLE $externalTab")
      // drop table will not delete the data of external table
      assertAllDirsHaveData("after DROP TABLE")
    }
  }
}
|
||||
|
||||
test("drop views") {
|
||||
withTable("tab1") {
|
||||
val tabName = "tab1"
|
||||
|
|
Loading…
Reference in a new issue