[SPARK-14499][SQL][TEST] Drop Partition Does Not Delete Data of External Tables

#### What changes were proposed in this pull request?
This PR is to add a test to ensure drop partitions of an external table will not delete data.

cc yhuai andrewor14

#### How was this patch tested?
N/A

Author: gatorsmile <gatorsmile@gmail.com>

This patch had conflicts when merged, resolved by
Committer: Andrew Or <andrew@databricks.com>

Closes #12350 from gatorsmile/testDropPartition.
This commit is contained in:
gatorsmile 2016-04-14 11:03:19 -07:00 committed by Andrew Or
parent 1d04c86fc5
commit c971aee40d

View file

@ -17,6 +17,8 @@
package org.apache.spark.sql.hive.execution
import java.io.File
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{AnalysisException, QueryTest, SaveMode}
@ -126,6 +128,71 @@ class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
test("add/drop partitions - external table") {
val catalog = hiveContext.sessionState.catalog
withTempDir { tmpDir =>
val basePath = tmpDir.getCanonicalPath
val partitionPath_1stCol_part1 = new File(basePath + "/ds=2008-04-08")
val partitionPath_1stCol_part2 = new File(basePath + "/ds=2008-04-09")
val partitionPath_part1 = new File(basePath + "/ds=2008-04-08/hr=11")
val partitionPath_part2 = new File(basePath + "/ds=2008-04-09/hr=11")
val partitionPath_part3 = new File(basePath + "/ds=2008-04-08/hr=12")
val partitionPath_part4 = new File(basePath + "/ds=2008-04-09/hr=12")
val dirSet =
tmpDir :: partitionPath_1stCol_part1 :: partitionPath_1stCol_part2 ::
partitionPath_part1 :: partitionPath_part2 :: partitionPath_part3 ::
partitionPath_part4 :: Nil
val externalTab = "extTable_with_partitions"
withTable(externalTab) {
assert(tmpDir.listFiles.isEmpty)
sql(
s"""
|CREATE EXTERNAL TABLE $externalTab (key INT, value STRING)
|PARTITIONED BY (ds STRING, hr STRING)
|LOCATION '$basePath'
""".stripMargin)
// Before data insertion, all the directory are empty
assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty))
for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
sql(
s"""
|INSERT OVERWRITE TABLE $externalTab
|partition (ds='$ds',hr='$hr')
|SELECT 1, 'a'
""".stripMargin)
}
val hiveTable = catalog.getTableMetadata(TableIdentifier(externalTab, Some("default")))
assert(hiveTable.tableType == CatalogTableType.EXTERNAL_TABLE)
// After data insertion, all the directory are not empty
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
sql(
s"""
|ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-08'),
|PARTITION (ds='2008-04-09', hr='12')
""".stripMargin)
assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet ==
Set(Map("ds" -> "2008-04-09", "hr" -> "11")))
// drop partition will not delete the data of external table
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
sql(s"ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12')")
assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet ==
Set(Map("ds" -> "2008-04-08", "hr" -> "12"), Map("ds" -> "2008-04-09", "hr" -> "11")))
// add partition will not delete the data
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
sql(s"DROP TABLE $externalTab")
// drop table will not delete the data of external table
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
}
}
}
test("drop views") {
withTable("tab1") {
val tabName = "tab1"