[SPARK-5941] [SQL] Unit Test loads the table `sales`
twice for leftsemijoin.q
`leftsemijoin.q` already contains a data-loading command for the table `sales`, but `TestHive` also creates and loads the table `sales`, which causes duplicate records to be inserted into `sales`. Author: Cheng Hao <hao.cheng@intel.com> Closes #4506 from chenghao-intel/df_table and squashes the following commits: 0be05f7 [Cheng Hao] Remove the creation of the table `sales` from TestHive
This commit is contained in:
parent
e63a86abe2
commit
c5602bdc31
|
@ -42,7 +42,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
|
|||
.toDF().registerTempTable("sizeTst")
|
||||
cacheTable("sizeTst")
|
||||
assert(
|
||||
table("sizeTst").queryExecution.logical.statistics.sizeInBytes >
|
||||
table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
|
||||
conf.autoBroadcastJoinThreshold)
|
||||
}
|
||||
|
||||
|
|
|
@ -262,12 +262,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
|
|||
|WITH SERDEPROPERTIES ('field.delim'='\\t')
|
||||
""".stripMargin.cmd,
|
||||
"INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
|
||||
TestTable("sales",
|
||||
s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
|
||||
|ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
|
||||
|WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
|
||||
""".stripMargin.cmd,
|
||||
s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
|
||||
TestTable("episodes",
|
||||
s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
|
||||
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
|
||||
|
|
|
@ -1,4 +1,2 @@
|
|||
Hank 2
|
||||
Hank 2
|
||||
Joe 2
|
||||
Joe 2
|
||||
|
|
|
@ -1,4 +1,2 @@
|
|||
Hank 2
|
||||
Hank 2
|
||||
Joe 2
|
||||
Joe 2
|
||||
|
|
|
@ -25,18 +25,25 @@ import org.apache.spark.sql.hive.test.TestHive
|
|||
* A set of tests that validates support for Hive SerDe.
|
||||
*/
|
||||
class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
|
||||
|
||||
override def beforeAll(): Unit = {
|
||||
TestHive.cacheTables = false
|
||||
import TestHive._
|
||||
import org.apache.hadoop.hive.serde2.RegexSerDe
|
||||
super.beforeAll()
|
||||
TestHive.cacheTables = false
|
||||
sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
|
||||
|ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
|
||||
|WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
|
||||
""".stripMargin)
|
||||
sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
|
||||
}
|
||||
|
||||
// table sales is not a cached table, and will be cleared after reset
|
||||
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)
|
||||
|
||||
createQueryTest(
|
||||
"Read and write with LazySimpleSerDe (tab separated)",
|
||||
"SELECT * from serdeins")
|
||||
|
||||
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
|
||||
|
||||
createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
|
||||
|
||||
createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
|
||||
|
|
Loading…
Reference in a new issue