[SPARK-5941] [SQL] Unit Test loads the table src twice for leftsemijoin.q

`leftsemijoin.q` already contains a data-loading command for the table `sales`, but `TestHive` also creates and loads `sales`, which causes duplicate records to be inserted into that table.

Author: Cheng Hao <hao.cheng@intel.com>

Closes #4506 from chenghao-intel/df_table and squashes the following commits:

0be05f7 [Cheng Hao] Remove the creation of the table `sales` from TestHive
This commit is contained in:
Cheng Hao 2015-04-13 16:02:18 -07:00 committed by Michael Armbrust
parent e63a86abe2
commit c5602bdc31
5 changed files with 12 additions and 15 deletions

View file

@ -42,7 +42,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
.toDF().registerTempTable("sizeTst")
cacheTable("sizeTst")
assert(
table("sizeTst").queryExecution.logical.statistics.sizeInBytes >
table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
conf.autoBroadcastJoinThreshold)
}

View file

@ -262,12 +262,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
|WITH SERDEPROPERTIES ('field.delim'='\\t')
""".stripMargin.cmd,
"INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
TestTable("sales",
s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
|ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
|WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
""".stripMargin.cmd,
s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
TestTable("episodes",
s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'

View file

@ -1,4 +1,2 @@
Hank 2
Hank 2
Joe 2
Joe 2

View file

@ -1,4 +1,2 @@
Hank 2
Hank 2
Joe 2
Joe 2

View file

@ -25,18 +25,25 @@ import org.apache.spark.sql.hive.test.TestHive
* A set of tests that validates support for Hive SerDe.
*/
class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
override def beforeAll(): Unit = {
import TestHive._
import org.apache.hadoop.hive.serde2.RegexSerDe
super.beforeAll()
TestHive.cacheTables = false
super.beforeAll()
sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
|ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
|WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
""".stripMargin)
sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
}
// table sales is not a cached table, and will be cleared after reset
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)
createQueryTest(
"Read and write with LazySimpleSerDe (tab separated)",
"SELECT * from serdeins")
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")