Whitelist Hive Tests

This is ready when Jenkins is.

Author: Michael Armbrust <michael@databricks.com>

Closes #596 from marmbrus/moreTests and squashes the following commits:

85be703 [Michael Armbrust] Blacklist MR required tests.
35bc311 [Michael Armbrust] Add hive golden answers.
ede98fd [Michael Armbrust] More hive gitignore
da096ea [Michael Armbrust] update whitelist
This commit is contained in:
Michael Armbrust 2014-05-03 23:13:51 -07:00 committed by Patrick Wendell
parent b295714708
commit 92b2902ca0
87 changed files with 328 additions and 4 deletions

1
.gitignore vendored
View file

@ -54,3 +54,4 @@ scalastyle.txt
metastore_db/ metastore_db/
metastore/ metastore/
warehouse/ warehouse/
TempStatsStore/

View file

@ -0,0 +1,4 @@
key int None
value string None
Detailed Table Information Table(tableName:dest1, dbName:default, owner:marmbrus, createTime:1398823397, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/private/var/folders/36/cjkbrr953xg2p_krwrmn8h_r0000gn/T/sparkHiveWarehouse6323689881248298063/dest1, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1398823397}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)

View file

@ -0,0 +1,10 @@
0 val_0
4 val_4
8 val_8
0 val_0
0 val_0
5 val_5
5 val_5
2 val_2
5 val_5
9 val_9

View file

@ -0,0 +1,4 @@
key int None
value string None
Detailed Table Information Table(tableName:dest1, dbName:default, owner:marmbrus, createTime:1398823407, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/private/var/folders/36/cjkbrr953xg2p_krwrmn8h_r0000gn/T/sparkHiveWarehouse6323689881248298063/dest1, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1398823407}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)

View file

@ -0,0 +1,10 @@
0 val_0
4 val_4
8 val_8
0 val_0
0 val_0
5 val_5
5 val_5
2 val_2
5 val_5
9 val_9

View file

@ -0,0 +1,2 @@
key int None
value string None

View file

@ -0,0 +1,10 @@
key int None
value string None
ds string None
country string None
# Partition Information
# col_name data_type comment
ds string None
country string None

View file

@ -0,0 +1,2 @@
key int None
value string None

View file

@ -0,0 +1,22 @@
NULL NULL 66 val_66 66 val_66
NULL NULL 98 val_98 98 val_98
NULL NULL 98 val_98 98 val_98
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 146 val_146 146 val_146
NULL NULL 146 val_146 146 val_146
NULL NULL 150 val_150 150 val_150
NULL NULL 213 val_213 213 val_213
NULL NULL 213 val_213 213 val_213
NULL NULL 224 224 val_224
NULL NULL 224 224 val_224
NULL NULL 238 val_238 238 val_238
NULL NULL 238 val_238 238 val_238
NULL NULL 255 val_255 255 val_255
NULL NULL 255 val_255 255 val_255
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 278 val_278 278 val_278
NULL NULL 278 val_278 278 val_278

View file

@ -0,0 +1,22 @@
NULL NULL 66 val_66 66 val_66
NULL NULL 98 val_98 98 val_98
NULL NULL 98 val_98 98 val_98
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 146 val_146 146 val_146
NULL NULL 146 val_146 146 val_146
NULL NULL 150 val_150 150 val_150
NULL NULL 213 val_213 213 val_213
NULL NULL 213 val_213 213 val_213
NULL NULL 224 224 val_224
NULL NULL 224 224 val_224
NULL NULL 238 val_238 238 val_238
NULL NULL 238 val_238 238 val_238
NULL NULL 255 val_255 255 val_255
NULL NULL 255 val_255 255 val_255
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 278 val_278 278 val_278
NULL NULL 278 val_278 278 val_278

View file

@ -0,0 +1,22 @@
NULL NULL 66 val_66 66 val_66
NULL NULL 98 val_98 98 val_98
NULL NULL 98 val_98 98 val_98
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 128 128 val_128
NULL NULL 146 val_146 146 val_146
NULL NULL 146 val_146 146 val_146
NULL NULL 150 val_150 150 val_150
NULL NULL 213 val_213 213 val_213
NULL NULL 213 val_213 213 val_213
NULL NULL 224 224 val_224
NULL NULL 224 224 val_224
NULL NULL 238 val_238 238 val_238
NULL NULL 238 val_238 238 val_238
NULL NULL 255 val_255 255 val_255
NULL NULL 255 val_255 255 val_255
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 273 val_273 273 val_273
NULL NULL 278 val_278 278 val_278
NULL NULL 278 val_278 278 val_278

View file

@ -0,0 +1,32 @@
# col_name data_type comment
key string None
value string None
# Detailed Table Information
Database: default
Owner: marmbrus
CreateTime: Tue Apr 29 20:55:07 PDT 2014
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: file:/private/var/folders/36/cjkbrr953xg2p_krwrmn8h_r0000gn/T/sparkHiveWarehouse6323689881248298063/total_ordered
Table Type: MANAGED_TABLE
Table Parameters:
numFiles 1
numPartitions 0
numRows 48
rawDataSize 512
totalSize 560
transient_lastDdlTime 1398830107
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1

View file

@ -0,0 +1,48 @@
128 val_128
128 val_128
150 val_150
150 val_150
165 val_165
165 val_165
193 val_193
193 val_193
213 val_213
213 val_213
213 val_213
213 val_213
213 val_214
213 val_214
224 val_224
224 val_224
238 val_238
238 val_238
238 val_239
238 val_239
238 val_240
238 val_240
255 val_255
255 val_255
265 val_265
265 val_265
27 val_27
27 val_27
273 val_273
273 val_273
278 val_278
278 val_278
311 val_311
311 val_311
369 val_369
369 val_369
401 val_401
401 val_401
409 val_409
409 val_409
484 val_484
484 val_484
66 val_66
66 val_66
86 val_86
86 val_86
98 val_98
98 val_98

View file

@ -0,0 +1,32 @@
# col_name data_type comment
key string None
value string None
# Detailed Table Information
Database: default
Owner: marmbrus
CreateTime: Tue Apr 29 20:54:55 PDT 2014
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: file:/private/var/folders/36/cjkbrr953xg2p_krwrmn8h_r0000gn/T/sparkHiveWarehouse6323689881248298063/total_ordered
Table Type: MANAGED_TABLE
Table Parameters:
numFiles 1
numPartitions 0
numRows 48
rawDataSize 512
totalSize 560
transient_lastDdlTime 1398830095
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1

View file

@ -0,0 +1,48 @@
128 val_128
128 val_128
150 val_150
150 val_150
165 val_165
165 val_165
193 val_193
193 val_193
213 val_213
213 val_213
213 val_213
213 val_213
213 val_214
213 val_214
224 val_224
224 val_224
238 val_238
238 val_238
238 val_239
238 val_239
238 val_240
238 val_240
255 val_255
255 val_255
265 val_265
265 val_265
27 val_27
27 val_27
273 val_273
273 val_273
278 val_278
278 val_278
311 val_311
311 val_311
369 val_369
369 val_369
401 val_401
401 val_401
409 val_409
409 val_409
484 val_484
484 val_484
66 val_66
66 val_66
86 val_86
86 val_86
98 val_98
98 val_98

View file

@ -0,0 +1 @@
NULL NULL NULL NULL NULL A AB ABC ABC A AB ABC ABC B BC BC BC C C C C C C C C B BC BC BC A AB ABC ABC

View file

@ -0,0 +1 @@
玩 玩 玩玩玩 abc 玩玩玩

View file

@ -112,6 +112,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"stats1.*", "stats1.*",
"stats20", "stats20",
"alter_merge_stats", "alter_merge_stats",
"columnstats.*",
// Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong. // Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
// http://stackoverflow.com/a/1573715 // http://stackoverflow.com/a/1573715
@ -176,6 +178,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
*/ */
override def whiteList = Seq( override def whiteList = Seq(
"add_part_exist", "add_part_exist",
"add_part_multiple",
"add_partition_no_whitelist", "add_partition_no_whitelist",
"add_partition_with_whitelist", "add_partition_with_whitelist",
"alias_casted_column", "alias_casted_column",
@ -290,6 +293,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"disable_file_format_check", "disable_file_format_check",
"drop_function", "drop_function",
"drop_index", "drop_index",
"drop_multi_partitions",
"drop_partitions_filter", "drop_partitions_filter",
"drop_partitions_filter2", "drop_partitions_filter2",
"drop_partitions_filter3", "drop_partitions_filter3",
@ -302,6 +306,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"escape_orderby1", "escape_orderby1",
"escape_sortby1", "escape_sortby1",
"fetch_aggregation", "fetch_aggregation",
"fileformat_sequencefile",
"fileformat_text",
"filter_join_breaktask", "filter_join_breaktask",
"filter_join_breaktask2", "filter_join_breaktask2",
"groupby1", "groupby1",
@ -310,6 +316,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"groupby1_map_nomap", "groupby1_map_nomap",
"groupby1_map_skew", "groupby1_map_skew",
"groupby1_noskew", "groupby1_noskew",
"groupby2",
"groupby2_map",
"groupby2_map_skew",
"groupby2_noskew",
"groupby4", "groupby4",
"groupby4_map", "groupby4_map",
"groupby4_map_skew", "groupby4_map_skew",
@ -333,10 +343,12 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"groupby8_noskew", "groupby8_noskew",
"groupby9", "groupby9",
"groupby_distinct_samekey", "groupby_distinct_samekey",
"groupby_map_ppr",
"groupby_multi_insert_common_distinct", "groupby_multi_insert_common_distinct",
"groupby_multi_single_reducer2", "groupby_multi_single_reducer2",
"groupby_mutli_insert_common_distinct", "groupby_mutli_insert_common_distinct",
"groupby_neg_float", "groupby_neg_float",
"groupby_ppr",
"groupby_sort_10", "groupby_sort_10",
"groupby_sort_2", "groupby_sort_2",
"groupby_sort_3", "groupby_sort_3",
@ -352,13 +364,17 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"inoutdriver", "inoutdriver",
"input", "input",
"input0", "input0",
"input1",
"input10",
"input11", "input11",
"input11_limit", "input11_limit",
"input12", "input12",
"input12_hadoop20", "input12_hadoop20",
"input14", "input14",
"input15",
"input19", "input19",
"input1_limit", "input1_limit",
"input2",
"input21", "input21",
"input22", "input22",
"input23", "input23",
@ -367,6 +383,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"input26", "input26",
"input28", "input28",
"input2_limit", "input2_limit",
"input3",
"input4",
"input40", "input40",
"input41", "input41",
"input4_cb_delim", "input4_cb_delim",
@ -374,9 +392,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"input7", "input7",
"input8", "input8",
"input9", "input9",
"inputddl4",
"inputddl7",
"inputddl8",
"input_limit", "input_limit",
"input_part0", "input_part0",
"input_part1", "input_part1",
@ -391,6 +406,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"input_part8", "input_part8",
"input_part9", "input_part9",
"input_testsequencefile", "input_testsequencefile",
"inputddl1",
"inputddl2",
"inputddl3",
"inputddl4",
"inputddl6",
"inputddl7",
"inputddl8",
"insert1", "insert1",
"insert2_overwrite_partitions", "insert2_overwrite_partitions",
"insert_compressed", "insert_compressed",
@ -452,7 +474,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"join_reorder4", "join_reorder4",
"join_star", "join_star",
"join_view", "join_view",
"lateral_view",
"lateral_view_cp", "lateral_view_cp",
"lateral_view_outer",
"lateral_view_ppd", "lateral_view_ppd",
"lineage1", "lineage1",
"literal_double", "literal_double",
@ -463,6 +487,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"loadpart1", "loadpart1",
"louter_join_ppr", "louter_join_ppr",
"mapjoin_distinct", "mapjoin_distinct",
"mapjoin_filter_on_outerjoin",
"mapjoin_mapjoin", "mapjoin_mapjoin",
"mapjoin_subquery", "mapjoin_subquery",
"mapjoin_subquery2", "mapjoin_subquery2",
@ -577,6 +602,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"skewjoinopt13", "skewjoinopt13",
"skewjoinopt18", "skewjoinopt18",
"skewjoinopt9", "skewjoinopt9",
"smb_mapjoin9",
"smb_mapjoin_1", "smb_mapjoin_1",
"smb_mapjoin_10", "smb_mapjoin_10",
"smb_mapjoin_13", "smb_mapjoin_13",
@ -621,8 +647,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_10_trims", "udf_10_trims",
"udf2", "udf2",
"udf6", "udf6",
"udf7",
"udf8", "udf8",
"udf9", "udf9",
"udf_E",
"udf_PI",
"udf_abs", "udf_abs",
"udf_acos", "udf_acos",
"udf_add", "udf_add",
@ -646,6 +675,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_ceil", "udf_ceil",
"udf_ceiling", "udf_ceiling",
"udf_concat", "udf_concat",
"udf_concat_insert1",
"udf_concat_insert2", "udf_concat_insert2",
"udf_concat_ws", "udf_concat_ws",
"udf_conv", "udf_conv",
@ -660,6 +690,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_div", "udf_div",
"udf_double", "udf_double",
"udf_E", "udf_E",
"udf_elt",
"udf_exp", "udf_exp",
"udf_field", "udf_field",
"udf_find_in_set", "udf_find_in_set",
@ -669,9 +700,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_from_unixtime", "udf_from_unixtime",
"udf_greaterthan", "udf_greaterthan",
"udf_greaterthanorequal", "udf_greaterthanorequal",
"udf_hash",
"udf_hex", "udf_hex",
"udf_if", "udf_if",
"udf_index", "udf_index",
"udf_instr",
"udf_int", "udf_int",
"udf_isnotnull", "udf_isnotnull",
"udf_isnull", "udf_isnull",
@ -682,6 +715,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_lessthanorequal", "udf_lessthanorequal",
"udf_like", "udf_like",
"udf_ln", "udf_ln",
"udf_locate",
"udf_log", "udf_log",
"udf_log10", "udf_log10",
"udf_log2", "udf_log2",
@ -740,9 +774,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_trim", "udf_trim",
"udf_ucase", "udf_ucase",
"udf_upper", "udf_upper",
"udf_variance",
"udf_var_pop", "udf_var_pop",
"udf_var_samp", "udf_var_samp",
"udf_variance",
"udf_weekofyear", "udf_weekofyear",
"udf_when", "udf_when",
"udf_xpath", "udf_xpath",
@ -768,6 +802,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"union22", "union22",
"union23", "union23",
"union24", "union24",
"union25",
"union26", "union26",
"union27", "union27",
"union28", "union28",