[SQL] SPARK-1661 - Fix serde_regex test
The JIRA in question actually reports a bug in Shark, but I wanted to make sure Spark SQL did not have similar problems. This fixes a bug in our parsing code that was preventing the test from executing; with that fixed, the RegexSerDe appears to work correctly in Spark SQL. Author: Michael Armbrust <michael@databricks.com> Closes #595 from marmbrus/fixRegexSerdeTest and squashes the following commits: a4dc612 [Michael Armbrust] Add files created by hive to gitignore. efa6402 [Michael Armbrust] Fix Hive serde_regex test.
This commit is contained in:
parent
98b65593bd
commit
a43d9c14f2
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -49,3 +49,8 @@ unit-tests.log
|
||||||
/lib/
|
/lib/
|
||||||
rat-results.txt
|
rat-results.txt
|
||||||
scalastyle.txt
|
scalastyle.txt
|
||||||
|
|
||||||
|
# For Hive
|
||||||
|
metastore_db/
|
||||||
|
metastore/
|
||||||
|
warehouse/
|
||||||
|
|
|
@ -347,7 +347,11 @@ private[hive] object HiveQl {
|
||||||
protected def nodeToPlan(node: Node): LogicalPlan = node match {
|
protected def nodeToPlan(node: Node): LogicalPlan = node match {
|
||||||
// Just fake explain for any of the native commands.
|
// Just fake explain for any of the native commands.
|
||||||
case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText =>
|
case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText =>
|
||||||
NoRelation
|
ExplainCommand(NoRelation)
|
||||||
|
// Create tables aren't native commands due to CTAS queries, but we still don't need to
|
||||||
|
// explain them.
|
||||||
|
case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" =>
|
||||||
|
ExplainCommand(NoRelation)
|
||||||
case Token("TOK_EXPLAIN", explainArgs) =>
|
case Token("TOK_EXPLAIN", explainArgs) =>
|
||||||
// Ignore FORMATTED if present.
|
// Ignore FORMATTED if present.
|
||||||
val Some(query) :: _ :: _ :: Nil =
|
val Some(query) :: _ :: _ :: Nil =
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
ABSTRACT SYNTAX TREE:
|
||||||
|
(TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) (TOK_TABCOL status TOK_STRING) (TOK_TABCOL size TOK_INT) (TOK_TABCOL referer TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?"))))) TOK_TBLTEXTFILE)
|
||||||
|
|
||||||
|
STAGE DEPENDENCIES:
|
||||||
|
Stage-0 is a root stage
|
||||||
|
|
||||||
|
STAGE PLANS:
|
||||||
|
Stage: Stage-0
|
||||||
|
Create Table Operator:
|
||||||
|
Create Table
|
||||||
|
columns: host string, identity string, user string, time string, request string, status string, size int, referer string, agent string
|
||||||
|
if not exists: false
|
||||||
|
input format: org.apache.hadoop.mapred.TextInputFormat
|
||||||
|
# buckets: -1
|
||||||
|
output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
|
||||||
|
serde name: org.apache.hadoop.hive.serde2.RegexSerDe
|
||||||
|
serde properties:
|
||||||
|
input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ "]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))?
|
||||||
|
name: serde_regex
|
||||||
|
isExternal: false
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
NULL 0
|
||||||
|
NULL 0
|
||||||
|
-1234567890.123456789 -1234567890
|
||||||
|
-4400 4400
|
||||||
|
-1255.49 -1255
|
||||||
|
-1.122 -11
|
||||||
|
-1.12 -1
|
||||||
|
-1.12 -1
|
||||||
|
-0.333 0
|
||||||
|
-0.33 0
|
||||||
|
-0.3 0
|
||||||
|
0 0
|
||||||
|
0 0
|
||||||
|
0.01 0
|
||||||
|
0.02 0
|
||||||
|
0.1 0
|
||||||
|
0.2 0
|
||||||
|
0.3 0
|
||||||
|
0.33 0
|
||||||
|
0.333 0
|
||||||
|
0.9999999999999999999999999 1
|
||||||
|
1 1
|
||||||
|
1 1
|
||||||
|
1.12 1
|
||||||
|
1.122 1
|
||||||
|
2 2
|
||||||
|
2 2
|
||||||
|
3.14 3
|
||||||
|
3.14 3
|
||||||
|
3.14 3
|
||||||
|
3.14 4
|
||||||
|
10 10
|
||||||
|
20 20
|
||||||
|
100 100
|
||||||
|
124 124
|
||||||
|
125.2 125
|
||||||
|
200 200
|
||||||
|
1234567890.12345678 1234567890
|
|
@ -0,0 +1,2 @@
|
||||||
|
127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 NULL NULL
|
||||||
|
127.0.0.1 - - [26/May/2009:00:00:00 +0000] "GET /someurl/?track=Blabla(Main) HTTP/1.1" 200 5864 - "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19"
|
|
@ -0,0 +1,2 @@
|
||||||
|
127.0.0.1 2326 200 [10/Oct/2000:13:55:36 -0700]
|
||||||
|
127.0.0.1 5864 200 [26/May/2009:00:00:00 +0000]
|
|
@ -0,0 +1,22 @@
|
||||||
|
ABSTRACT SYNTAX TREE:
|
||||||
|
(TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_DECIMAL) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) TOK_TBLTEXTFILE)
|
||||||
|
|
||||||
|
STAGE DEPENDENCIES:
|
||||||
|
Stage-0 is a root stage
|
||||||
|
|
||||||
|
STAGE PLANS:
|
||||||
|
Stage: Stage-0
|
||||||
|
Create Table Operator:
|
||||||
|
Create Table
|
||||||
|
columns: key decimal, value int
|
||||||
|
if not exists: false
|
||||||
|
input format: org.apache.hadoop.mapred.TextInputFormat
|
||||||
|
# buckets: -1
|
||||||
|
output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
|
||||||
|
serde name: org.apache.hadoop.hive.serde2.RegexSerDe
|
||||||
|
serde properties:
|
||||||
|
input.regex ([^ ]*) ([^ ]*)
|
||||||
|
name: serde_regex1
|
||||||
|
isExternal: false
|
||||||
|
|
||||||
|
|
|
@ -568,6 +568,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
|
||||||
"select_unquote_and",
|
"select_unquote_and",
|
||||||
"select_unquote_not",
|
"select_unquote_not",
|
||||||
"select_unquote_or",
|
"select_unquote_or",
|
||||||
|
"serde_regex",
|
||||||
"serde_reported_schema",
|
"serde_reported_schema",
|
||||||
"set_variable_sub",
|
"set_variable_sub",
|
||||||
"show_describe_func_quotes",
|
"show_describe_func_quotes",
|
||||||
|
|
Loading…
Reference in a new issue