[SPARK-30724][SQL] Support 'LIKE ANY' and 'LIKE ALL' operators
### What changes were proposed in this pull request? `LIKE ANY/SOME` and `LIKE ALL` operators are mostly used when we are matching a text field with numbers of patterns. For example: Teradata / Hive 3.0 / Snowflake: ```sql --like any select 'foo' LIKE ANY ('%foo%','%bar%'); --like all select 'foo' LIKE ALL ('%foo%','%bar%'); ``` PostgreSQL: ```sql -- like any select 'foo' LIKE ANY (array['%foo%','%bar%']); -- like all select 'foo' LIKE ALL (array['%foo%','%bar%']); ``` This PR add support these two operators. More details: https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/4~AyrPNmDN0Xk4SALLo6aQ https://issues.apache.org/jira/browse/HIVE-15229 https://docs.snowflake.net/manuals/sql-reference/functions/like_any.html ### Why are the changes needed? To smoothly migrate SQLs to Spark SQL. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Unit test. Closes #27477 from wangyum/SPARK-30724. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
parent
463c54419b
commit
b10263b8e5
|
@ -766,6 +766,7 @@ predicate
|
|||
| NOT? kind=IN '(' expression (',' expression)* ')'
|
||||
| NOT? kind=IN '(' query ')'
|
||||
| NOT? kind=RLIKE pattern=valueExpression
|
||||
| NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')')
|
||||
| NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
|
||||
| IS NOT? kind=NULL
|
||||
| IS NOT? kind=(TRUE | FALSE | UNKNOWN)
|
||||
|
|
|
@ -1373,7 +1373,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
* Add a predicate to the given expression. Supported expressions are:
|
||||
* - (NOT) BETWEEN
|
||||
* - (NOT) IN
|
||||
* - (NOT) LIKE
|
||||
* - (NOT) LIKE (ANY | SOME | ALL)
|
||||
* - (NOT) RLIKE
|
||||
* - IS (NOT) NULL.
|
||||
* - IS (NOT) (TRUE | FALSE | UNKNOWN)
|
||||
|
@ -1391,6 +1391,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
case other => Seq(other)
|
||||
}
|
||||
|
||||
def getLikeQuantifierExprs(expressions: java.util.List[ExpressionContext]): Seq[Expression] = {
|
||||
if (expressions.isEmpty) {
|
||||
throw new ParseException("Expected something between '(' and ')'.", ctx)
|
||||
} else {
|
||||
expressions.asScala.map(expression).map(p => invertIfNotDefined(new Like(e, p)))
|
||||
}
|
||||
}
|
||||
|
||||
// Create the predicate.
|
||||
ctx.kind.getType match {
|
||||
case SqlBaseParser.BETWEEN =>
|
||||
|
@ -1403,14 +1411,21 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
|
|||
case SqlBaseParser.IN =>
|
||||
invertIfNotDefined(In(e, ctx.expression.asScala.map(expression)))
|
||||
case SqlBaseParser.LIKE =>
|
||||
val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
|
||||
if (str.length != 1) {
|
||||
throw new ParseException("Invalid escape string." +
|
||||
"Escape string must contains only one character.", ctx)
|
||||
}
|
||||
str.charAt(0)
|
||||
}.getOrElse('\\')
|
||||
invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
|
||||
Option(ctx.quantifier).map(_.getType) match {
|
||||
case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) =>
|
||||
getLikeQuantifierExprs(ctx.expression).reduceLeft(Or)
|
||||
case Some(SqlBaseParser.ALL) =>
|
||||
getLikeQuantifierExprs(ctx.expression).reduceLeft(And)
|
||||
case _ =>
|
||||
val escapeChar = Option(ctx.escapeChar).map(string).map { str =>
|
||||
if (str.length != 1) {
|
||||
throw new ParseException("Invalid escape string." +
|
||||
"Escape string must contain only one character.", ctx)
|
||||
}
|
||||
str.charAt(0)
|
||||
}.getOrElse('\\')
|
||||
invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar))
|
||||
}
|
||||
case SqlBaseParser.RLIKE =>
|
||||
invertIfNotDefined(RLike(e, expression(ctx.pattern)))
|
||||
case SqlBaseParser.NULL if ctx.NOT != null =>
|
||||
|
|
|
@ -188,7 +188,7 @@ class ExpressionParserSuite extends AnalysisTest {
|
|||
}
|
||||
|
||||
test("like escape expressions") {
|
||||
val message = "Escape string must contains only one character."
|
||||
val message = "Escape string must contain only one character."
|
||||
assertEqual("a like 'pattern%' escape '#'", 'a.like("pattern%", '#'))
|
||||
assertEqual("a like 'pattern%' escape '\"'", 'a.like("pattern%", '\"'))
|
||||
intercept("a like 'pattern%' escape '##'", message)
|
||||
|
@ -208,6 +208,21 @@ class ExpressionParserSuite extends AnalysisTest {
|
|||
assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser)
|
||||
}
|
||||
|
||||
test("(NOT) LIKE (ANY | SOME | ALL) expressions") {
|
||||
Seq("any", "some").foreach { quantifier =>
|
||||
assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") || ('a like "b%"))
|
||||
assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%") || !('a like "b%"))
|
||||
assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like "foo%") || ('a like "b%")))
|
||||
}
|
||||
assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like "b%"))
|
||||
assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a like "b%"))
|
||||
assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a like "b%")))
|
||||
|
||||
Seq("any", "some", "all").foreach { quantifier =>
|
||||
intercept(s"a like $quantifier()", "Expected something between '(' and ')'")
|
||||
}
|
||||
}
|
||||
|
||||
test("is null expressions") {
|
||||
assertEqual("a is null", 'a.isNull)
|
||||
assertEqual("a is not null", 'a.isNotNull)
|
||||
|
|
39
sql/core/src/test/resources/sql-tests/inputs/like-all.sql
Normal file
39
sql/core/src/test/resources/sql-tests/inputs/like-all.sql
Normal file
|
@ -0,0 +1,39 @@
|
|||
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
|
||||
('google', '%oo%'),
|
||||
('facebook', '%oo%'),
|
||||
('linkedin', '%in'))
|
||||
as t1(company, pat);
|
||||
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%');
|
||||
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%');
|
||||
|
||||
SELECT
|
||||
company,
|
||||
CASE
|
||||
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS is_available,
|
||||
CASE
|
||||
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS mix
|
||||
FROM like_all_table ;
|
||||
|
||||
-- Mix test with constant pattern and column value
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat);
|
||||
|
||||
-- not like all test
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%');
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%');
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%');
|
||||
SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%');
|
||||
|
||||
-- null test
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL);
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL);
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL);
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL);
|
||||
|
||||
-- negative case
|
||||
SELECT company FROM like_any_table WHERE company LIKE ALL ();
|
39
sql/core/src/test/resources/sql-tests/inputs/like-any.sql
Normal file
39
sql/core/src/test/resources/sql-tests/inputs/like-any.sql
Normal file
|
@ -0,0 +1,39 @@
|
|||
CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
|
||||
('google', '%oo%'),
|
||||
('facebook', '%oo%'),
|
||||
('linkedin', '%in'))
|
||||
as t1(company, pat);
|
||||
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 'fa%');
|
||||
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', '%yoo%');
|
||||
|
||||
select
|
||||
company,
|
||||
CASE
|
||||
WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS is_available,
|
||||
CASE
|
||||
WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 'ms%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS mix
|
||||
FROM like_any_table;
|
||||
|
||||
-- Mix test with constant pattern and column value
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat);
|
||||
|
||||
-- not like any test
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 'fa%');
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', '%yoo%');
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%');
|
||||
SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%');
|
||||
|
||||
-- null test
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL);
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL);
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL);
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL);
|
||||
|
||||
-- negative case
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ();
|
140
sql/core/src/test/resources/sql-tests/results/like-all.sql.out
Normal file
140
sql/core/src/test/resources/sql-tests/results/like-all.sql.out
Normal file
|
@ -0,0 +1,140 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 14
|
||||
|
||||
|
||||
-- !query
|
||||
CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES
|
||||
('google', '%oo%'),
|
||||
('facebook', '%oo%'),
|
||||
('linkedin', '%in'))
|
||||
as t1(company, pat)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', '%go%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
google
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('microsoft', '%yoo%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT
|
||||
company,
|
||||
CASE
|
||||
WHEN company LIKE ALL ('%oo%', '%go%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS is_available,
|
||||
CASE
|
||||
WHEN company LIKE ALL ('%oo%', 'go%') OR company LIKE ALL ('%in', 'ms%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS mix
|
||||
FROM like_all_table
|
||||
-- !query schema
|
||||
struct<company:string,is_available:string,mix:string>
|
||||
-- !query output
|
||||
facebook N N
|
||||
google Y Y
|
||||
linkedin N N
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', pat)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', '%in', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('microsoft', '%yoo%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE NOT company LIKE ALL ('%oo%', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL ('%oo%', NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL ('%oo%', NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company LIKE ALL (NULL, NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_all_table WHERE company NOT LIKE ALL (NULL, NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ALL ()
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.catalyst.parser.ParseException
|
||||
|
||||
Expected something between '(' and ')'.(line 1, pos 49)
|
||||
|
||||
== SQL ==
|
||||
SELECT company FROM like_any_table WHERE company LIKE ALL ()
|
||||
-------------------------------------------------^^^
|
146
sql/core/src/test/resources/sql-tests/results/like-any.sql.out
Normal file
146
sql/core/src/test/resources/sql-tests/results/like-any.sql.out
Normal file
|
@ -0,0 +1,146 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 14
|
||||
|
||||
|
||||
-- !query
|
||||
CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES
|
||||
('google', '%oo%'),
|
||||
('facebook', '%oo%'),
|
||||
('linkedin', '%in'))
|
||||
as t1(company, pat)
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', '%in', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('microsoft', '%yoo%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
select
|
||||
company,
|
||||
CASE
|
||||
WHEN company LIKE ANY ('%oo%', '%in', 'fa%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS is_available,
|
||||
CASE
|
||||
WHEN company LIKE ANY ('%oo%', 'fa%') OR company LIKE ANY ('%in', 'ms%') THEN 'Y'
|
||||
ELSE 'N'
|
||||
END AS mix
|
||||
FROM like_any_table
|
||||
-- !query schema
|
||||
struct<company:string,is_available:string,mix:string>
|
||||
-- !query output
|
||||
facebook Y Y
|
||||
google Y Y
|
||||
linkedin Y Y
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%zz%', pat)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', '%in', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('microsoft', '%yoo%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
google
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE NOT company LIKE ANY ('%oo%', 'fa%')
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ('%oo%', NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
facebook
|
||||
google
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY ('%oo%', NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
linkedin
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY (NULL, NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company NOT LIKE ANY (NULL, NULL)
|
||||
-- !query schema
|
||||
struct<company:string>
|
||||
-- !query output
|
||||
|
||||
|
||||
|
||||
-- !query
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ()
|
||||
-- !query schema
|
||||
struct<>
|
||||
-- !query output
|
||||
org.apache.spark.sql.catalyst.parser.ParseException
|
||||
|
||||
Expected something between '(' and ')'.(line 1, pos 49)
|
||||
|
||||
== SQL ==
|
||||
SELECT company FROM like_any_table WHERE company LIKE ANY ()
|
||||
-------------------------------------------------^^^
|
Loading…
Reference in a new issue