[SPARK-14545][SQL] Improve LikeSimplification by adding a%b rule

## What changes were proposed in this pull request?

Currently, `LikeSimplification` handles the following four rules.
- 'a%' => expr.StartsWith("a")
- '%b' => expr.EndsWith("b")
- '%a%' => expr.Contains("a")
- 'a' => EqualTo("a")

This PR adds the following rule.
- 'a%b' => expr.Length() >= 2 && expr.StartsWith("a") && expr.EndsWith("b")

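Here, 2 is statically calculated from "a".size + "b".size. The length check is needed because, without it, a string shorter than the prefix and postfix combined (e.g. 'a' against the pattern 'a%a') would satisfy both StartsWith and EndsWith even though it does not match the LIKE pattern.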

**Before**
```
scala> sql("select a from (select explode(array('abc','adc')) a) T where a like 'a%c'").explain()
== Physical Plan ==
WholeStageCodegen
:  +- Filter a#5 LIKE a%c
:     +- INPUT
+- Generate explode([abc,adc]), false, false, [a#5]
   +- Scan OneRowRelation[]
```

**After**
```
scala> sql("select a from (select explode(array('abc','adc')) a) T where a like 'a%c'").explain()
== Physical Plan ==
WholeStageCodegen
:  +- Filter ((length(a#5) >= 2) && (StartsWith(a#5, a) && EndsWith(a#5, c)))
:     +- INPUT
+- Generate explode([abc,adc]), false, false, [a#5]
   +- Scan OneRowRelation[]
```

## How was this patch tested?

Passes the Jenkins tests (including a new test case).

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #12312 from dongjoon-hyun/SPARK-14545.
This commit is contained in:
Dongjoon Hyun 2016-04-14 13:34:29 -07:00 committed by Reynold Xin
parent bc748b7b8f
commit d7e124edfe
2 changed files with 31 additions and 11 deletions

View file

@ -517,22 +517,28 @@ object LikeSimplification extends Rule[LogicalPlan] {
// Cases like "something\%" are not optimized, but this does not affect correctness.
private val startsWith = "([^_%]+)%".r
private val endsWith = "%([^_%]+)".r
private val startsAndEndsWith = "([^_%]+)%([^_%]+)".r
private val contains = "%([^_%]+)%".r
private val equalTo = "([^_%]*)".r
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case Like(l, Literal(utf, StringType)) =>
utf.toString match {
case startsWith(pattern) if !pattern.endsWith("\\") =>
StartsWith(l, Literal(pattern))
case endsWith(pattern) =>
EndsWith(l, Literal(pattern))
case contains(pattern) if !pattern.endsWith("\\") =>
Contains(l, Literal(pattern))
case equalTo(pattern) =>
EqualTo(l, Literal(pattern))
case Like(input, Literal(pattern, StringType)) =>
pattern.toString match {
case startsWith(prefix) if !prefix.endsWith("\\") =>
StartsWith(input, Literal(prefix))
case endsWith(postfix) =>
EndsWith(input, Literal(postfix))
// The 'a%a' pattern is essentially the same as 'a%' && '%a'.
// However, the additional `Length` condition is required to prevent the input 'a'
// from matching the pattern 'a%a'.
case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") =>
And(GreaterThanOrEqual(Length(input), Literal(prefix.size + postfix.size)),
And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))
case contains(infix) if !infix.endsWith("\\") =>
Contains(input, Literal(infix))
case equalTo(str) =>
EqualTo(input, Literal(str))
case _ =>
Like(l, Literal.create(utf, StringType))
Like(input, Literal.create(pattern, StringType))
}
}
}

View file

@ -61,6 +61,20 @@ class LikeSimplificationSuite extends PlanTest {
comparePlans(optimized, correctAnswer)
}
// Checks that a 'prefix%postfix' LIKE pattern is rewritten into a length check combined with
// StartsWith and EndsWith, while a pattern containing an escaped '%' is kept as a LIKE.
test("simplify Like into startsWith and EndsWith") {
  val escapedLike = 'a like "abc\\%def"
  val plainLike = 'a like "abc%def"
  val optimized = Optimize.execute(testRelation.where(escapedLike || plainLike).analyze)

  // The length bound 6 is the combined size of the prefix "abc" and the postfix "def".
  val rewritten = Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def"))
  val correctAnswer = testRelation.where(escapedLike || rewritten).analyze

  comparePlans(optimized, correctAnswer)
}
test("simplify Like into Contains") {
val originalQuery =
testRelation