[SPARK-36778][SQL] Support ILIKE API on Scala(dataframe)
### What changes were proposed in this pull request?
Support ILIKE (case insensitive LIKE) API on Scala.

### Why are the changes needed?
The ILIKE statement on the SQL interface is supported by SPARK-36674. This PR adds the corresponding Scala (DataFrame) API.

### Does this PR introduce _any_ user-facing change?
Yes. Users can call `ilike` on a DataFrame column.

### How was this patch tested?
Unit tests.

Closes #34027 from yoda-mon/scala-ilike.

Authored-by: Leona Yoda <yodal@oss.nttdata.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
This commit is contained in:
parent
4145498826
commit
1312a87365
|
@@ -846,6 +846,14 @@ class Column(val expr: Expression) extends Logging {
   */
  def rlike(literal: String): Column = withExpr { RLike(expr, lit(literal).expr) }

  /**
   * SQL ILIKE expression (case insensitive LIKE).
   *
   * @group expr_ops
   * @since 3.3.0
   */
  def ilike(literal: String): Column = withExpr { new ILike(expr, lit(literal).expr) }

  /**
   * An expression that gets an item at position `ordinal` out of an array,
   * or gets a value by key `key` in a `MapType`.
|
@@ -2904,4 +2904,35 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
      }
    }
  }

  test("SPARK-36778: add ilike API for scala") {
    // scalastyle:off
    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
    // null handling
    val nullDf = Seq("a", null).toDF("src")
    checkAnswer(nullDf.filter($"src".ilike("A")), Row("a"))
    checkAnswer(nullDf.filter($"src".ilike(null)), spark.emptyDataFrame)

    // simple pattern
    val simpleDf = Seq("a", "A", "abdef", "a_%b", "addb", "abC", "a\nb").toDF("src")
    checkAnswer(simpleDf.filter($"src".ilike("a")), Seq("a", "A").toDF())
    checkAnswer(simpleDf.filter($"src".ilike("A")), Seq("a", "A").toDF())
    checkAnswer(simpleDf.filter($"src".ilike("b")), spark.emptyDataFrame)
    checkAnswer(simpleDf.filter($"src".ilike("aBdef")), Seq("abdef").toDF())
    checkAnswer(simpleDf.filter($"src".ilike("a\\__b")), Seq("a_%b").toDF())
    checkAnswer(simpleDf.filter($"src".ilike("A_%b")), Seq("a_%b", "addb", "a\nb").toDF())
    checkAnswer(simpleDf.filter($"src".ilike("a%")), simpleDf)
    checkAnswer(simpleDf.filter($"src".ilike("a_b")), Seq("a\nb").toDF())

    // double-escaping backslash
    val dEscDf = Seq("""\__""", """\\__""").toDF("src")
    checkAnswer(dEscDf.filter($"src".ilike("""\\\__""")), Seq("""\__""").toDF())
    checkAnswer(dEscDf.filter($"src".ilike("""%\\%\%""")), spark.emptyDataFrame)

    // unicode
    val uncDf = Seq("a\u20ACA", "A€a", "a€AA", "a\u20ACaz", "ЀЁЂѺΏỀ").toDF("src")
    checkAnswer(uncDf.filter($"src".ilike("_\u20AC_")), Seq("a\u20ACA", "A€a").toDF())
    checkAnswer(uncDf.filter($"src".ilike("_€_")), Seq("a\u20ACA", "A€a").toDF())
    checkAnswer(uncDf.filter($"src".ilike("_\u20AC_a")), Seq("a€AA").toDF())
    checkAnswer(uncDf.filter($"src".ilike("_€_Z")), Seq("a\u20ACaz").toDF())
    checkAnswer(uncDf.filter($"src".ilike("ѐёђѻώề")), Seq("ЀЁЂѺΏỀ").toDF())
    // scalastyle:on
  }
}
||||||
|
|
Loading…
Reference in a new issue