diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index c45027158c..554f6a34b1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -846,6 +846,14 @@ class Column(val expr: Expression) extends Logging {
    */
   def rlike(literal: String): Column = withExpr { RLike(expr, lit(literal).expr) }
 
+  /**
+   * SQL ILIKE expression (case insensitive LIKE).
+   *
+   * @group expr_ops
+   * @since 3.3.0
+   */
+  def ilike(literal: String): Column = withExpr { new ILike(expr, lit(literal).expr) }
+
   /**
    * An expression that gets an item at position `ordinal` out of an array,
    * or gets a value by key `key` in a `MapType`.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index b0cd613410..d954dc12ac 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -2904,4 +2904,35 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-36778: add ilike API for scala") {
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
+    // null handling
+    val nullDf = Seq("a", null).toDF("src")
+    checkAnswer(nullDf.filter($"src".ilike("A")), Row("a"))
+    checkAnswer(nullDf.filter($"src".ilike(null)), spark.emptyDataFrame)
+    // simple pattern
+    val simpleDf = Seq("a", "A", "abdef", "a_%b", "addb", "abC", "a\nb").toDF("src")
+    checkAnswer(simpleDf.filter($"src".ilike("a")), Seq("a", "A").toDF())
+    checkAnswer(simpleDf.filter($"src".ilike("A")), Seq("a", "A").toDF())
+    checkAnswer(simpleDf.filter($"src".ilike("b")), spark.emptyDataFrame)
+    checkAnswer(simpleDf.filter($"src".ilike("aBdef")), Seq("abdef").toDF())
+    checkAnswer(simpleDf.filter($"src".ilike("a\\__b")), Seq("a_%b").toDF())
+    checkAnswer(simpleDf.filter($"src".ilike("A_%b")), Seq("a_%b", "addb", "a\nb").toDF())
+    checkAnswer(simpleDf.filter($"src".ilike("a%")), simpleDf)
+    checkAnswer(simpleDf.filter($"src".ilike("a_b")), Seq("a\nb").toDF())
+    // double-escaping backslash
+    val dEscDf = Seq("""\__""", """\\__""").toDF("src")
+    checkAnswer(dEscDf.filter($"src".ilike("""\\\__""")), Seq("""\__""").toDF())
+    checkAnswer(dEscDf.filter($"src".ilike("""%\\%\%""")), spark.emptyDataFrame)
+    // unicode
+    val uncDf = Seq("a\u20ACA", "A€a", "a€AA", "a\u20ACaz", "ЀЁЂѺΏỀ").toDF("src")
+    checkAnswer(uncDf.filter($"src".ilike("_\u20AC_")), Seq("a\u20ACA", "A€a").toDF())
+    checkAnswer(uncDf.filter($"src".ilike("_€_")), Seq("a\u20ACA", "A€a").toDF())
+    checkAnswer(uncDf.filter($"src".ilike("_\u20AC_a")), Seq("a€AA").toDF())
+    checkAnswer(uncDf.filter($"src".ilike("_€_Z")), Seq("a\u20ACaz").toDF())
+    checkAnswer(uncDf.filter($"src".ilike("ѐёђѻώề")), Seq("ЀЁЂѺΏỀ").toDF())
+    // scalastyle:on
+  }
 }