[SPARK-20750][SQL] Built-in SQL Function Support - REPLACE
## What changes were proposed in this pull request? This PR adds the built-in SQL function `REPLACE(<string_expression>, <search_string> [, <replacement_string>])`. `REPLACE()` returns the string with all occurrences of the search string replaced by the given replacement string. ## How was this patch tested? Added new test suites. Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com> Closes #18047 from kiszk/SPARK-20750.
This commit is contained in:
parent
f9b59abeae
commit
ef9fd920c3
|
@ -835,6 +835,15 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
|||
return res;
|
||||
}
|
||||
|
||||
public UTF8String replace(UTF8String search, UTF8String replace) {
|
||||
if (EMPTY_UTF8.equals(search)) {
|
||||
return this;
|
||||
}
|
||||
String replaced = toString().replace(
|
||||
search.toString(), replace.toString());
|
||||
return fromString(replaced);
|
||||
}
|
||||
|
||||
// TODO: Use code points here instead of `char`, in case a character is longer than 2 bytes
|
||||
public UTF8String translate(Map<Character, Character> dict) {
|
||||
String srcStr = this.toString();
|
||||
|
|
|
@ -304,6 +304,7 @@ object FunctionRegistry {
|
|||
expression[RegExpExtract]("regexp_extract"),
|
||||
expression[RegExpReplace]("regexp_replace"),
|
||||
expression[StringRepeat]("repeat"),
|
||||
expression[StringReplace]("replace"),
|
||||
expression[StringReverse]("reverse"),
|
||||
expression[RLike]("rlike"),
|
||||
expression[StringRPad]("rpad"),
|
||||
|
|
|
@ -340,6 +340,48 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces all occurrences of the search string in the source string with the replacement string.
|
||||
*/
|
||||
// scalastyle:off line.size.limit
|
||||
@ExpressionDescription(
|
||||
usage = "_FUNC_(str, search[, replace]) - Replaces all occurrences of `search` with `replace`.",
|
||||
extended = """
|
||||
Arguments:
|
||||
str - a string expression
|
||||
search - a string expression. If `search` is not found in `str`, `str` is returned unchanged.
|
||||
replace - a string expression. If `replace` is not specified or is an empty string, nothing replaces
|
||||
the string that is removed from `str`.
|
||||
|
||||
Examples:
|
||||
> SELECT _FUNC_('ABCabc', 'abc', 'DEF');
|
||||
ABCDEF
|
||||
""")
|
||||
// scalastyle:on line.size.limit
|
||||
case class StringReplace(srcExpr: Expression, searchExpr: Expression, replaceExpr: Expression)
  extends TernaryExpression with ImplicitCastInputTypes {

  // Two-argument form: the replacement defaults to the empty string literal.
  def this(srcExpr: Expression, searchExpr: Expression) = {
    this(srcExpr, searchExpr, Literal(""))
  }

  override def prettyName: String = "replace"

  override def dataType: DataType = StringType

  // All three operands are declared as strings.
  override def inputTypes: Seq[DataType] = Seq.fill(3)(StringType)

  override def children: Seq[Expression] = Seq(srcExpr, searchExpr, replaceExpr)

  // Interpreted path: cast the three operands to UTF8String and delegate to UTF8String.replace.
  override def nullSafeEval(srcEval: Any, searchEval: Any, replaceEval: Any): Any = {
    val source = srcEval.asInstanceOf[UTF8String]
    val needle = searchEval.asInstanceOf[UTF8String]
    val substitute = replaceEval.asInstanceOf[UTF8String]
    source.replace(needle, substitute)
  }

  // Codegen path: emits a single assignment that calls `replace` on the source operand.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    def replaceCall(src: String, search: String, replace: String): String =
      s"""${ev.value} = $src.replace($search, $replace);"""

    nullSafeCodeGen(ctx, ev, replaceCall)
  }
}
|
||||
|
||||
object StringTranslate {
|
||||
|
||||
def buildDict(matchingString: UTF8String, replaceString: UTF8String)
|
||||
|
|
|
@ -372,6 +372,26 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
checkEvaluation(SoundEx(Literal("!!")), "!!")
|
||||
}
|
||||
|
||||
test("replace") {
  // Shared fixtures: a typed NULL string literal and a builder for all-literal invocations.
  val nullStr = Literal.create(null, StringType)
  def replaceOf(src: String, search: String, replacement: String) =
    StringReplace(Literal(src), Literal(search), Literal(replacement))

  // single occurrence, with a non-empty and an empty replacement
  checkEvaluation(replaceOf("replace", "pl", "123"), "re123ace")
  checkEvaluation(replaceOf("replace", "pl", ""), "reace")
  // an empty search string leaves the input unchanged
  checkEvaluation(replaceOf("replace", "", "123"), "replace")
  // a NULL in any argument position evaluates to NULL
  checkEvaluation(StringReplace(nullStr, Literal("pl"), Literal("123")), null)
  checkEvaluation(StringReplace(Literal("replace"), nullStr, Literal("123")), null)
  checkEvaluation(StringReplace(Literal("replace"), Literal("pl"), nullStr), null)
  // test for multiple replace
  checkEvaluation(replaceOf("abcabc", "b", "12"), "a12ca12c")
  checkEvaluation(replaceOf("abcdabcd", "bc", ""), "adad")
  // scalastyle:off
  // non ascii characters are not allowed in the source code, so we disable the scalastyle.
  checkEvaluation(replaceOf("花花世界", "花世", "ab"), "花ab界")
  // scalastyle:on
}
|
||||
|
||||
test("translate") {
|
||||
checkEvaluation(
|
||||
StringTranslate(Literal("translate"), Literal("rnlt"), Literal("123")), "1a2s3ae")
|
||||
|
|
|
@ -8,3 +8,7 @@ select 'a' || 'b' || 'c';
|
|||
-- Check if catalyst combine nested `Concat`s
|
||||
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
|
||||
FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t;
|
||||
|
||||
-- replace function: three-argument call first, then the two-argument call (replacement omitted)
|
||||
select replace('abc', 'b', '123');
|
||||
select replace('abc', 'b');
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
-- Automatically generated by SQLQueryTestSuite
|
||||
-- Number of queries: 4
|
||||
-- Number of queries: 6
|
||||
|
||||
|
||||
-- !query 0
|
||||
|
@ -54,3 +54,19 @@ Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as stri
|
|||
== Physical Plan ==
|
||||
*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x]
|
||||
+- *Range (0, 10, step=1, splits=2)
|
||||
|
||||
|
||||
-- !query 4
|
||||
select replace('abc', 'b', '123')
|
||||
-- !query 4 schema
|
||||
struct<replace(abc, b, 123):string>
|
||||
-- !query 4 output
|
||||
a123c
|
||||
|
||||
|
||||
-- !query 5
|
||||
select replace('abc', 'b')
|
||||
-- !query 5 schema
|
||||
struct<replace(abc, b, ):string>
|
||||
-- !query 5 output
|
||||
ac
|
||||
|
|
Loading…
Reference in a new issue