[SPARK-20750][SQL] Built-in SQL Function Support - REPLACE
## What changes were proposed in this pull request? This PR adds the built-in SQL function `REPLACE(<string_expression>, <search_string> [, <replacement_string>])`. `REPLACE()` returns the input string with all occurrences of the search string replaced by the given replacement string. ## How was this patch tested? Added new tests. Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com> Closes #18047 from kiszk/SPARK-20750.
This commit is contained in:
parent
f9b59abeae
commit
ef9fd920c3
|
@ -835,6 +835,15 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public UTF8String replace(UTF8String search, UTF8String replace) {
|
||||||
|
if (EMPTY_UTF8.equals(search)) {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
String replaced = toString().replace(
|
||||||
|
search.toString(), replace.toString());
|
||||||
|
return fromString(replaced);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Need to use `Code Point` here instead of Char in case the character longer than 2 bytes
|
// TODO: Need to use `Code Point` here instead of Char in case the character longer than 2 bytes
|
||||||
public UTF8String translate(Map<Character, Character> dict) {
|
public UTF8String translate(Map<Character, Character> dict) {
|
||||||
String srcStr = this.toString();
|
String srcStr = this.toString();
|
||||||
|
|
|
@ -304,6 +304,7 @@ object FunctionRegistry {
|
||||||
expression[RegExpExtract]("regexp_extract"),
|
expression[RegExpExtract]("regexp_extract"),
|
||||||
expression[RegExpReplace]("regexp_replace"),
|
expression[RegExpReplace]("regexp_replace"),
|
||||||
expression[StringRepeat]("repeat"),
|
expression[StringRepeat]("repeat"),
|
||||||
|
expression[StringReplace]("replace"),
|
||||||
expression[StringReverse]("reverse"),
|
expression[StringReverse]("reverse"),
|
||||||
expression[RLike]("rlike"),
|
expression[RLike]("rlike"),
|
||||||
expression[StringRPad]("rpad"),
|
expression[StringRPad]("rpad"),
|
||||||
|
|
|
@ -340,6 +340,48 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces all occurrences of the search string in the source string with the replacement string.
|
||||||
|
*/
|
||||||
|
// scalastyle:off line.size.limit
|
||||||
|
@ExpressionDescription(
|
||||||
|
usage = "_FUNC_(str, search[, replace]) - Replaces all occurrences of `search` with `replace`.",
|
||||||
|
extended = """
|
||||||
|
Arguments:
|
||||||
|
str - a string expression
|
||||||
|
search - a string expression. If `search` is not found in `str`, `str` is returned unchanged.
|
||||||
|
replace - a string expression. If `replace` is not specified or is an empty string, nothing replaces
|
||||||
|
the string that is removed from `str`.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
> SELECT _FUNC_('ABCabc', 'abc', 'DEF');
|
||||||
|
ABCDEF
|
||||||
|
""")
|
||||||
|
// scalastyle:on line.size.limit
|
||||||
|
case class StringReplace(srcExpr: Expression, searchExpr: Expression, replaceExpr: Expression)
  extends TernaryExpression with ImplicitCastInputTypes {

  /** Two-argument form: the replacement defaults to the empty string literal. */
  def this(srcExpr: Expression, searchExpr: Expression) =
    this(srcExpr, searchExpr, Literal(""))

  override def prettyName: String = "replace"

  override def dataType: DataType = StringType

  // All three operands are declared as strings.
  override def inputTypes: Seq[DataType] = Seq(StringType, StringType, StringType)

  override def children: Seq[Expression] = Seq(srcExpr, searchExpr, replaceExpr)

  /** Interpreted path: casts the three evaluated operands and calls `UTF8String.replace`. */
  override def nullSafeEval(srcEval: Any, searchEval: Any, replaceEval: Any): Any = {
    val source = srcEval.asInstanceOf[UTF8String]
    val needle = searchEval.asInstanceOf[UTF8String]
    val substitute = replaceEval.asInstanceOf[UTF8String]
    source.replace(needle, substitute)
  }

  /** Codegen path: emits a call to the same `replace` method on the generated source value. */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    nullSafeCodeGen(ctx, ev, (src, search, replace) =>
      s"${ev.value} = $src.replace($search, $replace);")
}
|
||||||
|
|
||||||
object StringTranslate {
|
object StringTranslate {
|
||||||
|
|
||||||
def buildDict(matchingString: UTF8String, replaceString: UTF8String)
|
def buildDict(matchingString: UTF8String, replaceString: UTF8String)
|
||||||
|
|
|
@ -372,6 +372,26 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
||||||
checkEvaluation(SoundEx(Literal("!!")), "!!")
|
checkEvaluation(SoundEx(Literal("!!")), "!!")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("replace") {
  // Operands reused by several checks below.
  val source = Literal("replace")
  val nullString = Literal.create(null, StringType)

  // Single match, removal via empty replacement, and an empty search string (input unchanged).
  checkEvaluation(StringReplace(source, Literal("pl"), Literal("123")), "re123ace")
  checkEvaluation(StringReplace(source, Literal("pl"), Literal("")), "reace")
  checkEvaluation(StringReplace(source, Literal(""), Literal("123")), "replace")

  // A null in any argument position is expected to produce null.
  checkEvaluation(StringReplace(nullString, Literal("pl"), Literal("123")), null)
  checkEvaluation(StringReplace(source, nullString, Literal("123")), null)
  checkEvaluation(StringReplace(source, Literal("pl"), nullString), null)

  // Every occurrence is replaced, not just the first one.
  checkEvaluation(StringReplace(Literal("abcabc"), Literal("b"), Literal("12")), "a12ca12c")
  checkEvaluation(StringReplace(Literal("abcdabcd"), Literal("bc"), Literal("")), "adad")

  // scalastyle:off
  // The style checker rejects non-ASCII source characters, so it is switched off for this check.
  checkEvaluation(StringReplace(Literal("花花世界"), Literal("花世"), Literal("ab")), "花ab界")
  // scalastyle:on
}
|
||||||
|
|
||||||
test("translate") {
|
test("translate") {
|
||||||
checkEvaluation(
|
checkEvaluation(
|
||||||
StringTranslate(Literal("translate"), Literal("rnlt"), Literal("123")), "1a2s3ae")
|
StringTranslate(Literal("translate"), Literal("rnlt"), Literal("123")), "1a2s3ae")
|
||||||
|
|
|
@ -8,3 +8,7 @@ select 'a' || 'b' || 'c';
|
||||||
-- Check if catalyst combine nested `Concat`s
|
-- Check if catalyst combine nested `Concat`s
|
||||||
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
|
EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col
|
||||||
FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t;
|
FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t;
|
||||||
|
|
||||||
|
-- replace function
|
||||||
|
select replace('abc', 'b', '123');
|
||||||
|
select replace('abc', 'b');
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
-- Automatically generated by SQLQueryTestSuite
|
-- Automatically generated by SQLQueryTestSuite
|
||||||
-- Number of queries: 4
|
-- Number of queries: 6
|
||||||
|
|
||||||
|
|
||||||
-- !query 0
|
-- !query 0
|
||||||
|
@ -54,3 +54,19 @@ Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as stri
|
||||||
== Physical Plan ==
|
== Physical Plan ==
|
||||||
*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x]
|
*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x]
|
||||||
+- *Range (0, 10, step=1, splits=2)
|
+- *Range (0, 10, step=1, splits=2)
|
||||||
|
|
||||||
|
|
||||||
|
-- !query 4
|
||||||
|
select replace('abc', 'b', '123')
|
||||||
|
-- !query 4 schema
|
||||||
|
struct<replace(abc, b, 123):string>
|
||||||
|
-- !query 4 output
|
||||||
|
a123c
|
||||||
|
|
||||||
|
|
||||||
|
-- !query 5
|
||||||
|
select replace('abc', 'b')
|
||||||
|
-- !query 5 schema
|
||||||
|
struct<replace(abc, b, ):string>
|
||||||
|
-- !query 5 output
|
||||||
|
ac
|
||||||
|
|
Loading…
Reference in a new issue