[SPARK-16324][SQL] regexp_extract should document that it returns an empty string when the match fails
## What changes were proposed in this pull request? Document that regexp_extract returns an empty string when the regex does not match or the specified group does not match. ## How was this patch tested? Jenkins tests, with a few new test cases. Author: Sean Owen <sowen@cloudera.com> Closes #14525 from srowen/SPARK-16324.
This commit is contained in:
parent
eca58755fb
commit
0578ff9681
|
@ -1440,11 +1440,15 @@ def split(str, pattern):
|
|||
@ignore_unicode_prefix
|
||||
@since(1.5)
|
||||
def regexp_extract(str, pattern, idx):
|
||||
"""Extract a specific(idx) group identified by a java regex, from the specified string column.
|
||||
"""Extract a specific group matched by a Java regex, from the specified string column.
|
||||
If the regex did not match, or the specified group did not match, an empty string is returned.
|
||||
|
||||
>>> df = spark.createDataFrame([('100-200',)], ['str'])
|
||||
>>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect()
|
||||
[Row(d=u'100')]
|
||||
>>> df = spark.createDataFrame([('foo',)], ['str'])
|
||||
>>> df.select(regexp_extract('str', '(\d+)', 1).alias('d')).collect()
|
||||
[Row(d=u'')]
|
||||
>>> df = spark.createDataFrame([('aaaac',)], ['str'])
|
||||
>>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
|
||||
[Row(d=u'')]
|
||||
|
|
|
@ -2175,7 +2175,8 @@ object functions {
|
|||
def ltrim(e: Column): Column = withExpr {StringTrimLeft(e.expr) }
|
||||
|
||||
/**
|
||||
* Extract a specific(idx) group identified by a java regex, from the specified string column.
|
||||
* Extract a specific group matched by a Java regex, from the specified string column.
|
||||
* If the regex did not match, or the specified group did not match, an empty string is returned.
|
||||
*
|
||||
* @group string_funcs
|
||||
* @since 1.5.0
|
||||
|
|
|
@ -96,6 +96,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext {
|
|||
|
||||
test("non-matching optional group") {
|
||||
val df = Seq(Tuple1("aaaac")).toDF("s")
|
||||
checkAnswer(
|
||||
df.select(regexp_extract($"s", "(foo)", 1)),
|
||||
Row("")
|
||||
)
|
||||
checkAnswer(
|
||||
df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
|
||||
Row("")
|
||||
|
|
Loading…
Reference in a new issue