[SPARK-33468][SQL] ParseUrl in ANSI mode should fail if input string is not a valid url
### What changes were proposed in this pull request? With `ParseUrl`, instead of returning null, we throw an exception if the input string is not a valid URL. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, users will get an exception for an invalid URL if they `set spark.sql.ansi.enabled=true`. ### How was this patch tested? Added a test. Closes #30399 from ulysses-you/SPARK-33468. Lead-authored-by: ulysses <youxiduo@weidian.com> Co-authored-by: ulysses-you <youxiduo@weidian.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
cbc8be24c8
commit
3384bda453
|
@ -135,6 +135,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
|
|||
- `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
|
||||
- `element_at`: This function throws `NoSuchElementException` if key does not exist in map.
|
||||
- `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
|
||||
- `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
|
||||
|
||||
### SQL Operators
|
||||
|
||||
|
|
|
@ -1357,8 +1357,9 @@ object ParseUrl {
|
|||
1
|
||||
""",
|
||||
since = "2.0.0")
|
||||
case class ParseUrl(children: Seq[Expression])
|
||||
case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled)
|
||||
extends Expression with ExpectsInputTypes with CodegenFallback {
|
||||
def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled)
|
||||
|
||||
override def nullable: Boolean = true
|
||||
override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType)
|
||||
|
@ -1404,7 +1405,9 @@ case class ParseUrl(children: Seq[Expression])
|
|||
try {
|
||||
new URI(url.toString)
|
||||
} catch {
|
||||
case e: URISyntaxException => null
|
||||
case e: URISyntaxException if failOnError =>
|
||||
throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e)
|
||||
case _: URISyntaxException => null
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -943,6 +943,20 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
|
|||
GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil)
|
||||
}
|
||||
|
||||
test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") {
|
||||
withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
|
||||
val msg = intercept[IllegalArgumentException] {
|
||||
evaluateWithoutCodegen(
|
||||
ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")))
|
||||
}.getMessage
|
||||
assert(msg.contains("Find an invaild url string"))
|
||||
}
|
||||
withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") {
|
||||
checkEvaluation(
|
||||
ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")), null)
|
||||
}
|
||||
}
|
||||
|
||||
test("Sentences") {
|
||||
val nullString = Literal.create(null, StringType)
|
||||
checkEvaluation(Sentences(nullString, nullString, nullString), null)
|
||||
|
|
Loading…
Reference in a new issue