[SPARK-33468][SQL] ParseUrl in ANSI mode should fail if input string is not a valid url

### What changes were proposed in this pull request?

With `ParseUrl`, instead of return null we throw exception if input string is not a vaild url.

### Why are the changes needed?

For ANSI mode.

### Does this PR introduce _any_ user-facing change?

Yes, user will get exception if `set spark.sql.ansi.enabled=true`.

### How was this patch tested?

Add test.

Closes #30399 from ulysses-you/SPARK-33468.

Lead-authored-by: ulysses <youxiduo@weidian.com>
Co-authored-by: ulysses-you <youxiduo@weidian.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
ulysses 2020-11-20 13:23:08 +00:00 committed by Wenchen Fan
parent cbc8be24c8
commit 3384bda453
3 changed files with 20 additions and 2 deletions

View file

@ -135,6 +135,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
- `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
- `element_at`: This function throws `NoSuchElementException` if key does not exist in map.
- `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
- `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
### SQL Operators

View file

@ -1357,8 +1357,9 @@ object ParseUrl {
1
""",
since = "2.0.0")
case class ParseUrl(children: Seq[Expression])
case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled)
extends Expression with ExpectsInputTypes with CodegenFallback {
def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled)
override def nullable: Boolean = true
override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType)
@ -1404,7 +1405,9 @@ case class ParseUrl(children: Seq[Expression])
try {
new URI(url.toString)
} catch {
case e: URISyntaxException => null
case e: URISyntaxException if failOnError =>
throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e)
case _: URISyntaxException => null
}
}

View file

@ -943,6 +943,20 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil)
}
test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") {
withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
val msg = intercept[IllegalArgumentException] {
evaluateWithoutCodegen(
ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")))
}.getMessage
assert(msg.contains("Find an invaild url string"))
}
withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") {
checkEvaluation(
ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")), null)
}
}
test("Sentences") {
val nullString = Literal.create(null, StringType)
checkEvaluation(Sentences(nullString, nullString, nullString), null)