[SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string

### What changes were proposed in this pull request?
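`splitSemiColon` cannot correctly handle an unescaped quote mark of the other kind inside a quoted string, i.e. a `'` inside a double-quoted string or a `"` inside a single-quoted string. When a string contains such an unmatched quote, `splitSemiColon` does not drop the trailing semicolon as expected. This PR makes it ignore a `'` while inside a double-quoted string and a `"` while inside a single-quoted string.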

### Why are the changes needed?
Some regular expressions contain quote marks inside string literals. Statements that use them should still be split on semicolons correctly.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
Added a unit test and also tested manually.

Closes #28393 from adrian-wang/unescaped.

Authored-by: Daoyuan Wang <me@daoyuan.wang>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Daoyuan Wang 2020-05-06 04:34:43 +00:00 committed by Wenchen Fan
parent ebdf41dd69
commit 53a9bf8fec
2 changed files with 16 additions and 2 deletions

View file

@ -507,6 +507,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
}
// Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
// Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
// string, the original implementation from Hive does not drop the trailing semicolon as
// expected, hence we refined this function a little bit.
private def splitSemiColon(line: String): JList[String] = {
var insideSingleQuote = false
var insideDoubleQuote = false
@ -519,13 +522,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
for (index <- 0 until line.length) {
if (line.charAt(index) == '\'' && !insideComment) {
// take a look to see if it is escaped
if (!escape) {
// See the comment above about SPARK-31595
if (!escape && !insideDoubleQuote) {
// flip the boolean variable
insideSingleQuote = !insideSingleQuote
}
} else if (line.charAt(index) == '\"' && !insideComment) {
// take a look to see if it is escaped
if (!escape) {
// See the comment above about SPARK-31595
if (!escape && !insideSingleQuote) {
// flip the boolean variable
insideDoubleQuote = !insideDoubleQuote
}

View file

@ -500,4 +500,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
| ;""".stripMargin -> "testcomment"
)
}
test("SPARK-31595 Should allow unescaped quote mark in quoted string") {
  // A `"` inside a single-quoted literal, followed by a second statement on the same line.
  val doubleQuoteInSingleQuoted = "SELECT '\"legal string a';select 1 + 234;"
  // A `'` inside a double-quoted literal, followed by a second statement on the same line.
  val singleQuoteInDoubleQuoted = "SELECT \"legal 'string b\";select 22222 + 1;"

  // Each input is submitted through its own runCliWithin call. The paired value is the sum
  // computed by the second statement (presumably the output runCliWithin waits for -- confirm
  // against the helper's definition).
  runCliWithin(1.minute)(doubleQuoteInSingleQuoted -> "235")
  runCliWithin(1.minute)(singleQuoteInDoubleQuoted -> "22223")
}
}