[SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string
### What changes were proposed in this pull request? `def splitSemiColon` cannot correctly handle an unescaped quote mark such as "'" or '"' inside a quoted string. When there are unmatched quotes in a string, `splitSemiColon` will not drop the trailing semicolon as expected. ### Why are the changes needed? Some regex expressions use quote marks inside strings, so we should process semicolons correctly in that case. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Added a unit test and also tested manually. Closes #28393 from adrian-wang/unescaped. Authored-by: Daoyuan Wang <me@daoyuan.wang> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
ebdf41dd69
commit
53a9bf8fec
|
@ -507,6 +507,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
}
|
||||
|
||||
// Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
|
||||
// Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
|
||||
// string, the original implementation from Hive will not drop the trailing semicolon as expected,
|
||||
// hence we refined this function a little bit.
|
||||
private def splitSemiColon(line: String): JList[String] = {
|
||||
var insideSingleQuote = false
|
||||
var insideDoubleQuote = false
|
||||
|
@ -519,13 +522,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
for (index <- 0 until line.length) {
|
||||
if (line.charAt(index) == '\'' && !insideComment) {
|
||||
// take a look to see if it is escaped
|
||||
if (!escape) {
|
||||
// See the comment above about SPARK-31595
|
||||
if (!escape && !insideDoubleQuote) {
|
||||
// flip the boolean variable
|
||||
insideSingleQuote = !insideSingleQuote
|
||||
}
|
||||
} else if (line.charAt(index) == '\"' && !insideComment) {
|
||||
// take a look to see if it is escaped
|
||||
if (!escape) {
|
||||
// See the comment above about SPARK-31595
|
||||
if (!escape && !insideSingleQuote) {
|
||||
// flip the boolean variable
|
||||
insideDoubleQuote = !insideDoubleQuote
|
||||
}
|
||||
|
|
|
@ -500,4 +500,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
|
|||
| ;""".stripMargin -> "testcomment"
|
||||
)
|
||||
}
|
||||
|
||||
test("SPARK-31595 Should allow unescaped quote mark in quoted string") {
|
||||
runCliWithin(1.minute)(
|
||||
"SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235"
|
||||
)
|
||||
runCliWithin(1.minute)(
|
||||
"SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue