[SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql
### What changes were proposed in this pull request? This PR help find correct bound of bracketed comment in spark-sql. Here is the log for UT of SPARK-33100 in CliSuite before: ``` 2021-01-05 13:22:34.768 - stdout> spark-sql> /* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.523 - stderr> Time taken: 6.716 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.599 - stdout> test 2021-01-05 13:22:41.6 - stdout> spark-sql> ;;/* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.709 - stdout> test 2021-01-05 13:22:41.709 - stdout> spark-sql> /* SELECT 'test';*/;; SELECT 'test'; 2021-01-05 13:22:41.902 - stdout> spark-sql> SELECT 'test'; -- SELECT 'test'; 2021-01-05 13:22:41.902 - stderr> Time taken: 0.129 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.902 - stderr> Error in query: 2021-01-05 13:22:41.902 - stderr> mismatched input '<EOF>' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 19) 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> == SQL == 2021-01-05 13:22:42.006 - stderr> /* SELECT 'test';*/ 2021-01-05 13:22:42.006 - stderr> -------------------^^^ 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> Time taken: 0.226 seconds, Fetched 1 row(s) 2021-01-05 13:22:42.006 - stdout> test ``` The root cause is that the insideBracketedComment is not accurate. For `/* comment */`, the last character `/` is not insideBracketedComment and it would be treat as beginning of statements. In this PR, this issue is fixed. ### Why are the changes needed? To fix the issue described above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #31054 from turboFei/SPARK-33100-followup. Authored-by: fwang12 <fwang12@ebay.com> Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
This commit is contained in:
parent
d36cdd5541
commit
7b06acc28b
|
@ -530,15 +530,24 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
var bracketedCommentLevel = 0
|
||||
var escape = false
|
||||
var beginIndex = 0
|
||||
var includingStatement = false
|
||||
var leavingBracketedComment = false
|
||||
var isStatement = false
|
||||
val ret = new JArrayList[String]
|
||||
|
||||
def insideBracketedComment: Boolean = bracketedCommentLevel > 0
|
||||
def insideComment: Boolean = insideSimpleComment || insideBracketedComment
|
||||
def statementBegin(index: Int): Boolean = includingStatement || (!insideComment &&
|
||||
def statementInProgress(index: Int): Boolean = isStatement || (!insideComment &&
|
||||
index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
|
||||
|
||||
for (index <- 0 until line.length) {
|
||||
// Checks if we need to decrement a bracketed comment level; the last character '/' of
|
||||
// bracketed comments is still inside the comment, so `insideBracketedComment` must keep true
|
||||
// in the previous loop and we decrement the level here if needed.
|
||||
if (leavingBracketedComment) {
|
||||
bracketedCommentLevel -= 1
|
||||
leavingBracketedComment = false
|
||||
}
|
||||
|
||||
if (line.charAt(index) == '\'' && !insideComment) {
|
||||
// take a look to see if it is escaped
|
||||
// See the comment above about SPARK-31595
|
||||
|
@ -568,12 +577,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
if (insideSingleQuote || insideDoubleQuote || insideComment) {
|
||||
// do not split
|
||||
} else {
|
||||
if (includingStatement) {
|
||||
if (isStatement) {
|
||||
// split, do not include ; itself
|
||||
ret.add(line.substring(beginIndex, index))
|
||||
}
|
||||
beginIndex = index + 1
|
||||
includingStatement = false
|
||||
isStatement = false
|
||||
}
|
||||
} else if (line.charAt(index) == '\n') {
|
||||
// with a new line the inline simple comment should end.
|
||||
|
@ -585,7 +594,8 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
if (insideSingleQuote || insideDoubleQuote) {
|
||||
// Ignores '/' in any case of quotes
|
||||
} else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
|
||||
bracketedCommentLevel -= 1
|
||||
// Decrements `bracketedCommentLevel` at the beginning of the next loop
|
||||
leavingBracketedComment = true
|
||||
} else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
|
||||
bracketedCommentLevel += 1
|
||||
}
|
||||
|
@ -597,9 +607,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
|
|||
escape = true
|
||||
}
|
||||
|
||||
includingStatement = statementBegin(index)
|
||||
isStatement = statementInProgress(index)
|
||||
}
|
||||
if (includingStatement) {
|
||||
if (isStatement) {
|
||||
ret.add(line.substring(beginIndex))
|
||||
}
|
||||
ret
|
||||
|
|
|
@ -577,8 +577,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
|
|||
"/* SELECT 'test';*/ SELECT 'test';" -> "test",
|
||||
";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
|
||||
"/* SELECT 'test';*/;; SELECT 'test';" -> "test",
|
||||
"SELECT 'test'; -- SELECT 'test';" -> "",
|
||||
"SELECT 'test'; /* SELECT 'test';*/;" -> "",
|
||||
"SELECT 'test'; -- SELECT 'test';" -> "test",
|
||||
"SELECT 'test'; /* SELECT 'test';*/;" -> "test",
|
||||
"/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
|
||||
"/*\nmulti-line\n*/ SELECT 'test';" -> "test",
|
||||
"/*/* multi-level bracketed*/ SELECT 'test';" -> "test"
|
||||
|
|
Loading…
Reference in a new issue