[SPARK-26339][SQL] Throws better exception when reading files that start with underscore
## What changes were proposed in this pull request?
My pull request #23288 was resolved and merged to master, but it turned out later that my change broke another regression test. Because we cannot reopen a pull request, I have created a new pull request here.
Commit 92934b4 is the only change on top of pull request #23288.
The `checkFilesExist` guard was dropped at 239cfa4 after the discussion in #23288 (comment).
However, that change turned out to be wrong, because we should not run the check when the `checkFilesExist` argument is false.
The test at 27e42c1de5/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala (L2555)
failed while the `checkFilesExist` guard was skipped, but succeeds again after commit 92934b4.
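
As a rough illustration of the user-facing behavior after this change (a hedged sketch with made-up paths, assuming an existing `SparkSession` named `spark`): reading only underscore-prefixed files now fails fast with a clear message, while mixing them with normal files still succeeds.

```scala
import org.apache.spark.sql.AnalysisException

// All specified paths start with "_" and are filtered out as metadata-like
// files, so the read now fails with an explicit AnalysisException.
try {
  spark.read.option("header", "true").csv("/tmp/data/_cars.csv")
} catch {
  case e: AnalysisException =>
    println(e.getMessage) // roughly: "All paths were ignored: ..."
}

// At least one path survives the filter, so the read succeeds; the ignored
// "_cars.csv" is only reported via logDebug ("Some paths were ignored: ...").
val df = spark.read
  .option("header", "true")
  .csv("/tmp/data/cars.csv", "/tmp/data/_cars.csv")
```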
## How was this patch tested?
Both of the tests below passed:
```
testOnly org.apache.spark.sql.execution.datasources.csv.CSVSuite
testOnly org.apache.spark.sql.SQLQuerySuite
```
Closes #23446 from KeiichiHirobe/SPARK-26339.
Authored-by: Hirobe Keiichi <keiichi_hirobe@forcia.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
Parent: 737f08949a
Commit: 9d8e9b394b
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala:

```diff
@@ -543,7 +543,7 @@ case class DataSource(
       checkFilesExist: Boolean): Seq[Path] = {
     val allPaths = caseInsensitiveOptions.get("path") ++ paths
     val hadoopConf = sparkSession.sessionState.newHadoopConf()
-    allPaths.flatMap { path =>
+    val allGlobPath = allPaths.flatMap { path =>
       val hdfsPath = new Path(path)
       val fs = hdfsPath.getFileSystem(hadoopConf)
       val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
@@ -560,6 +560,23 @@ case class DataSource(
       }
       globPath
     }.toSeq
+
+    if (checkFilesExist) {
+      val (filteredOut, filteredIn) = allGlobPath.partition { path =>
+        InMemoryFileIndex.shouldFilterOut(path.getName)
+      }
+      if (filteredOut.nonEmpty) {
+        if (filteredIn.isEmpty) {
+          throw new AnalysisException(
+            s"All paths were ignored:\n${filteredOut.mkString("\n ")}")
+        } else {
+          logDebug(
+            s"Some paths were ignored:\n${filteredOut.mkString("\n ")}")
+        }
+      }
+    }
+
+    allGlobPath
   }
 }
 
```
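For context, `InMemoryFileIndex.shouldFilterOut` is Spark's existing rule for skipping metadata-like files during listing, which is why paths whose names start with an underscore (such as the `_cars.csv` fixture added below) were being silently dropped in the first place. A simplified, approximate sketch of that rule (not the exact Spark implementation, which has a few more cases) looks like:

```scala
// Approximate sketch of the per-file-name filter rule; illustrative only.
// The real InMemoryFileIndex.shouldFilterOut has additional cases
// (e.g. in-progress copy files), but the underscore/dot behavior is the point here.
def looksFilteredOut(pathName: String): Boolean = {
  val hiddenLike = pathName.startsWith("_") || pathName.startsWith(".")
  val parquetMetadata =
    pathName.startsWith("_common_metadata") || pathName.startsWith("_metadata")
  hiddenLike && !parquetMetadata
}

looksFilteredOut("_cars.csv") // true  -> ignored by the listing
looksFilteredOut("cars.csv")  // false -> read normally
```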
sql/core/src/test/resources/test-data/_cars.csv (new file, 7 lines):

```diff
@@ -0,0 +1,7 @@
+
+year,make,model,comment,blank
+"2012","Tesla","S","No comment",
+
+1997,Ford,E350,"Go get one now they are going fast",
+2015,Chevy,Volt
+
```
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala:

```diff
@@ -53,6 +53,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
   private val carsEmptyValueFile = "test-data/cars-empty-value.csv"
   private val carsBlankColName = "test-data/cars-blank-column-name.csv"
   private val carsCrlf = "test-data/cars-crlf.csv"
+  private val carsFilteredOutFile = "test-data/_cars.csv"
   private val emptyFile = "test-data/empty.csv"
   private val commentsFile = "test-data/comments.csv"
   private val disableCommentsFile = "test-data/disable_comments.csv"
@@ -346,6 +347,25 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
     assert(result.schema.fieldNames.size === 1)
   }
 
+  test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") {
+    val cars = spark
+      .read
+      .option("header", "false")
+      .csv(testFile(carsFile), testFile(carsFilteredOutFile))
+
+    verifyCars(cars, withHeader = false, checkTypes = false)
+  }
+
+  test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") {
+    val e = intercept[AnalysisException] {
+      val cars = spark
+        .read
+        .option("header", "false")
+        .csv(testFile(carsFilteredOutFile))
+    }.getMessage
+    assert(e.contains("All paths were ignored:"))
+  }
+
   test("DDL test with empty file") {
     withView("carsTable") {
       spark.sql(
```