[SPARK-32810][SQL][TESTS][FOLLOWUP] Check path globbing in JSON/CSV datasources v1 and v2
### What changes were proposed in this pull request? In the PR, I propose to move the test `SPARK-32810: CSV and JSON data sources should be able to read files with escaped glob metacharacter in the paths` from `DataFrameReaderWriterSuite` to `CSVSuite` and to `JsonSuite`. This will allow running the same test in `CSVv1Suite`/`CSVv2Suite` and in `JsonV1Suite`/`JsonV2Suite`. ### Why are the changes needed? To improve test coverage by checking JSON/CSV datasources v1 and v2. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.csv.*" $ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.json.*" ``` Closes #29684 from MaxGekk/globbing-paths-when-inferring-schema-dsv2. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
parent
e8634d8f6f
commit
adc8d687ce
|
@@ -2407,6 +2407,19 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-32810: CSV data source should be able to read files with " +
    "escaped glob metacharacter in the paths") {
  withTempDir { tempDir =>
    val root = tempDir.getCanonicalPath
    // Write CSV (no explicit schema) into a directory whose name contains
    // glob metacharacters.
    val tableName = "[abc]"
    spark.range(3).coalesce(1).write.csv(s"$root/$tableName")
    // Escape each glob metacharacter with a backslash so the path is taken
    // literally when reading back.
    val escapedName = """(\[|\]|\{|\})""".r.replaceAllIn(tableName, """\\$1""")
    val readback = spark.read.csv(s"$root/$escapedName")
    assert(readback.collect sameElements Array(Row("0"), Row("1"), Row("2")))
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CSVv1Suite extends CSVSuite {
|
class CSVv1Suite extends CSVSuite {
|
||||||
|
|
|
@@ -2824,6 +2824,19 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-32810: JSON data source should be able to read files with " +
    "escaped glob metacharacter in the paths") {
  withTempDir { tempDir =>
    val root = tempDir.getCanonicalPath
    // Write JSON (no explicit schema) into a directory whose name contains
    // glob metacharacters.
    val tableName = "{def}"
    spark.range(3).coalesce(1).write.json(s"$root/$tableName")
    // Escape each glob metacharacter with a backslash so the path is taken
    // literally when reading back.
    val escapedName = """(\[|\]|\{|\})""".r.replaceAllIn(tableName, """\\$1""")
    val readback = spark.read.json(s"$root/$escapedName")
    assert(readback.collect sameElements Array(Row(0), Row(1), Row(2)))
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class JsonV1Suite extends JsonSuite {
|
class JsonV1Suite extends JsonSuite {
|
||||||
|
|
|
@@ -1184,27 +1184,4 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
|
||||||
verifyLoadFails(df.write.option("path", path).parquet(path))
|
verifyLoadFails(df.write.option("path", path).parquet(path))
|
||||||
verifyLoadFails(df.write.option("path", path).format("parquet").save(path))
|
verifyLoadFails(df.write.option("path", path).format("parquet").save(path))
|
||||||
}
|
}
|
||||||
|
|
||||||
test("SPARK-32810: CSV and JSON data sources should be able to read files with " +
    "escaped glob metacharacter in the paths") {
  // Backslash-escape every glob metacharacter so a path segment is taken
  // literally rather than expanded as a glob pattern.
  def escapeGlobs(str: String): String =
    """(\[|\]|\{|\})""".r.replaceAllIn(str, """\\$1""")

  withTempDir { tempDir =>
    val root = tempDir.getCanonicalPath

    // Round-trip CSV (no explicit schema) through a directory named with
    // glob metacharacters.
    val csvDir = "[abc]"
    spark.range(3).coalesce(1).write.csv(s"$root/$csvDir")
    val csvReadback = spark.read.csv(s"$root/${escapeGlobs(csvDir)}")
    assert(csvReadback.collect sameElements Array(Row("0"), Row("1"), Row("2")))

    // Round-trip JSON (no explicit schema) the same way.
    val jsonDir = "{def}"
    spark.range(3).coalesce(1).write.json(s"$root/$jsonDir")
    val jsonReadback = spark.read.json(s"$root/${escapeGlobs(jsonDir)}")
    assert(jsonReadback.collect sameElements Array(Row(0), Row(1), Row(2)))
  }
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue