[SPARK-23094][SPARK-23723][SPARK-23724][SQL][FOLLOW-UP] Support custom encoding for json files
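## What changes were proposed in this pull request?

This follow-up adds a test case that checks the behavior when users write JSON in the UTF-16 or UTF-32 encoding with `multiLine` disabled. It also rewords the corresponding error message raised by `JSONOptions` and rewrites the `isLineSepRequired` condition in a logically equivalent form.

## How was this patch tested?

N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #21254 from gatorsmile/followupSPARK-23094.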
This commit is contained in:
parent
b54bbe57b3
commit
2f6fe7d679
|
@ -110,11 +110,12 @@ private[sql] class JSONOptions(
|
|||
val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
|
||||
val isBlacklisted = blacklist.contains(Charset.forName(enc))
|
||||
require(multiLine || !isBlacklisted,
|
||||
s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
|
||||
| ${blacklist.mkString(", ")}""".stripMargin)
|
||||
s"""The $enc encoding in the blacklist is not allowed when multiLine is disabled.
|
||||
|Blacklist: ${blacklist.mkString(", ")}""".stripMargin)
|
||||
|
||||
val isLineSepRequired =
|
||||
multiLine || Charset.forName(enc) == StandardCharsets.UTF_8 || lineSeparator.nonEmpty
|
||||
|
||||
val isLineSepRequired = !(multiLine == false &&
|
||||
Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
|
||||
require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")
|
||||
|
||||
enc
|
||||
|
|
|
@ -2313,6 +2313,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
|
|||
}
|
||||
}
|
||||
|
||||
test("SPARK-23723: write json in UTF-16/32 with multiline off") {
|
||||
Seq("UTF-16", "UTF-32").foreach { encoding =>
|
||||
withTempPath { path =>
|
||||
val ds = spark.createDataset(Seq(
|
||||
("a", 1), ("b", 2), ("c", 3))
|
||||
).repartition(2)
|
||||
val e = intercept[IllegalArgumentException] {
|
||||
ds.write
|
||||
.option("encoding", encoding)
|
||||
.option("multiline", "false")
|
||||
.format("json").mode("overwrite")
|
||||
.save(path.getCanonicalPath)
|
||||
}.getMessage
|
||||
assert(e.contains(
|
||||
s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit = {
|
||||
test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
|
||||
val schema = new StructType().add("f1", StringType).add("f2", IntegerType)
|
||||
|
|
Loading…
Reference in a new issue