[SPARK-23094][SPARK-23723][SPARK-23724][SQL][FOLLOW-UP] Support custom encoding for json files

## What changes were proposed in this pull request?
This follow-up adds a test case that checks the behavior when users write JSON in the UTF-16 or UTF-32 encoding with `multiLine` disabled.

## How was this patch tested?
N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #21254 from gatorsmile/followupSPARK-23094.
This commit is contained in:
gatorsmile 2018-05-08 21:24:35 +08:00 committed by hyukjinkwon
parent b54bbe57b3
commit 2f6fe7d679
2 changed files with 24 additions and 4 deletions

View file

@ -110,11 +110,12 @@ private[sql] class JSONOptions(
val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
val isBlacklisted = blacklist.contains(Charset.forName(enc))
require(multiLine || !isBlacklisted,
s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
| ${blacklist.mkString(", ")}""".stripMargin)
s"""The $enc encoding in the blacklist is not allowed when multiLine is disabled.
|Blacklist: ${blacklist.mkString(", ")}""".stripMargin)
val isLineSepRequired =
multiLine || Charset.forName(enc) == StandardCharsets.UTF_8 || lineSeparator.nonEmpty
val isLineSepRequired = !(multiLine == false &&
Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")
enc

View file

@ -2313,6 +2313,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
}
}
test("SPARK-23723: write json in UTF-16/32 with multiline off") {
  // Writing JSON in either encoding with multiline turned off is expected to be
  // rejected with an IllegalArgumentException that mentions the blacklist.
  for (encoding <- Seq("UTF-16", "UTF-32")) {
    withTempPath { path =>
      val rows = Seq(("a", 1), ("b", 2), ("c", 3))
      val ds = spark.createDataset(rows).repartition(2)

      // The whole writer chain stays inside `intercept` so the failure is captured
      // regardless of which call in the chain raises it.
      val thrown = intercept[IllegalArgumentException] {
        ds.write
          .option("encoding", encoding)
          .option("multiline", "false")
          .format("json")
          .mode("overwrite")
          .save(path.getCanonicalPath)
      }

      val expectedFragment =
        s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"
      assert(thrown.getMessage.contains(expectedFragment))
    }
  }
}
def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit = {
test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
val schema = new StructType().add("f1", StringType).add("f2", IntegerType)