[SPARK-13114][SQL] Add a test for tokens more than the fields in schema

https://issues.apache.org/jira/browse/SPARK-13114

This PR adds a test for rows that contain more tokens than there are fields in the schema.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #11020 from HyukjinKwon/SPARK-13114.
This commit is contained in:
hyukjinkwon 2016-02-02 10:41:06 -08:00 committed by Reynold Xin
parent 29d92181d0
commit b93830126c
2 changed files with 18 additions and 0 deletions

View file

@ -0,0 +1,6 @@
~ All the rows here are malformed: each has more tokens than the schema (header).
year,make,model,comment,blank
"2012","Tesla","S","No comment",,null,null
1997,Ford,E350,"Go get one now they are going fast",,null,null
2015,Chevy,,,,
Can't render this file because it has a wrong number of fields in line 2.

View file

@ -28,6 +28,7 @@ import org.apache.spark.sql.types._
class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
private val carsFile = "cars.csv"
private val carsMalformedFile = "cars-malformed.csv"
private val carsFile8859 = "cars_iso-8859-1.csv"
private val carsTsvFile = "cars.tsv"
private val carsAltFile = "cars-alternative.csv"
@ -191,6 +192,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
assert(exception.getMessage.contains("Malformed line in FAILFAST mode: 2015,Chevy,Volt"))
}
// Loads the malformed cars fixture (its own note line says every row has more
// tokens than the header) and passes the loaded result to verifyCars.
test("test for tokens more than the fields in the schema") {
  val csvReader = sqlContext.read.format("csv")
  // The header row is not consumed as column names, and "~" is set as the
  // comment marker -- presumably so the fixture's leading "~ ..." note line is
  // skipped rather than parsed as data (confirm against the CSV reader options).
  val configuredReader = csvReader
    .option("header", "false")
    .option("comment", "~")
  val malformedCars = configuredReader.load(testFile(carsMalformedFile))

  // Column types are not checked here; only the non-header verification path runs.
  verifyCars(malformedCars, withHeader = false, checkTypes = false)
}
test("test with null quote character") {
val cars = sqlContext.read
.format("csv")