[SPARK-13114][SQL] Add a test for rows with more tokens than fields in the schema

https://issues.apache.org/jira/browse/SPARK-13114 This PR adds a test for CSV rows that contain more tokens than there are fields in the schema. Author: hyukjinkwon <gurwls223@gmail.com> Closes #11020 from HyukjinKwon/SPARK-13114.
2016-02-02 10:41:06 -08:00 · 2016-02-02 10:41:06 -08:00 · b93830126c
parent 29d92181d0
commit b93830126c
2 changed files with 18 additions and 0 deletions
--- a/sql/core/src/test/resources/cars-malformed.csv
+++ b/sql/core/src/test/resources/cars-malformed.csv
@ -0,0 +1,6 @@
+~ All the rows here are malformed having tokens more than the schema (header).
+year,make,model,comment,blank
+"2012","Tesla","S","No comment",,null,null
+
+1997,Ford,E350,"Go get one now they are going fast",,null,null
+2015,Chevy,,,,
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@ -28,6 +28,7 @@ import org.apache.spark.sql.types._

 class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
  private val carsFile = "cars.csv"
+  private val carsMalformedFile = "cars-malformed.csv"
  private val carsFile8859 = "cars_iso-8859-1.csv"
  private val carsTsvFile = "cars.tsv"
  private val carsAltFile = "cars-alternative.csv"
@ -191,6 +192,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
    assert(exception.getMessage.contains("Malformed line in FAILFAST mode: 2015,Chevy,Volt"))
  }

+  test("test for tokens more than the fields in the schema") {
+    // The fixture's data rows carry more tokens than its header line has fields;
+    // "~" is configured as the comment marker for the fixture's first line.
+    val csvReader = sqlContext.read.format("csv")
+    val malformedCars = csvReader
+      .option("header", "false")
+      .option("comment", "~")
+      .load(testFile(carsMalformedFile))
+    verifyCars(malformedCars, withHeader = false, checkTypes = false)
+  }
+
  test("test with null quote character") {
    val cars = sqlContext.read
      .format("csv")