[SPARK-8990] [SQL] SPARK-8990 DataFrameReader.parquet() should respect user specified options

Author: Cheng Lian <lian@databricks.com> Closes #7347 from liancheng/spark-8990 and squashes the following commits: 045698c [Cheng Lian] SPARK-8990 DataFrameReader.parquet() should respect user specified options
2015-07-10 16:49:45 -07:00 · 2015-07-10 16:49:45 -07:00 · 857e325f30
parent fb8807c9b0
commit 857e325f30
2 changed files with 22 additions and 1 deletions
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@ -263,7 +263,7 @@ class DataFrameReader private[sql](sqlContext: SQLContext) {
      val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray
      sqlContext.baseRelationToDataFrame(
        new ParquetRelation2(
-          globbedPaths.map(_.toString), None, None, Map.empty[String, String])(sqlContext))
+          globbedPaths.map(_.toString), None, None, extraOptions.toMap)(sqlContext))
    }
  }

--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@ -142,6 +142,27 @@ class ParquetQuerySuiteBase extends QueryTest with ParquetTest {
      testSchemaMerging(2)
    }
  }
+
+  test("SPARK-8990 DataFrameReader.parquet() should respect user specified options") {
+    withTempPath { dir =>
+      val basePath = dir.getCanonicalPath
+      sqlContext.range(0, 10).toDF("a").write.parquet(new Path(basePath, "foo=1").toString)
+      sqlContext.range(0, 10).toDF("b").write.parquet(new Path(basePath, "foo=a").toString)
+
+      // Disables the global SQL option for schema merging
+      withSQLConf(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key -> "false") {
+        assertResult(2) {
+          // Disables schema merging via data source option
+          sqlContext.read.option("mergeSchema", "false").parquet(basePath).columns.length
+        }
+
+        assertResult(3) {
+          // Enables schema merging via data source option
+          sqlContext.read.option("mergeSchema", "true").parquet(basePath).columns.length
+        }
+      }
+    }
+  }
 }

 class ParquetDataSourceOnQuerySuite extends ParquetQuerySuiteBase with BeforeAndAfterAll {