[SPARK-8990] [SQL] SPARK-8990 DataFrameReader.parquet() should respect user specified options

Author: Cheng Lian <lian@databricks.com>

Closes #7347 from liancheng/spark-8990 and squashes the following commits:

045698c [Cheng Lian] SPARK-8990 DataFrameReader.parquet() should respect user specified options
This commit is contained in:
Cheng Lian 2015-07-10 16:49:45 -07:00 committed by Reynold Xin
parent fb8807c9b0
commit 857e325f30
2 changed files with 22 additions and 1 deletion

View file

@ -263,7 +263,7 @@ class DataFrameReader private[sql](sqlContext: SQLContext) {
val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray
sqlContext.baseRelationToDataFrame(
new ParquetRelation2(
globbedPaths.map(_.toString), None, None, Map.empty[String, String])(sqlContext))
globbedPaths.map(_.toString), None, None, extraOptions.toMap)(sqlContext))
}
}

View file

@ -142,6 +142,27 @@ class ParquetQuerySuiteBase extends QueryTest with ParquetTest {
testSchemaMerging(2)
}
}
test("SPARK-8990 DataFrameReader.parquet() should respect user specified options") {
  withTempPath { tempDir =>
    val root = tempDir.getCanonicalPath

    // Two sibling directories under the same root, each holding a single but
    // differently named data column ("a" vs. "b").
    // NOTE(review): the "foo=..." directory names presumably get picked up as a
    // partition column on read, which would explain the expected counts below.
    val firstDir = new Path(root, "foo=1").toString
    val secondDir = new Path(root, "foo=a").toString
    sqlContext.range(0, 10).toDF("a").write.parquet(firstDir)
    sqlContext.range(0, 10).toDF("b").write.parquet(secondDir)

    // Reads the whole root with an explicit per-read "mergeSchema" option and
    // reports how many columns the resulting DataFrame exposes.
    def columnCount(mergeSchema: String): Int =
      sqlContext.read.option("mergeSchema", mergeSchema).parquet(root).columns.length

    // The global SQL conf is switched off for both checks, so only the
    // reader-level option distinguishes the two reads.
    withSQLConf(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key -> "false") {
      // Reader option disables schema merging.
      assertResult(2) {
        columnCount("false")
      }
      // Reader option enables schema merging even though the global conf is off.
      assertResult(3) {
        columnCount("true")
      }
    }
  }
}
}
class ParquetDataSourceOnQuerySuite extends ParquetQuerySuiteBase with BeforeAndAfterAll {