From 4afa39e223c70e91b6ee19e9ea76fa9115203d74 Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Wed, 9 Nov 2016 15:00:46 +0800
Subject: [PATCH] [SPARK-18333][SQL] Revert hacks in parquet and orc reader to
 support case insensitive resolution

## What changes were proposed in this pull request?

These are no longer needed after https://issues.apache.org/jira/browse/SPARK-17183

cc cloud-fan

## How was this patch tested?

Existing parquet and orc tests.

Author: Eric Liang

Closes #15799 from ericl/sc-4929.
---
 .../parquet/ParquetReadSupport.scala       |  6 +---
 .../parquet/ParquetSchemaSuite.scala       | 28 -------------------
 .../spark/sql/hive/orc/OrcFileFormat.scala | 12 +-------
 3 files changed, 2 insertions(+), 44 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
index 4dea8cf29e..f1a35dd8a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
@@ -269,15 +269,11 @@ private[parquet] object ParquetReadSupport {
    */
   private def clipParquetGroupFields(
       parquetRecord: GroupType, structType: StructType): Seq[Type] = {
-    val parquetFieldMap = parquetRecord.getFields.asScala
-      .map(f => f.getName -> f).toMap
-    val caseInsensitiveParquetFieldMap = parquetRecord.getFields.asScala
-      .map(f => f.getName.toLowerCase -> f).toMap
+    val parquetFieldMap = parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
     val toParquet = new ParquetSchemaConverter(writeLegacyParquetFormat = false)
     structType.map { f =>
       parquetFieldMap
         .get(f.name)
-        .orElse(caseInsensitiveParquetFieldMap.get(f.name.toLowerCase))
         .map(clipParquetType(_, f.dataType))
         .getOrElse(toParquet.convertField(f))
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index c3d202ced2..8a980a7eb5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1080,34 +1080,6 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
     }
   }
 
-  testSchemaClipping(
-    "falls back to case insensitive resolution",
-
-    parquetSchema =
-      """message root {
-        |  required group A {
-        |    optional int32 B;
-        |  }
-        |  optional int32 c;
-        |}
-      """.stripMargin,
-
-    catalystSchema = {
-      val nestedType = new StructType().add("b", IntegerType, nullable = true)
-      new StructType()
-        .add("a", nestedType, nullable = true)
-        .add("c", IntegerType, nullable = true)
-    },
-
-    expectedSchema =
-      """message root {
-        |  required group A {
-        |    optional int32 B;
-        |  }
-        |  optional int32 c;
-        |}
-      """.stripMargin)
-
   testSchemaClipping(
     "simple nested struct",
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 7c519a0743..42c92ed5ca 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -305,17 +305,7 @@ private[orc] object OrcRelation extends HiveInspectors {
 
   def setRequiredColumns(
       conf: Configuration, physicalSchema: StructType, requestedSchema: StructType): Unit = {
-    val caseInsensitiveFieldMap: Map[String, Int] = physicalSchema.fieldNames
-      .zipWithIndex
-      .map(f => (f._1.toLowerCase, f._2))
-      .toMap
-    val ids = requestedSchema.map { a =>
-      val exactMatch: Option[Int] = physicalSchema.getFieldIndex(a.name)
-      val res = exactMatch.getOrElse(
-        caseInsensitiveFieldMap.getOrElse(a.name.toLowerCase,
-          throw new IllegalArgumentException(s"""Field "${a.name}" does not exist.""")))
-      res: Integer
-    }
+    val ids = requestedSchema.map(a => physicalSchema.fieldIndex(a.name): Integer)
     val (sortedIDs, sortedNames) = ids.zip(requestedSchema.fieldNames).sorted.unzip
     HiveShim.appendReadColumns(conf, sortedIDs, sortedNames)
   }
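
For context, the following is a minimal standalone sketch of the exact-match-then-lowercase fallback that both readers performed before this revert. It is not code from this patch, and the object and method names are hypothetical; it only illustrates the resolution behavior being removed:

```scala
// Standalone sketch only -- NOT part of this patch. Names are hypothetical;
// this just illustrates the fallback behavior the patch deletes.
object CaseInsensitiveFallbackSketch {
  // Resolve a requested column against the file's physical column names:
  // exact match first, then the lower-cased fallback being reverted.
  def resolve(physicalFieldNames: Seq[String], requestedName: String): Option[Int] = {
    val exact = physicalFieldNames.indexOf(requestedName)
    if (exact >= 0) {
      Some(exact)
    } else {
      // The reverted hack: index physical names by lower case and retry.
      val byLowerCase = physicalFieldNames.map(_.toLowerCase).zipWithIndex.toMap
      byLowerCase.get(requestedName.toLowerCase)
    }
  }

  def main(args: Array[String]): Unit = {
    val physical = Seq("A", "c")
    println(resolve(physical, "a")) // Some(0) -- only via the removed fallback
    println(resolve(physical, "c")) // Some(1) -- exact match, unaffected
    println(resolve(physical, "d")) // None -- no match either way
  }
}
```

After the revert, the ORC path relies on `StructType.fieldIndex`, which throws an `IllegalArgumentException` when no exactly-matching field exists, so a failed lookup still surfaces as an error rather than silently matching a differently-cased column.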