[SPARK-18333][SQL] Revert hacks in parquet and orc reader to support case insensitive resolution

## What changes were proposed in this pull request?

The case-insensitive resolution hacks in the Parquet and ORC readers are no longer needed after https://issues.apache.org/jira/browse/SPARK-17183, so this change reverts them.

cc cloud-fan

## How was this patch tested?

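Existing Parquet and ORC tests. The schema-clipping test case "falls back to case insensitive resolution" is removed together with the fallback it exercised.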

Author: Eric Liang <ekl@databricks.com>

Closes #15799 from ericl/sc-4929.
This commit is contained in:
Eric Liang 2016-11-09 15:00:46 +08:00 committed by Wenchen Fan
parent 55964c15a7
commit 4afa39e223
3 changed files with 2 additions and 44 deletions

View file

@ -269,15 +269,11 @@ private[parquet] object ParquetReadSupport {
*/
private def clipParquetGroupFields(
parquetRecord: GroupType, structType: StructType): Seq[Type] = {
val parquetFieldMap = parquetRecord.getFields.asScala
.map(f => f.getName -> f).toMap
val caseInsensitiveParquetFieldMap = parquetRecord.getFields.asScala
.map(f => f.getName.toLowerCase -> f).toMap
val parquetFieldMap = parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
val toParquet = new ParquetSchemaConverter(writeLegacyParquetFormat = false)
structType.map { f =>
parquetFieldMap
.get(f.name)
.orElse(caseInsensitiveParquetFieldMap.get(f.name.toLowerCase))
.map(clipParquetType(_, f.dataType))
.getOrElse(toParquet.convertField(f))
}

View file

@ -1080,34 +1080,6 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
}
}
testSchemaClipping(
"falls back to case insensitive resolution",
parquetSchema =
"""message root {
| required group A {
| optional int32 B;
| }
| optional int32 c;
|}
""".stripMargin,
catalystSchema = {
val nestedType = new StructType().add("b", IntegerType, nullable = true)
new StructType()
.add("a", nestedType, nullable = true)
.add("c", IntegerType, nullable = true)
},
expectedSchema =
"""message root {
| required group A {
| optional int32 B;
| }
| optional int32 c;
|}
""".stripMargin)
testSchemaClipping(
"simple nested struct",

View file

@ -305,17 +305,7 @@ private[orc] object OrcRelation extends HiveInspectors {
def setRequiredColumns(
conf: Configuration, physicalSchema: StructType, requestedSchema: StructType): Unit = {
val caseInsensitiveFieldMap: Map[String, Int] = physicalSchema.fieldNames
.zipWithIndex
.map(f => (f._1.toLowerCase, f._2))
.toMap
val ids = requestedSchema.map { a =>
val exactMatch: Option[Int] = physicalSchema.getFieldIndex(a.name)
val res = exactMatch.getOrElse(
caseInsensitiveFieldMap.getOrElse(a.name,
throw new IllegalArgumentException(s"""Field "$a.name" does not exist.""")))
res: Integer
}
val ids = requestedSchema.map(a => physicalSchema.fieldIndex(a.name): Integer)
val (sortedIDs, sortedNames) = ids.zip(requestedSchema.fieldNames).sorted.unzip
HiveShim.appendReadColumns(conf, sortedIDs, sortedNames)
}