[SPARK-18333][SQL] Revert hacks in parquet and orc reader to support case insensitive resolution

## What changes were proposed in this pull request?

These are no longer needed after https://issues.apache.org/jira/browse/SPARK-17183. cc cloud-fan

## How was this patch tested?

Existing parquet and orc tests.

Author: Eric Liang <ekl@databricks.com>

Closes #15799 from ericl/sc-4929.
This commit is contained in:
parent 55964c15a7
commit 4afa39e223
@ -269,15 +269,11 @@ private[parquet] object ParquetReadSupport {
|
|||
*/
|
||||
/**
 * Clips the fields of a Parquet [[GroupType]] to the fields requested by the given
 * Catalyst [[StructType]], returning one Parquet [[Type]] per requested field.
 *
 * For each Catalyst field, the matching Parquet field (by exact name) is clipped to the
 * requested data type via `clipParquetType`. If the Parquet group has no field with that
 * name, the Catalyst field is converted to a fresh Parquet type instead, so the caller
 * always gets exactly `structType.length` results.
 *
 * NOTE(review): as of SPARK-18333 the lookup is intentionally case SENSITIVE — the
 * case-insensitive fallback was removed because resolution is handled upstream
 * (SPARK-17183).
 *
 * @param parquetRecord the physical Parquet group whose fields are being clipped
 * @param structType    the requested Catalyst schema
 * @return one clipped (or freshly converted) Parquet [[Type]] per field of `structType`
 */
private def clipParquetGroupFields(
    parquetRecord: GroupType, structType: StructType): Seq[Type] = {
  // Exact-name index of the physical Parquet fields.
  val parquetFieldMap = parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
  // Used only for fields absent from the Parquet file; legacy format is irrelevant here
  // because the converted type is never written, only used for schema clipping.
  val toParquet = new ParquetSchemaConverter(writeLegacyParquetFormat = false)
  structType.map { f =>
    parquetFieldMap
      .get(f.name)
      .map(clipParquetType(_, f.dataType))
      .getOrElse(toParquet.convertField(f))
  }
}
|
||||
|
|
|
@ -1080,34 +1080,6 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
|
|||
}
|
||||
}
|
||||
|
||||
// Verifies that schema clipping falls back to case-insensitive name resolution:
// Catalyst fields "a"/"b" (lower case) must match Parquet fields "A"/"B" (upper case),
// and the expected clipped schema keeps the Parquet file's original casing.
// NOTE(review): this is the test REMOVED by SPARK-18333; the case-insensitive fallback
// now lives upstream (SPARK-17183). Kept here only as the diff's pre-revert content.
testSchemaClipping(
  "falls back to case insensitive resolution",

  parquetSchema =
    """message root {
      |  required group A {
      |    optional int32 B;
      |  }
      |  optional int32 c;
      |}
    """.stripMargin,

  catalystSchema = {
    val nestedType = new StructType().add("b", IntegerType, nullable = true)
    new StructType()
      .add("a", nestedType, nullable = true)
      .add("c", IntegerType, nullable = true)
  },

  expectedSchema =
    """message root {
      |  required group A {
      |    optional int32 B;
      |  }
      |  optional int32 c;
      |}
    """.stripMargin)
|
||||
|
||||
testSchemaClipping(
|
||||
"simple nested struct",
|
||||
|
||||
|
|
|
@ -305,17 +305,7 @@ private[orc] object OrcRelation extends HiveInspectors {
|
|||
|
||||
/**
 * Records in the Hadoop `conf` which columns of the ORC file must be read.
 *
 * Each requested field is resolved to its ordinal in the physical schema with
 * `fieldIndex` (case sensitive; throws `IllegalArgumentException` for a missing field —
 * as of SPARK-18333 the case-insensitive fallback was removed because resolution happens
 * upstream, SPARK-17183). The (id, name) pairs are sorted by id, as
 * `HiveShim.appendReadColumns` expects the column ids in ascending order —
 * TODO(review): confirm the ordering requirement against HiveShim.
 *
 * @param conf            Hadoop configuration to mutate with the read-column lists
 * @param physicalSchema  full schema of the ORC file on disk
 * @param requestedSchema subset of columns the query actually needs
 */
def setRequiredColumns(
    conf: Configuration, physicalSchema: StructType, requestedSchema: StructType): Unit = {
  // `: Integer` boxes the index for the Java-facing HiveShim API.
  val ids = requestedSchema.map(a => physicalSchema.fieldIndex(a.name): Integer)
  val (sortedIDs, sortedNames) = ids.zip(requestedSchema.fieldNames).sorted.unzip
  HiveShim.appendReadColumns(conf, sortedIDs, sortedNames)
}
|
||||
|
|
Loading…
Reference in a new issue