From 61b80d552aafb262b5f817f7bc9c0acd0328715b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 3 Jun 2016 14:26:24 -0700 Subject: [PATCH] [SPARK-15547][SQL] nested case class in encoder can have different number of fields from the real schema ## What changes were proposed in this pull request? There are 2 kinds of `GetStructField`: 1. resolved from `UnresolvedExtractValue`, and it will have a `name` property. 2. created when we build deserializer expression for nested tuple, no `name` property. When we want to validate the ordinals of nested tuple, we should only catch `GetStructField` without the name property. ## How was this patch tested? new test in `EncoderResolutionSuite` Author: Wenchen Fan Closes #13474 from cloud-fan/ordinal-check. --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++++- .../sql/catalyst/encoders/EncoderResolutionSuite.scala | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4f6b4830cd..0e68656467 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1964,7 +1964,12 @@ class Analyzer( */ private def validateNestedTupleFields(deserializer: Expression): Unit = { val structChildToOrdinals = deserializer - .collect { case g: GetStructField => g } + // There are 2 kinds of `GetStructField`: + // 1. resolved from `UnresolvedExtractValue`, and it will have a `name` property. + // 2. created when we build deserializer expression for nested tuple, no `name` property. + // Here we want to validate the ordinals of nested tuple, so we should only catch + // `GetStructField` without the name property. + .collect { case g: GetStructField if g.name.isEmpty => g } .groupBy(_.child) .mapValues(_.map(_.ordinal).distinct.sorted) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 7251202c7b..802397d50e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -115,6 +115,12 @@ class EncoderResolutionSuite extends PlanTest { } } + test("nested case class can have different number of fields from the real schema") { + val encoder = ExpressionEncoder[(String, StringIntClass)] + val attrs = Seq('a.string, 'b.struct('a.string, 'b.int, 'c.int)) + encoder.resolveAndBind(attrs) + } + test("throw exception if real type is not compatible with encoder schema") { val msg1 = intercept[AnalysisException] { ExpressionEncoder[StringIntClass].resolveAndBind(Seq('a.string, 'b.long))