From d2a535f85b14cd34174c8f3de5cb105964759fd6 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 10 May 2021 23:26:39 +0800
Subject: [PATCH] [SPARK-34246][FOLLOWUP] Change the definition of `findTightestCommonType` for backward compatibility

### What changes were proposed in this pull request?

Change the definition of `findTightestCommonType` from
```
def findTightestCommonType(t1: DataType, t2: DataType): Option[DataType]
```
to
```
val findTightestCommonType: (DataType, DataType) => Option[DataType]
```

### Why are the changes needed?

For backward compatibility. When running a MongoDB connector (built with Spark 3.1.1) against the latest master, the following error is thrown
```
java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonType()Lscala/Function2
```
from https://github.com/mongodb/mongo-spark/blob/master/src/main/scala/com/mongodb/spark/sql/MongoInferSchema.scala#L150

In the previous release, the function was
```
static public scala.Function2<org.apache.spark.sql.types.DataType, org.apache.spark.sql.types.DataType, scala.Option<org.apache.spark.sql.types.DataType>> findTightestCommonType()
```
After https://github.com/apache/spark/pull/31349, the function became:
```
static public scala.Option<org.apache.spark.sql.types.DataType> findTightestCommonType(org.apache.spark.sql.types.DataType t1, org.apache.spark.sql.types.DataType t2)
```
This PR avoids the unnecessary API change.

### Does this PR introduce _any_ user-facing change?

Yes, the definition of `TypeCoercion.findTightestCommonType` is consistent with the previous release again.

### How was this patch tested?

Existing unit tests

Closes #32493 from gengliangwang/typecoercion.

Authored-by: Gengliang Wang
Signed-off-by: Gengliang Wang
---
 .../catalyst/analysis/AnsiTypeCoercion.scala  | 49 +++++++++----------
 .../sql/catalyst/analysis/TypeCoercion.scala  |  6 +--
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index cbeff72d2d..3732b2eb0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -93,36 +93,33 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       WindowFrameCoercion ::
       StringLiteralCoercion :: Nil) :: Nil
 
-  override def findTightestCommonType(t1: DataType, t2: DataType): Option[DataType] = {
-    (t1, t2) match {
-      case (t1, t2) if t1 == t2 => Some(t1)
-      case (NullType, t1) => Some(t1)
-      case (t1, NullType) => Some(t1)
+  val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
+    case (t1, t2) if t1 == t2 => Some(t1)
+    case (NullType, t1) => Some(t1)
+    case (t1, NullType) => Some(t1)
 
-      case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) =>
-        Some(t2)
-      case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) =>
-        Some(t1)
+    case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) =>
+      Some(t2)
+    case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) =>
+      Some(t1)
 
-      case (t1: NumericType, t2: NumericType)
-          if !t1.isInstanceOf[DecimalType] && !t2.isInstanceOf[DecimalType] =>
-        val index = numericPrecedence.lastIndexWhere(t => t == t1 || t == t2)
-        val widerType = numericPrecedence(index)
-        if (widerType == FloatType) {
-          // If the input type is an Integral type and a Float type, simply return Double type as
-          // the tightest common type to avoid potential precision loss on converting the Integral
-          // type as Float type.
-          Some(DoubleType)
-        } else {
-          Some(widerType)
-        }
+    case (t1: NumericType, t2: NumericType)
+        if !t1.isInstanceOf[DecimalType] && !t2.isInstanceOf[DecimalType] =>
+      val index = numericPrecedence.lastIndexWhere(t => t == t1 || t == t2)
+      val widerType = numericPrecedence(index)
+      if (widerType == FloatType) {
+        // If the input type is an Integral type and a Float type, simply return Double type as
+        // the tightest common type to avoid potential precision loss on converting the Integral
+        // type as Float type.
+        Some(DoubleType)
+      } else {
+        Some(widerType)
+      }
 
-      case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
-        Some(TimestampType)
-
-      case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
-    }
+    case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
+      Some(TimestampType)
 
+    case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
   }
 
   override def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 6ad84651c2..2aa6543a55 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -44,7 +44,7 @@ abstract class TypeCoercionBase {
    * with primitive types, because in that case the precision and scale of the result depends on
    * the operation. Those rules are implemented in [[DecimalPrecision]].
    */
-  def findTightestCommonType(type1: DataType, type2: DataType): Option[DataType]
+  val findTightestCommonType: (DataType, DataType) => Option[DataType]
 
   /**
    * Looking for a widened data type of two given data types with some acceptable loss of precision.
@@ -845,8 +845,7 @@ object TypeCoercion extends TypeCoercionBase {
     FloatType,
     DoubleType)
 
-  override def findTightestCommonType(t1: DataType, t2: DataType): Option[DataType] = {
-    (t1, t2) match {
+  override val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
       case (t1, t2) if t1 == t2 => Some(t1)
       case (NullType, t1) => Some(t1)
       case (t1, NullType) => Some(t1)
@@ -866,7 +865,6 @@ object TypeCoercion extends TypeCoercionBase {
         Some(TimestampType)
 
       case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
-    }
   }
 
   /** Promotes all the way to StringType. */
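
As a rough illustration of why the `def` vs. `val` distinction matters for binary compatibility: a `val` of function type compiles to a zero-argument accessor returning a `scala.Function2` that callers then apply, whereas a two-parameter `def` compiles to a two-argument method, so a connector jar built against the `val` form hits `NoSuchMethodError` once the member becomes a `def`. The sketch below is hypothetical; `DownstreamSchemaInference` and `compatibleType` are made-up names standing in for a caller such as MongoInferSchema, not actual Spark or connector code.

```scala
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType}

// Hypothetical downstream caller, loosely modeled on MongoInferSchema.
object DownstreamSchemaInference {
  // With the `val` definition, this call compiles to
  //   TypeCoercion$.MODULE$.findTightestCommonType().apply(a, b)
  // i.e. the zero-argument accessor returning a scala.Function2 that connectors
  // built against Spark 3.1.1 expect. A two-parameter `def` would instead emit
  // findTightestCommonType(DataType, DataType), breaking that bytecode contract.
  def compatibleType(a: DataType, b: DataType): DataType =
    TypeCoercion.findTightestCommonType(a, b).getOrElse(StringType)
}

object DownstreamSchemaInferenceDemo {
  def main(args: Array[String]): Unit = {
    // IntegerType and LongType tighten to LongType, so this prints LongType.
    println(DownstreamSchemaInference.compatibleType(IntegerType, LongType))
  }
}
```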