From 771356555c1110b898ff09ea23fe0b00749caefd Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 29 Apr 2021 04:51:27 +0000 Subject: [PATCH] [SPARK-34786][SQL][FOLLOWUP] Explicitly declare DecimalType(20, 0) for Parquet UINT_64 ### What changes were proposed in this pull request? Explicitly declare DecimalType(20, 0) for Parquet UINT_64 instead of using DecimalType.LongDecimal, which only happens to have a precision of 20. See https://github.com/apache/spark/pull/31960#discussion_r622691560 ### Why are the changes needed? To remove ambiguity: the precision required for UINT_64 is now stated explicitly rather than relying on DecimalType.LongDecimal coincidentally having the same precision. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? No new tests are needed; the existing CI checks pass. Closes #32390 from yaooqinn/SPARK-34786-F. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../datasources/parquet/ParquetSchemaConverter.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 8c4e0881e0..e751c97a60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -141,7 +141,9 @@ class ParquetToSparkSchemaConverter( originalType match { case INT_64 | null => LongType case DECIMAL => makeDecimalType(Decimal.MAX_LONG_DIGITS) - case UINT_64 => DecimalType.LongDecimal + // The precision to hold the largest unsigned long is: + // `java.lang.Long.toUnsignedString(-1).length` = 20 + case UINT_64 => DecimalType(20, 0) case TIMESTAMP_MICROS => TimestampType case TIMESTAMP_MILLIS => TimestampType case _ => illegalType()