[SPARK-10113][SQL] Explicit error message for unsigned Parquet logical types
Parquet supports some unsigned datatypes. However, since Spark does not support unsigned datatypes, it needs to emit an exception with a clear message rather than one saying illegal datatype. Author: hyukjinkwon <gurwls223@gmail.com> Closes #9646 from HyukjinKwon/SPARK-10113.
This commit is contained in:
parent
4fe99c72c6
commit
f5a9526fec
|
@ -108,6 +108,9 @@ private[parquet] class CatalystSchemaConverter(
|
||||||
// Human-readable rendering of this Parquet field's type for error messages,
// e.g. "INT32" or "INT32 (UINT_8)" when a logical (original) type is attached.
def typeString = {
  val physical = s"$typeName"
  if (originalType == null) physical else s"$physical ($originalType)"
}
|
||||||
|
|
||||||
|
// Fails analysis for Parquet types that Spark SQL deliberately does not
// support (e.g. unsigned integer logical types), with an explicit message
// instead of a generic "illegal type" error.
def typeNotSupported() = {
  val message = s"Parquet type not supported: $typeString"
  throw new AnalysisException(message)
}
||||||
|
|
||||||
// Fails analysis for Parquet types that Spark SQL could support in principle
// but has not implemented yet (e.g. TIME_MILLIS / TIMESTAMP_MILLIS here).
def typeNotImplemented() = {
  val message = s"Parquet type not yet supported: $typeString"
  throw new AnalysisException(message)
}
||||||
|
|
||||||
|
@ -142,6 +145,9 @@ private[parquet] class CatalystSchemaConverter(
|
||||||
case INT_32 | null => IntegerType
|
case INT_32 | null => IntegerType
|
||||||
case DATE => DateType
|
case DATE => DateType
|
||||||
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
|
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
|
||||||
|
case UINT_8 => typeNotSupported()
|
||||||
|
case UINT_16 => typeNotSupported()
|
||||||
|
case UINT_32 => typeNotSupported()
|
||||||
case TIME_MILLIS => typeNotImplemented()
|
case TIME_MILLIS => typeNotImplemented()
|
||||||
case _ => illegalType()
|
case _ => illegalType()
|
||||||
}
|
}
|
||||||
|
@ -150,6 +156,7 @@ private[parquet] class CatalystSchemaConverter(
|
||||||
originalType match {
|
originalType match {
|
||||||
case INT_64 | null => LongType
|
case INT_64 | null => LongType
|
||||||
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
|
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
|
||||||
|
case UINT_64 => typeNotSupported()
|
||||||
case TIMESTAMP_MILLIS => typeNotImplemented()
|
case TIMESTAMP_MILLIS => typeNotImplemented()
|
||||||
case _ => illegalType()
|
case _ => illegalType()
|
||||||
}
|
}
|
||||||
|
|
|
@ -206,6 +206,30 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-10113 Support for unsigned Parquet logical types") {
  // Schema with an unsigned 32-bit integer column, which Spark SQL cannot
  // represent; reading it back must produce an explicit "not supported" error
  // rather than a generic illegal-type failure.
  val unsignedSchema = MessageTypeParser.parseMessageType(
    """message root {
      |  required int32 c(UINT_32);
      |}
    """.stripMargin)

  withTempPath { dir =>
    val metadataPath = new Path(dir.getCanonicalPath)
    val fileMetadata =
      new FileMetaData(unsignedSchema, Map.empty[String, String].asJava, "Spark")
    val footers = List(
      new Footer(metadataPath, new ParquetMetadata(fileMetadata, Collections.emptyList()))
    ).asJava

    // Write only the footer metadata; schema conversion happens on read.
    ParquetFileWriter.writeMetadataFile(sparkContext.hadoopConfiguration, metadataPath, footers)

    val thrown = intercept[Throwable] {
      sqlContext.read.parquet(metadataPath.toString).printSchema()
    }
    assert(thrown.toString.contains("Parquet type not supported"))
  }
}
|
||||||
|
|
||||||
test("compression codec") {
|
test("compression codec") {
|
||||||
def compressionCodecFor(path: String, codecName: String): String = {
|
def compressionCodecFor(path: String, codecName: String): String = {
|
||||||
val codecs = for {
|
val codecs = for {
|
||||||
|
|
Loading…
Reference in a new issue