[SPARK-31361][SQL][TESTS][FOLLOWUP] Check non-vectorized Parquet reader while date/timestamp rebasing
### What changes were proposed in this pull request? In PR, I propose to modify two tests of `ParquetIOSuite`: - SPARK-31159: rebasing timestamps in write - SPARK-31159: rebasing dates in write to check non-vectorized Parquet reader together with vectorized reader. ### Why are the changes needed? To improve test coverage and make sure that non-vectorized reader behaves similar to the vectorized reader. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `ParquetIOSuite`: ``` $ ./build/sbt "test:testOnly *ParquetIOSuite" ``` Closes #28466 from MaxGekk/test-novec-rebase-ParquetIOSuite. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
b31ae7bb0b
commit
272d229005
|
@@ -952,18 +952,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
|
|||
.write
|
||||
.parquet(path)
|
||||
}
|
||||
// The file metadata indicates if it needs rebase or not, so we can always get the
|
||||
// correct result regardless of the "rebaseInRead" config.
|
||||
Seq(true, false).foreach { rebase =>
|
||||
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
|
||||
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
|
||||
}
|
||||
}
|
||||
|
||||
// Force to not rebase to prove the written datetime values are rebased and we will get
|
||||
// wrong result if we don't rebase while reading.
|
||||
withSQLConf("spark.test.forceNoRebase" -> "true") {
|
||||
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
|
||||
Seq(false, true).foreach { vectorized =>
|
||||
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
|
||||
// The file metadata indicates if it needs rebase or not, so we can always get the
|
||||
// correct result regardless of the "rebaseInRead" config.
|
||||
Seq(true, false).foreach { rebase =>
|
||||
withSQLConf(
|
||||
SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
|
||||
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
|
||||
}
|
||||
}
|
||||
|
||||
// Force to not rebase to prove the written datetime values are rebased
|
||||
// and we will get wrong result if we don't rebase while reading.
|
||||
withSQLConf("spark.test.forceNoRebase" -> "true") {
|
||||
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -981,18 +987,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
|
|||
.parquet(path)
|
||||
}
|
||||
|
||||
// The file metadata indicates if it needs rebase or not, so we can always get the correct
|
||||
// result regardless of the "rebaseInRead" config.
|
||||
Seq(true, false).foreach { rebase =>
|
||||
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
|
||||
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
|
||||
}
|
||||
}
|
||||
Seq(false, true).foreach { vectorized =>
|
||||
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
|
||||
// The file metadata indicates if it needs rebase or not, so we can always get the correct
|
||||
// result regardless of the "rebaseInRead" config.
|
||||
Seq(true, false).foreach { rebase =>
|
||||
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
|
||||
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
|
||||
}
|
||||
}
|
||||
|
||||
// Force to not rebase to prove the written datetime values are rebased and we will get
|
||||
// wrong result if we don't rebase while reading.
|
||||
withSQLConf("spark.test.forceNoRebase" -> "true") {
|
||||
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
|
||||
// Force to not rebase to prove the written datetime values are rebased and we will get
|
||||
// wrong result if we don't rebase while reading.
|
||||
withSQLConf("spark.test.forceNoRebase" -> "true") {
|
||||
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue