[SPARK-31361][SQL][TESTS][FOLLOWUP] Check non-vectorized Parquet reader while date/timestamp rebasing

### What changes were proposed in this pull request?
In this PR, I propose to modify two tests of `ParquetIOSuite`:
- SPARK-31159: rebasing timestamps in write
- SPARK-31159: rebasing dates in write

to check non-vectorized Parquet reader together with vectorized reader.

### Why are the changes needed?
To improve test coverage and make sure that the non-vectorized reader behaves similarly to the vectorized reader.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By running `ParquetIOSuite`:
```
$ ./build/sbt "test:testOnly *ParquetIOSuite"
```

Closes #28466 from MaxGekk/test-novec-rebase-ParquetIOSuite.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Max Gekk 2020-05-07 07:52:29 +00:00 committed by Wenchen Fan
parent b31ae7bb0b
commit 272d229005

View file

@ -952,18 +952,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
.write
.parquet(path)
}
// The file metadata indicates if it needs rebase or not, so we can always get the
// correct result regardless of the "rebaseInRead" config.
Seq(true, false).foreach { rebase =>
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
}
}
// Force to not rebase to prove the written datetime values are rebased and we will get
// wrong result if we don't rebase while reading.
withSQLConf("spark.test.forceNoRebase" -> "true") {
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
Seq(false, true).foreach { vectorized =>
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
// The file metadata indicates if it needs rebase or not, so we can always get the
// correct result regardless of the "rebaseInRead" config.
Seq(true, false).foreach { rebase =>
withSQLConf(
SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
}
}
// Force to not rebase to prove the written datetime values are rebased
// and we will get wrong result if we don't rebase while reading.
withSQLConf("spark.test.forceNoRebase" -> "true") {
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
}
}
}
}
}
@ -981,18 +987,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
.parquet(path)
}
// The file metadata indicates if it needs rebase or not, so we can always get the correct
// result regardless of the "rebaseInRead" config.
Seq(true, false).foreach { rebase =>
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
}
}
Seq(false, true).foreach { vectorized =>
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
// The file metadata indicates if it needs rebase or not, so we can always get the correct
// result regardless of the "rebaseInRead" config.
Seq(true, false).foreach { rebase =>
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
}
}
// Force to not rebase to prove the written datetime values are rebased and we will get
// wrong result if we don't rebase while reading.
withSQLConf("spark.test.forceNoRebase" -> "true") {
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
// Force to not rebase to prove the written datetime values are rebased and we will get
// wrong result if we don't rebase while reading.
withSQLConf("spark.test.forceNoRebase" -> "true") {
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
}
}
}
}
}