[SPARK-31989][SQL] Generate JSON rebasing files w/ 30 minutes step

### What changes were proposed in this pull request?
1. Change the max step from 1 week to 30 minutes in the tests `RebaseDateTimeSuite`.`generate 'gregorian-julian-rebase-micros.json'` and `generate 'julian-gregorian-rebase-micros.json'`.
2. Parallelise JSON files generation in the function `generateRebaseJson` by using `ThreadUtils.parmap`.

### Why are the changes needed?
1. To prevent the bugs that are fixed by https://github.com/apache/spark/pull/28787 and https://github.com/apache/spark/pull/28816.
2. The parallelisation speeds up JSON file generation.

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
By generating the JSON file `julian-gregorian-rebase-micros.json`.

Closes #28827 from MaxGekk/rebase-30-min.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
This commit is contained in:
Max Gekk 2020-06-17 12:07:36 +09:00 committed by HyukjinKwon
parent eeb81200e2
commit afd8a8b964

View file

@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.RebaseDateTime._
import org.apache.spark.util.ThreadUtils
class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
@@ -254,11 +255,7 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
import com.fasterxml.jackson.module.scala.{DefaultScalaModule, ScalaObjectMapper}
case class RebaseRecord(tz: String, switches: Array[Long], diffs: Array[Long])
val result = new ArrayBuffer[RebaseRecord]()
ALL_TIMEZONES
.sortBy(_.getId)
.foreach { zid =>
val rebaseRecords = ThreadUtils.parmap(ALL_TIMEZONES, "JSON-rebase-gen", 16) { zid =>
withDefaultTimeZone(zid) {
val tz = TimeZone.getTimeZone(zid)
val start = adjustFunc(
@@ -272,7 +269,7 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
var micros = start
var diff = Long.MaxValue
val maxStep = DAYS_PER_WEEK * MICROS_PER_DAY
val maxStep = 30 * MICROS_PER_MINUTE
var step: Long = MICROS_PER_SECOND
val switches = new ArrayBuffer[Long]()
val diffs = new ArrayBuffer[Long]()
@@ -294,9 +291,11 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
}
micros += step
}
result.append(RebaseRecord(zid.getId, switches.toArray, diffs.toArray))
RebaseRecord(zid.getId, switches.toArray, diffs.toArray)
}
}
val result = new ArrayBuffer[RebaseRecord]()
rebaseRecords.sortBy(_.tz).foreach(result.append(_))
val mapper = (new ObjectMapper() with ScalaObjectMapper)
.registerModule(DefaultScalaModule)
.writerWithDefaultPrettyPrinter()