[SPARK-23688][SS] Refactor tests away from rate source
## What changes were proposed in this pull request?

Replace rate source with memory source in continuous mode test suite. Keep using "rate" source if the tests intend to put data periodically in background, or need to put short source name to load, since "memory" doesn't have provider for source.

## How was this patch tested?

Ran relevant test suite from IDE.

Author: Jungtaek Lim <kabhwan@gmail.com>

Closes #21152 from HeartSaVioR/SPARK-23688.
This commit is contained in:
parent
8614edd445
commit
1fb46f30f8
|
@@ -75,73 +75,50 @@ class ContinuousSuite extends ContinuousSuiteBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
test("map") {
|
test("map") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF().map(_.getInt(0) * 2)
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
.map(r => r.getLong(0) * 2)
|
|
||||||
|
|
||||||
testStream(df, useV2Sink = true)(
|
testStream(df)(
|
||||||
StartStream(longContinuousTrigger),
|
AddData(input, 0, 1),
|
||||||
AwaitEpoch(0),
|
CheckAnswer(0, 2),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
StopStream,
|
||||||
IncrementEpoch(),
|
AddData(input, 2, 3, 4),
|
||||||
Execute(waitForRateSourceTriggers(_, 4)),
|
StartStream(),
|
||||||
IncrementEpoch(),
|
CheckAnswer(0, 2, 4, 6, 8))
|
||||||
CheckAnswerRowsContains(scala.Range(0, 40, 2).map(Row(_))))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
test("flatMap") {
|
test("flatMap") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF().flatMap(r => Seq(0, r.getInt(0), r.getInt(0) * 2))
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
.flatMap(r => Seq(0, r.getLong(0), r.getLong(0) * 2))
|
|
||||||
|
|
||||||
testStream(df, useV2Sink = true)(
|
testStream(df)(
|
||||||
StartStream(longContinuousTrigger),
|
AddData(input, 0, 1),
|
||||||
AwaitEpoch(0),
|
CheckAnswer((0 to 1).flatMap(n => Seq(0, n, n * 2)): _*),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
StopStream,
|
||||||
IncrementEpoch(),
|
AddData(input, 2, 3, 4),
|
||||||
Execute(waitForRateSourceTriggers(_, 4)),
|
StartStream(),
|
||||||
IncrementEpoch(),
|
CheckAnswer((0 to 4).flatMap(n => Seq(0, n, n * 2)): _*))
|
||||||
CheckAnswerRowsContains(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)).map(Row(_))))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
test("filter") {
|
test("filter") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF().where('value > 2)
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
.where('value > 5)
|
|
||||||
|
|
||||||
testStream(df, useV2Sink = true)(
|
testStream(df)(
|
||||||
StartStream(longContinuousTrigger),
|
AddData(input, 0, 1),
|
||||||
AwaitEpoch(0),
|
CheckAnswer(),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
StopStream,
|
||||||
IncrementEpoch(),
|
AddData(input, 2, 3, 4),
|
||||||
Execute(waitForRateSourceTriggers(_, 4)),
|
StartStream(),
|
||||||
IncrementEpoch(),
|
CheckAnswer(3, 4))
|
||||||
CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_))))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
test("deduplicate") {
|
test("deduplicate") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF().dropDuplicates()
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
.dropDuplicates()
|
|
||||||
|
|
||||||
val except = intercept[AnalysisException] {
|
val except = intercept[AnalysisException] {
|
||||||
testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger))
|
testStream(df)(StartStream())
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(except.message.contains(
|
assert(except.message.contains(
|
||||||
|
@@ -149,15 +126,11 @@ class ContinuousSuite extends ContinuousSuiteBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
test("timestamp") {
|
test("timestamp") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF().select(current_timestamp())
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select(current_timestamp())
|
|
||||||
|
|
||||||
val except = intercept[AnalysisException] {
|
val except = intercept[AnalysisException] {
|
||||||
testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger))
|
testStream(df)(StartStream())
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(except.message.contains(
|
assert(except.message.contains(
|
||||||
|
@@ -165,58 +138,43 @@ class ContinuousSuite extends ContinuousSuiteBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
test("subquery alias") {
|
test("subquery alias") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
input.toDF().createOrReplaceTempView("memory")
|
||||||
.option("numPartitions", "5")
|
val test = spark.sql("select value from memory where value > 2")
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.createOrReplaceTempView("rate")
|
|
||||||
val test = spark.sql("select value from rate where value > 5")
|
|
||||||
|
|
||||||
testStream(test, useV2Sink = true)(
|
testStream(test)(
|
||||||
StartStream(longContinuousTrigger),
|
AddData(input, 0, 1),
|
||||||
AwaitEpoch(0),
|
CheckAnswer(),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
StopStream,
|
||||||
IncrementEpoch(),
|
AddData(input, 2, 3, 4),
|
||||||
Execute(waitForRateSourceTriggers(_, 4)),
|
StartStream(),
|
||||||
IncrementEpoch(),
|
CheckAnswer(3, 4))
|
||||||
CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_))))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
test("repeatedly restart") {
|
test("repeatedly restart") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF()
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
|
|
||||||
testStream(df, useV2Sink = true)(
|
testStream(df)(
|
||||||
StartStream(longContinuousTrigger),
|
StartStream(),
|
||||||
AwaitEpoch(0),
|
AddData(input, 0, 1),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
CheckAnswer(0, 1),
|
||||||
IncrementEpoch(),
|
|
||||||
CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))),
|
|
||||||
StopStream,
|
StopStream,
|
||||||
StartStream(longContinuousTrigger),
|
StartStream(),
|
||||||
StopStream,
|
StopStream,
|
||||||
StartStream(longContinuousTrigger),
|
StartStream(),
|
||||||
StopStream,
|
StopStream,
|
||||||
StartStream(longContinuousTrigger),
|
StartStream(),
|
||||||
AwaitEpoch(2),
|
StopStream,
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
AddData(input, 2, 3),
|
||||||
IncrementEpoch(),
|
StartStream(),
|
||||||
CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))),
|
CheckAnswer(0, 1, 2, 3),
|
||||||
StopStream)
|
StopStream)
|
||||||
}
|
}
|
||||||
|
|
||||||
test("task failure kills the query") {
|
test("task failure kills the query") {
|
||||||
val df = spark.readStream
|
val input = ContinuousMemoryStream[Int]
|
||||||
.format("rate")
|
val df = input.toDF()
|
||||||
.option("numPartitions", "5")
|
|
||||||
.option("rowsPerSecond", "5")
|
|
||||||
.load()
|
|
||||||
.select('value)
|
|
||||||
|
|
||||||
// Get an arbitrary task from this query to kill. It doesn't matter which one.
|
// Get an arbitrary task from this query to kill. It doesn't matter which one.
|
||||||
var taskId: Long = -1
|
var taskId: Long = -1
|
||||||
|
@@ -227,9 +185,9 @@ class ContinuousSuite extends ContinuousSuiteBase {
|
||||||
}
|
}
|
||||||
spark.sparkContext.addSparkListener(listener)
|
spark.sparkContext.addSparkListener(listener)
|
||||||
try {
|
try {
|
||||||
testStream(df, useV2Sink = true)(
|
testStream(df)(
|
||||||
StartStream(Trigger.Continuous(100)),
|
StartStream(Trigger.Continuous(100)),
|
||||||
Execute(waitForRateSourceTriggers(_, 2)),
|
AddData(input, 0, 1, 2, 3),
|
||||||
Execute { _ =>
|
Execute { _ =>
|
||||||
// Wait until a task is started, then kill its first attempt.
|
// Wait until a task is started, then kill its first attempt.
|
||||||
eventually(timeout(streamingTimeout)) {
|
eventually(timeout(streamingTimeout)) {
|
||||||
|
@@ -252,6 +210,7 @@ class ContinuousSuite extends ContinuousSuiteBase {
|
||||||
.option("rowsPerSecond", "2")
|
.option("rowsPerSecond", "2")
|
||||||
.load()
|
.load()
|
||||||
.select('value)
|
.select('value)
|
||||||
|
|
||||||
val query = df.writeStream
|
val query = df.writeStream
|
||||||
.format("memory")
|
.format("memory")
|
||||||
.queryName("noharness")
|
.queryName("noharness")
|
||||||
|
|
Loading…
Reference in a new issue