[SPARK-33956][SQL] Add rowCount for Range operator
### What changes were proposed in this pull request?
This PR adds `rowCount` to the statistics of the `Range` operator:
```scala
spark.sql("set spark.sql.cbo.enabled=true")
spark.sql("select id from range(100)").explain("cost")
```
Before this PR:
```
== Optimized Logical Plan ==
Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B)
```
After this PR:
```
== Optimized Logical Plan ==
Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B, rowCount=100)
```
### Why are the changes needed?
[`JoinEstimation.estimateInnerOuterJoin`](d6a68e0b67/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit test.
Closes #30989 from wangyum/SPARK-33956.
Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
bd346f4a2d
commit
4cd680581a
|
@ -603,7 +603,7 @@ case class Range(
|
||||||
}
|
}
|
||||||
|
|
||||||
override def computeStats(): Statistics = {
|
override def computeStats(): Statistics = {
|
||||||
Statistics(sizeInBytes = LongType.defaultSize * numElements)
|
Statistics(sizeInBytes = LongType.defaultSize * numElements, rowCount = Some(numElements))
|
||||||
}
|
}
|
||||||
|
|
||||||
override def outputOrdering: Seq[SortOrder] = {
|
override def outputOrdering: Seq[SortOrder] = {
|
||||||
|
|
|
@ -44,7 +44,7 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase {
|
||||||
|
|
||||||
test("range") {
|
test("range") {
|
||||||
val range = Range(1, 5, 1, None)
|
val range = Range(1, 5, 1, None)
|
||||||
val rangeStats = Statistics(sizeInBytes = 4 * 8)
|
val rangeStats = Statistics(sizeInBytes = 4 * 8, Some(4))
|
||||||
checkStats(
|
checkStats(
|
||||||
range,
|
range,
|
||||||
expectedStatsCboOn = rangeStats,
|
expectedStatsCboOn = rangeStats,
|
||||||
|
|
Loading…
Reference in a new issue