[SPARK-30622][SQL] commands should return dummy statistics
### What changes were proposed in this pull request? Override `Command.stats` to return dummy statistics (`Long.MaxValue`). ### Why are the changes needed? Commands are eagerly executed. They will be converted to LocalRelation after the DataFrame is created. As such, the statistics of a command are useless. We should avoid unnecessary statistics calculation of a command's children. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? New test. Closes #27344 from cloud-fan/command. Authored-by: Wenchen Fan <wenchen@databricks.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
b0db6231fd
commit
7503e76af0
|
@@ -27,4 +27,8 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
|
|||
/**
 * A logical plan node representing a command.
 *
 * Commands produce no output schema and have no children. They are eagerly
 * executed and replaced by a LocalRelation once the DataFrame is created, so
 * their statistics are never consulted; a dummy value is returned to avoid
 * computing statistics over the command's children.
 */
trait Command extends LogicalPlan {
  override def output: Seq[Attribute] = Seq.empty

  override def children: Seq[LogicalPlan] = Seq.empty

  // Eagerly-executed commands never have their statistics used, so skip the
  // (potentially expensive) estimation and report a fixed placeholder.
  override def stats: Statistics = Statistics.DUMMY
}
|
|
|
@@ -35,6 +35,9 @@ import org.apache.spark.sql.internal.SQLConf
|
|||
import org.apache.spark.sql.types._
|
||||
import org.apache.spark.util.Utils
|
||||
|
||||
/** Companion holder for shared [[Statistics]] values. */
object Statistics {
  // Placeholder statistics (size = Long.MaxValue) for plans — such as
  // commands — whose statistics are never actually used.
  val DUMMY: Statistics = Statistics(Long.MaxValue)
}
|
||||
|
||||
/**
|
||||
* Estimates of various statistics. The default estimation logic simply lazily multiplies the
|
||||
|
|
|
@@ -17,11 +17,15 @@
|
|||
|
||||
package org.apache.spark.sql.catalyst.statsEstimation
|
||||
|
||||
import org.mockito.Mockito.mock
|
||||
|
||||
import org.apache.spark.sql.catalyst.analysis.ResolvedNamespace
|
||||
import org.apache.spark.sql.catalyst.dsl.expressions._
|
||||
import org.apache.spark.sql.catalyst.dsl.plans._
|
||||
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Literal}
|
||||
import org.apache.spark.sql.catalyst.plans.PlanTest
|
||||
import org.apache.spark.sql.catalyst.plans.logical._
|
||||
import org.apache.spark.sql.connector.catalog.SupportsNamespaces
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types.IntegerType
|
||||
|
||||
|
@@ -115,6 +119,15 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase {
|
|||
plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = expectedDefaultStats)
|
||||
}
|
||||
|
||||
test("command should report a dummy stats") {
  // Any concrete command works here; CommentOnNamespace is a convenient one.
  val namespace = ResolvedNamespace(mock(classOf[SupportsNamespaces]), Array("ns"))
  val command = CommentOnNamespace(namespace, "comment")
  // Commands must report Statistics.DUMMY whether CBO is enabled or not.
  checkStats(
    command,
    expectedStatsCboOn = Statistics.DUMMY,
    expectedStatsCboOff = Statistics.DUMMY)
}
|
||||
|
||||
/** Check estimated stats when cbo is turned on/off. */
|
||||
private def checkStats(
|
||||
plan: LogicalPlan,
|
||||
|
|
Loading…
Reference in a new issue