[SPARK-30622][SQL] commands should return dummy statistics

### What changes were proposed in this pull request?

Override `Command.stats` to return dummy statistics (`Statistics(Long.MaxValue)`).

### Why are the changes needed?

Commands are eagerly executed, and they are converted to `LocalRelation` after the DataFrame is created. Therefore, the statistics of a command are useless, and we should avoid unnecessarily calculating the statistics of a command's children.

### Does this PR introduce any user-facing change?

no

### How was this patch tested?

Added a new test, "command should report a dummy stats", to `BasicStatsEstimationSuite`.

Closes #27344 from cloud-fan/command.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
Wenchen Fan 2020-01-30 10:27:35 -08:00 committed by Dongjoon Hyun
parent b0db6231fd
commit 7503e76af0
3 changed files with 20 additions and 0 deletions

View file

@ -27,4 +27,8 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
trait Command extends LogicalPlan {
  // A command exposes no output attributes and has no child plans.
  override def output: Seq[Attribute] = Nil
  override def children: Seq[LogicalPlan] = Nil

  // A command runs eagerly and is turned into a LocalRelation once the DataFrame has been
  // created, so its statistics are of no use. Returning a fixed dummy value here avoids
  // needlessly calculating the statistics of the command's children.
  override def stats: Statistics = Statistics.DUMMY
}

View file

@ -35,6 +35,9 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
/** Companion object holding shared [[Statistics]] instances. */
object Statistics {
  /** Placeholder statistics built from `Long.MaxValue`, for plans whose stats are not meaningful. */
  val DUMMY: Statistics = Statistics(Long.MaxValue)
}
/**
* Estimates of various statistics. The default estimation logic simply lazily multiplies the

View file

@ -17,11 +17,15 @@
package org.apache.spark.sql.catalyst.statsEstimation
import org.mockito.Mockito.mock
import org.apache.spark.sql.catalyst.analysis.ResolvedNamespace
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.connector.catalog.SupportsNamespaces
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.IntegerType
@ -115,6 +119,15 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase {
plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = expectedDefaultStats)
}
// A command plan is expected to report Statistics.DUMMY both with CBO on and with CBO off.
test("command should report a dummy stats") {
  // The catalog is a Mockito mock; the plan is only constructed and passed to checkStats.
  val namespace = ResolvedNamespace(mock(classOf[SupportsNamespaces]), Array("ns"))
  val commandPlan = CommentOnNamespace(namespace, "comment")
  val dummy = Statistics.DUMMY
  checkStats(commandPlan, expectedStatsCboOn = dummy, expectedStatsCboOff = dummy)
}
/** Check estimated stats when cbo is turned on/off. */
private def checkStats(
plan: LogicalPlan,