From 985d532812cf176d0e12b799c723f917282b6813 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Thu, 2 Jun 2016 16:21:33 -0700
Subject: [PATCH] [SPARK-15734][SQL] Avoids printing internal row in explain
 output

## What changes were proposed in this pull request?

This PR avoids printing internal rows in the explain output of some operators.

**Before change:**

```
scala> (1 to 10).toSeq.map(_ => (1,2,3)).toDF().createTempView("df3")
scala> spark.sql("select * from df3 where 1=2").explain(true)
...
== Analyzed Logical Plan ==
_1: int, _2: int, _3: int
Project [_1#37,_2#38,_3#39]
+- Filter (1 = 2)
   +- SubqueryAlias df3
      +- LocalRelation [_1#37,_2#38,_3#39], [[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3]]
...
== Physical Plan ==
LocalTableScan [_1#37,_2#38,_3#39]
```

**After change:**

```
scala> spark.sql("select * from df3 where 1=2").explain(true)
...
== Analyzed Logical Plan ==
_1: int, _2: int, _3: int
Project [_1#58,_2#59,_3#60]
+- Filter (1 = 2)
   +- SubqueryAlias df3
      +- LocalRelation [_1#58,_2#59,_3#60]
...
== Physical Plan ==
LocalTableScan , [_1#58,_2#59,_3#60]
```

## How was this patch tested?

Manual test.

Author: Sean Zhong

Closes #13471 from clockfly/verbose_breakdown_5.
---
 .../spark/sql/catalyst/plans/logical/LocalRelation.scala | 8 +++++++-
 .../org/apache/spark/sql/execution/ExistingRDD.scala     | 2 ++
 .../apache/spark/sql/execution/LocalTableScanExec.scala  | 8 ++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
index 5813b74c77..87b8647655 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
@@ -57,7 +57,13 @@ case class LocalRelation(output: Seq[Attribute], data: Seq[InternalRow] = Nil)
     LocalRelation(output.map(_.newInstance()), data).asInstanceOf[this.type]
   }
 
-  override protected def stringArgs = Iterator(output)
+  override protected def stringArgs: Iterator[Any] = {
+    if (data.isEmpty) {
+      Iterator("", output)
+    } else {
+      Iterator(output)
+    }
+  }
 
   override def sameResult(plan: LogicalPlan): Boolean = plan match {
     case LocalRelation(otherOutput, otherData) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
index fef3255c73..b8b392608d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala
@@ -91,6 +91,8 @@ private[sql] case class LogicalRDD(
     case _ => false
   }
 
+  override protected def stringArgs: Iterator[Any] = Iterator(output)
+
   override def producedAttributes: AttributeSet = outputSet
 
   @transient override lazy val statistics: Statistics = Statistics(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
index c5e78b0333..df2f238d8c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
@@ -48,6 +48,14 @@ private[sql] case class LocalTableScanExec(
     }
   }
 
+  override protected def stringArgs: Iterator[Any] = {
+    if (rows.isEmpty) {
+      Iterator("", output)
+    } else {
+      Iterator(output)
+    }
+  }
+
   override def executeCollect(): Array[InternalRow] = {
     unsafeRows
   }
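
Note for reviewers: the sketch below is a simplified model of why `Iterator("", output)` renders as `LocalTableScan , [...]` in the explain output above. Roughly, Catalyst builds a node's one-line description by joining its `stringArgs` with `", "`, so an empty first argument leaves a leading comma visible while the row data is dropped entirely. `TreeNodeLike`, `LocalRelationDemo`, and `StringArgsDemo` are hypothetical stand-ins for illustration, not Spark's actual `TreeNode` or `LocalRelation` implementations.

```scala
// Simplified model of how a Catalyst plan node renders its one-line
// description from `stringArgs`. `TreeNodeLike` and `LocalRelationDemo`
// are hypothetical stand-ins, not Spark's real TreeNode/LocalRelation.
trait TreeNodeLike {
  def nodeName: String = getClass.getSimpleName

  // Arguments to include in the node's one-line description.
  protected def stringArgs: Iterator[Any]

  // Joins the args with ", ": sequences render as "[a,b,c]",
  // everything else via toString (an approximation of argString).
  def argString: String = stringArgs.map {
    case seq: Seq[_] => seq.mkString("[", ",", "]")
    case other       => other.toString
  }.mkString(", ")

  def simpleString: String = s"$nodeName $argString".trim
}

// Mirrors the patched stringArgs logic: never print the rows themselves,
// and mark an empty relation with a leading empty argument.
case class LocalRelationDemo(output: Seq[String], data: Seq[Any])
  extends TreeNodeLike {

  override protected def stringArgs: Iterator[Any] =
    if (data.isEmpty) Iterator("", output) else Iterator(output)
}

object StringArgsDemo extends App {
  // Empty relation: prints "LocalRelationDemo , [_1#58,_2#59,_3#60]" --
  // the stray comma matches "LocalTableScan , [...]" in the PR description.
  println(LocalRelationDemo(Seq("_1#58", "_2#59", "_3#60"), Nil).simpleString)

  // Non-empty relation: rows are elided, only the schema is printed:
  // "LocalRelationDemo [_1#58,_2#59,_3#60]"
  println(LocalRelationDemo(Seq("_1#58", "_2#59", "_3#60"), Seq((1, 2, 3))).simpleString)
}
```

The `LogicalRDD` hunk follows the same idea with less ceremony: replacing the default product-based arguments with just `Iterator(output)` keeps the underlying RDD reference out of the explain string, since there are no literal rows to mark as empty at planning time.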