[SPARK-24788][SQL] RelationalGroupedDataset.toString with unresolved exprs should not fail

## What changes were proposed in this pull request?
In the current master, `toString` throws an exception when `RelationalGroupedDataset` has unresolved expressions:
```
scala> spark.range(0, 10).groupBy("id")
res4: org.apache.spark.sql.RelationalGroupedDataset = RelationalGroupedDataset: [grouping expressions: [id: bigint], value: [id: bigint], type: GroupBy]

scala> spark.range(0, 10).groupBy('id)
org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to dataType on unresolved object, tree: 'id
  at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.dataType(unresolved.scala:105)
  at org.apache.spark.sql.RelationalGroupedDataset$$anonfun$12.apply(RelationalGroupedDataset.scala:474)
  at org.apache.spark.sql.RelationalGroupedDataset$$anonfun$12.apply(RelationalGroupedDataset.scala:473)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.AbstractTraversable.map(Traversable.scala:104)
  at org.apache.spark.sql.RelationalGroupedDataset.toString(RelationalGroupedDataset.scala:473)
  at scala.runtime.ScalaRunTime$.scala$runtime$ScalaRunTime$$inner$1(ScalaRunTime.scala:332)
  at scala.runtime.ScalaRunTime$.stringOf(ScalaRunTime.scala:337)
  at scala.runtime.ScalaRunTime$.replStringOf(ScalaRunTime.scala:345)
```
This PR fixes `RelationalGroupedDataset.toString` so that it handles unresolved grouping expressions instead of throwing.

Closes #21752

## How was this patch tested?
Added tests in `DataFrameAggregateSuite`.

Author: Chris Horn <chorn4033@gmail.com>
Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #21964 from maropu/SPARK-24788.
This commit is contained in:
Chris Horn 2018-08-02 22:40:58 -07:00 committed by Xiao Li
parent f45d60a5a1
commit b0d6967d45
2 changed files with 15 additions and 2 deletions

View file

@ -469,8 +469,11 @@ class RelationalGroupedDataset protected[sql](
override def toString: String = {
val builder = new StringBuilder
builder.append("RelationalGroupedDataset: [grouping expressions: [")
val kFields = groupingExprs.map(_.asInstanceOf[NamedExpression]).map {
case f => s"${f.name}: ${f.dataType.simpleString(2)}"
val kFields = groupingExprs.collect {
case expr: NamedExpression if expr.resolved =>
s"${expr.name}: ${expr.dataType.simpleString(2)}"
case expr: NamedExpression => expr.name
case o => o.toString
}
builder.append(kFields.take(2).mkString(", "))
if (kFields.length > 2) {

View file

@ -717,4 +717,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
Row(1, 2, 1) :: Row(2, 2, 2) :: Row(3, 2, 3) :: Nil)
}
test("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") {
// Regression test: each toString call below must complete without raising an exception,
// and its output must contain the expected summary text.
// Grouping by the Symbol 'key: the expected text lists only the name `key`, with no data type.
assert(testData.groupBy('key).toString.contains(
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
// Grouping by col("key"): the expected text is the same as for the Symbol form.
assert(testData.groupBy(col("key")).toString.contains(
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
// Grouping by the function call current_date(): the expected text shows the grouping
// expression as `current_date(None)`.
assert(testData.groupBy(current_date()).toString.contains(
"grouping expressions: [current_date(None)], value: [key: int, value: string], " +
"type: GroupBy]"))
}
}