[SPARK-24788][SQL] RelationalGroupedDataset.toString with unresolved exprs should not fail
## What changes were proposed in this pull request? In the current master, `toString` throws an exception when `RelationalGroupedDataset` has unresolved expressions: ``` scala> spark.range(0, 10).groupBy("id") res4: org.apache.spark.sql.RelationalGroupedDataset = RelationalGroupedDataset: [grouping expressions: [id: bigint], value: [id: bigint], type: GroupBy] scala> spark.range(0, 10).groupBy('id) org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to dataType on unresolved object, tree: 'id at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.dataType(unresolved.scala:105) at org.apache.spark.sql.RelationalGroupedDataset$$anonfun$12.apply(RelationalGroupedDataset.scala:474) at org.apache.spark.sql.RelationalGroupedDataset$$anonfun$12.apply(RelationalGroupedDataset.scala:473) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.AbstractTraversable.map(Traversable.scala:104) at org.apache.spark.sql.RelationalGroupedDataset.toString(RelationalGroupedDataset.scala:473) at scala.runtime.ScalaRunTime$.scala$runtime$ScalaRunTime$$inner$1(ScalaRunTime.scala:332) at scala.runtime.ScalaRunTime$.stringOf(ScalaRunTime.scala:337) at scala.runtime.ScalaRunTime$.replStringOf(ScalaRunTime.scala:345) ``` This PR fixes the code to handle the unresolved case in `RelationalGroupedDataset.toString`. Closes #21752 ## How was this patch tested? Added tests in `DataFrameAggregateSuite`. Author: Chris Horn <chorn4033@gmail.com> Author: Takeshi Yamamuro <yamamuro@apache.org> Closes #21964 from maropu/SPARK-24788.
This commit is contained in:
parent
f45d60a5a1
commit
b0d6967d45
|
@ -469,8 +469,11 @@ class RelationalGroupedDataset protected[sql](
|
|||
override def toString: String = {
|
||||
val builder = new StringBuilder
|
||||
builder.append("RelationalGroupedDataset: [grouping expressions: [")
|
||||
val kFields = groupingExprs.map(_.asInstanceOf[NamedExpression]).map {
|
||||
case f => s"${f.name}: ${f.dataType.simpleString(2)}"
|
||||
val kFields = groupingExprs.collect {
|
||||
case expr: NamedExpression if expr.resolved =>
|
||||
s"${expr.name}: ${expr.dataType.simpleString(2)}"
|
||||
case expr: NamedExpression => expr.name
|
||||
case o => o.toString
|
||||
}
|
||||
builder.append(kFields.take(2).mkString(", "))
|
||||
if (kFields.length > 2) {
|
||||
|
|
|
@ -717,4 +717,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
|
|||
Row(1, 2, 1) :: Row(2, 2, 2) :: Row(3, 2, 3) :: Nil)
|
||||
}
|
||||
|
||||
test("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") {
|
||||
// Checks that toString does not throw on unresolved grouping exprs and renders them without a type
|
||||
assert(testData.groupBy('key).toString.contains(
|
||||
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
|
||||
assert(testData.groupBy(col("key")).toString.contains(
|
||||
"[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]"))
|
||||
assert(testData.groupBy(current_date()).toString.contains(
|
||||
"grouping expressions: [current_date(None)], value: [key: int, value: string], " +
|
||||
"type: GroupBy]"))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue