[SPARK-5373][SQL] Literal in agg grouping expressions leads to incorrect result
A query such as `select key, count(*) from src group by key, 1` returns the wrong answer when a literal appears in the grouping expressions. For example, given this table:
```
val testData2 = TestSQLContext.sparkContext.parallelize(
  TestData2(1, 1) ::
  TestData2(1, 2) ::
  TestData2(2, 1) ::
  TestData2(2, 2) ::
  TestData2(3, 1) ::
  TestData2(3, 2) :: Nil, 2).toSchemaRDD
testData2.registerTempTable("testData2")
```
the result of `SELECT a, count(1) FROM testData2 GROUP BY a, 1` is
```
[1,1]
[2,2]
[3,1]
```
instead of the expected
```
[1,2]
[2,2]
[3,2]
```
Author: wangfei <wangfei1@huawei.com>

Closes #4169 from scwf/agg-bug and squashes the following commits:

05751db [wangfei] fix bugs when literal in agg grouping expressions
This commit is contained in:
parent
fbaf9e0896
commit
c1b3eebf97
|
@ -141,10 +141,11 @@ object PartialAggregation {
|
|||
// We need to pass all grouping expressions through so the grouping can happen a second
|
||||
// time. However some of them might be unnamed so we alias them allowing them to be
|
||||
// referenced in the second aggregation.
|
||||
val namedGroupingExpressions: Map[Expression, NamedExpression] = groupingExpressions.map {
|
||||
case n: NamedExpression => (n, n)
|
||||
case other => (other, Alias(other, "PartialGroup")())
|
||||
}.toMap
|
||||
val namedGroupingExpressions: Map[Expression, NamedExpression] =
|
||||
groupingExpressions.filter(!_.isInstanceOf[Literal]).map {
|
||||
case n: NamedExpression => (n, n)
|
||||
case other => (other, Alias(other, "PartialGroup")())
|
||||
}.toMap
|
||||
|
||||
// Replace aggregations with a new expression that computes the result from the already
|
||||
// computed partial evaluations and grouping values.
|
||||
|
|
|
@ -186,6 +186,15 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
|
|||
Seq(Row(1,3), Row(2,3), Row(3,3)))
|
||||
}
|
||||
|
||||
test("literal in agg grouping expressions") {
|
||||
checkAnswer(
|
||||
sql("SELECT a, count(1) FROM testData2 GROUP BY a, 1"),
|
||||
Seq(Row(1,2), Row(2,2), Row(3,2)))
|
||||
checkAnswer(
|
||||
sql("SELECT a, count(2) FROM testData2 GROUP BY a, 2"),
|
||||
Seq(Row(1,2), Row(2,2), Row(3,2)))
|
||||
}
|
||||
|
||||
test("aggregates with nulls") {
|
||||
checkAnswer(
|
||||
sql("SELECT MIN(a), MAX(a), AVG(a), SUM(a), COUNT(a) FROM nullInts"),
|
||||
|
|
Loading…
Reference in a new issue