From 2cdd92a7cd6f85186c846635b422b977bdafbcdd Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Tue, 23 Aug 2016 09:11:47 +0800 Subject: [PATCH] [SPARK-17182][SQL] Mark Collect as non-deterministic ## What changes were proposed in this pull request? This PR marks the abstract class `Collect` as non-deterministic since the results of `CollectList` and `CollectSet` depend on the actual order of input rows. ## How was this patch tested? Existing test cases should be enough. Author: Cheng Lian Closes #14749 from liancheng/spark-17182-non-deterministic-collect. --- .../spark/sql/catalyst/expressions/aggregate/collect.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index ac2cefaddc..896ff61b23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -54,6 +54,10 @@ abstract class Collect extends ImperativeAggregate { override def inputAggBufferAttributes: Seq[AttributeReference] = Nil + // Both `CollectList` and `CollectSet` are non-deterministic since their results depend on the + // actual order of input rows. + override def deterministic: Boolean = false + protected[this] val buffer: Growable[Any] with Iterable[Any] override def initialize(b: MutableRow): Unit = {