[SPARK-12578][SQL] Distinct should not be silently ignored when used in an aggregate function with OVER clause
JIRA: https://issues.apache.org/jira/browse/SPARK-12578 This is a small update to the Hive parser: the DISTINCT keyword should be kept when it is used in an aggregate function with an OVER clause, so that CheckAnalysis can detect it and throw an exception later instead of the keyword being silently dropped. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #10557 from viirya/keep-distinct-hivesql.
This commit is contained in:
parent
d1fea41363
commit
b2467b3810
|
@ -195,7 +195,7 @@ function
|
|||
RPAREN (KW_OVER ws=window_specification)?
|
||||
-> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?)
|
||||
-> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?)
|
||||
-> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
|
||||
-> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?)
|
||||
;
|
||||
|
||||
functionName
|
||||
|
|
|
@ -915,6 +915,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|
|||
).map(i => Row(i._1, i._2, i._3, i._4)))
|
||||
}
|
||||
|
||||
// Regression test for SPARK-12578: DISTINCT inside an aggregate that has an OVER clause
// must be reported as an analysis error instead of being silently dropped.
test("window function: distinct should not be silently ignored") {
  // Rows backing the temp table that the query below reads from.
  val rows = Seq(
    WindowData(1, "a", 5),
    WindowData(2, "a", 6),
    WindowData(3, "b", 7),
    WindowData(4, "b", 8),
    WindowData(5, "c", 9),
    WindowData(6, "c", 10)
  )
  sparkContext.parallelize(rows).toDF().registerTempTable("windowData")

  // An aggregate that combines DISTINCT with a window specification.
  val distinctWindowQuery =
    """
      |select month, area, product, sum(distinct product + 1) over (partition by 1 order by 2)
      |from windowData
    """.stripMargin

  // Running the query is expected to raise an AnalysisException.
  val failure = intercept[AnalysisException] {
    sql(distinctWindowQuery)
  }
  assert(failure.getMessage.contains("Distinct window functions are not supported"))
}
|
||||
|
||||
test("window function: expressions in arguments of a window functions") {
|
||||
val data = Seq(
|
||||
WindowData(1, "a", 5),
|
||||
|
|
Loading…
Reference in a new issue