[SPARK-29863][SQL] Rename EveryAgg/AnyAgg to BoolAnd/BoolOr

### What changes were proposed in this pull request?

Rename the `EveryAgg`/`AnyAgg` expressions to `BoolAnd`/`BoolOr`, and change their node names from `Every`/`Any` to `bool_and`/`bool_or`.

### Why are the changes needed?

Under ANSI mode, `every`, `any` and `some` are reserved keywords and can't be used as function names. `EveryAgg`/`AnyAgg` have several aliases, and I think it's better not to pick reserved keywords as the primary name.

### Does this PR introduce any user-facing change?

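No API change. Note that the auto-generated output column names and analysis error messages for these aggregates now show `bool_and`/`bool_or` instead of `every`/`any`, regardless of which alias is used in the query.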

### How was this patch tested?

Existing tests, with the expected outputs updated to use the new names.

Closes #26486 from cloud-fan/naming.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
Wenchen Fan 2019-11-13 21:42:42 +08:00
parent 942753a44b
commit 4dcbdcd265
7 changed files with 45 additions and 45 deletions

View file

@ -313,11 +313,11 @@ object FunctionRegistry {
expression[CollectList]("collect_list"),
expression[CollectSet]("collect_set"),
expression[CountMinSketchAgg]("count_min_sketch"),
expression[EveryAgg]("every"),
expression[EveryAgg]("bool_and"),
expression[AnyAgg]("any"),
expression[AnyAgg]("some"),
expression[AnyAgg]("bool_or"),
expression[BoolAnd]("every"),
expression[BoolAnd]("bool_and"),
expression[BoolOr]("any"),
expression[BoolOr]("some"),
expression[BoolOr]("bool_or"),
// string functions
expression[Ascii]("ascii"),

View file

@ -52,8 +52,8 @@ abstract class UnevaluableBooleanAggBase(arg: Expression)
false
""",
since = "3.0.0")
case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
override def nodeName: String = "Every"
case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
override def nodeName: String = "bool_and"
}
@ExpressionDescription(
@ -68,6 +68,6 @@ case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
false
""",
since = "3.0.0")
case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
override def nodeName: String = "Any"
case class BoolOr(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
override def nodeName: String = "bool_or"
}

View file

@ -47,8 +47,8 @@ object ReplaceExpressions extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case e: RuntimeReplaceable => e.child
case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral))
case AnyAgg(arg) => Max(arg)
case EveryAgg(arg) => Min(arg)
case BoolOr(arg) => Max(arg)
case BoolAnd(arg) => Min(arg)
}
}

View file

@ -144,8 +144,8 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
assertSuccess(Sum('stringField))
assertSuccess(Average('stringField))
assertSuccess(Min('arrayField))
assertSuccess(new EveryAgg('booleanField))
assertSuccess(new AnyAgg('booleanField))
assertSuccess(new BoolAnd('booleanField))
assertSuccess(new BoolOr('booleanField))
assertError(Min('mapField), "min does not support ordering on type")
assertError(Max('mapField), "max does not support ordering on type")

View file

@ -293,7 +293,7 @@ struct<>
-- !query 31
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
-- !query 31 schema
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query 31 output
NULL NULL NULL NULL NULL
@ -301,7 +301,7 @@ NULL NULL NULL NULL NULL
-- !query 32
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
-- !query 32 schema
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query 32 output
NULL NULL NULL NULL NULL
@ -309,7 +309,7 @@ NULL NULL NULL NULL NULL
-- !query 33
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
-- !query 33 schema
struct<every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
struct<bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query 33 output
false true true false true
@ -317,7 +317,7 @@ false true true false true
-- !query 34
SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
-- !query 34 schema
struct<k:int,every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any(v):boolean>
struct<k:int,bool_and(v):boolean,bool_or(v):boolean,bool_or(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query 34 output
1 false true true false true
2 true true true true true
@ -329,7 +329,7 @@ struct<k:int,every(v):boolean,any(v):boolean,any(v):boolean,every(v):boolean,any
-- !query 35
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false
-- !query 35 schema
struct<k:int,every(v):boolean>
struct<k:int,bool_and(v):boolean>
-- !query 35 output
1 false
3 false
@ -339,7 +339,7 @@ struct<k:int,every(v):boolean>
-- !query 36
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL
-- !query 36 schema
struct<k:int,every(v):boolean>
struct<k:int,bool_and(v):boolean>
-- !query 36 output
4 NULL
@ -380,7 +380,7 @@ SELECT every(1)
struct<>
-- !query 39 output
org.apache.spark.sql.AnalysisException
cannot resolve 'every(1)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7
cannot resolve 'bool_and(1)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [int].; line 1 pos 7
-- !query 40
@ -389,7 +389,7 @@ SELECT some(1S)
struct<>
-- !query 40 output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(1S)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7
cannot resolve 'bool_or(1S)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [smallint].; line 1 pos 7
-- !query 41
@ -398,7 +398,7 @@ SELECT any(1L)
struct<>
-- !query 41 output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(1L)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7
cannot resolve 'bool_or(1L)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [bigint].; line 1 pos 7
-- !query 42
@ -407,7 +407,7 @@ SELECT every("true")
struct<>
-- !query 42 output
org.apache.spark.sql.AnalysisException
cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 7
cannot resolve 'bool_and('true')' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [string].; line 1 pos 7
-- !query 43
@ -416,7 +416,7 @@ SELECT bool_and(1.0)
struct<>
-- !query 43 output
org.apache.spark.sql.AnalysisException
cannot resolve 'every(1.0BD)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7
cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7
-- !query 44
@ -425,13 +425,13 @@ SELECT bool_or(1.0D)
struct<>
-- !query 44 output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(1.0D)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [double].; line 1 pos 7
cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7
-- !query 45
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 45 schema
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 45 output
1 false false
1 true false
@ -448,7 +448,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
-- !query 46
SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 46 schema
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 46 output
1 false false
1 true true
@ -465,7 +465,7 @@ struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RA
-- !query 47
SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 47 schema
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 47 output
1 false false
1 true true
@ -482,7 +482,7 @@ struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RA
-- !query 48
SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 48 schema
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 48 output
1 false false
1 true false
@ -499,7 +499,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
-- !query 49
SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 49 schema
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 49 output
1 false false
1 true true

View file

@ -293,7 +293,7 @@ struct<>
-- !query 31
SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0
-- !query 31 schema
struct<CAST(udf(cast(every(v) as string)) AS BOOLEAN):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
struct<CAST(udf(cast(bool_and(v) as string)) AS BOOLEAN):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
-- !query 31 output
NULL NULL NULL
@ -301,7 +301,7 @@ NULL NULL NULL
-- !query 32
SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4
-- !query 32 schema
struct<CAST(udf(cast(every(cast(udf(cast(v as string)) as boolean)) as string)) AS BOOLEAN):boolean,any(v):boolean,any(v):boolean>
struct<CAST(udf(cast(bool_and(cast(udf(cast(v as string)) as boolean)) as string)) AS BOOLEAN):boolean,bool_or(v):boolean,bool_or(v):boolean>
-- !query 32 output
NULL NULL NULL
@ -309,7 +309,7 @@ NULL NULL NULL
-- !query 33
SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5
-- !query 33 schema
struct<every(v):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
struct<bool_and(v):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
-- !query 33 output
false true true
@ -317,7 +317,7 @@ false true true
-- !query 34
SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k)
-- !query 34 schema
struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean,CAST(udf(cast(any(v) as string)) AS BOOLEAN):boolean,any(v):boolean>
struct<CAST(udf(cast(k as string)) AS INT):int,bool_and(v):boolean,CAST(udf(cast(bool_or(v) as string)) AS BOOLEAN):boolean,bool_or(v):boolean>
-- !query 34 output
1 false true true
2 true true true
@ -329,7 +329,7 @@ struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean,CAST(udf(cast(an
-- !query 35
SELECT udf(k), every(v) FROM test_agg GROUP BY k HAVING every(v) = false
-- !query 35 schema
struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean>
struct<CAST(udf(cast(k as string)) AS INT):int,bool_and(v):boolean>
-- !query 35 output
1 false
3 false
@ -339,7 +339,7 @@ struct<CAST(udf(cast(k as string)) AS INT):int,every(v):boolean>
-- !query 36
SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL
-- !query 36 schema
struct<CAST(udf(cast(k as string)) AS INT):int,CAST(udf(cast(every(v) as string)) AS BOOLEAN):boolean>
struct<CAST(udf(cast(k as string)) AS INT):int,CAST(udf(cast(bool_and(v) as string)) AS BOOLEAN):boolean>
-- !query 36 output
4 NULL
@ -380,7 +380,7 @@ SELECT every(udf(1))
struct<>
-- !query 39 output
org.apache.spark.sql.AnalysisException
cannot resolve 'every(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7
cannot resolve 'bool_and(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [int].; line 1 pos 7
-- !query 40
@ -389,7 +389,7 @@ SELECT some(udf(1S))
struct<>
-- !query 40 output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7
cannot resolve 'bool_or(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [smallint].; line 1 pos 7
-- !query 41
@ -398,7 +398,7 @@ SELECT any(udf(1L))
struct<>
-- !query 41 output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7
cannot resolve 'bool_or(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [bigint].; line 1 pos 7
-- !query 42
@ -407,13 +407,13 @@ SELECT udf(every("true"))
struct<>
-- !query 42 output
org.apache.spark.sql.AnalysisException
cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 11
cannot resolve 'bool_and('true')' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [string].; line 1 pos 11
-- !query 43
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 43 schema
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 43 output
1 false false
1 true false
@ -430,7 +430,7 @@ struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST
-- !query 44
SELECT k, udf(udf(v)), some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 44 schema
struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) AS BOOLEAN):boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) AS BOOLEAN):boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 44 output
1 false false
1 true true
@ -447,7 +447,7 @@ struct<k:int,CAST(udf(cast(cast(udf(cast(v as string)) as boolean) as string)) A
-- !query 45
SELECT udf(udf(k)), v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query 45 schema
struct<CAST(udf(cast(cast(udf(cast(k as string)) as int) as string)) AS INT):int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
struct<CAST(udf(cast(cast(udf(cast(k as string)) as int) as string)) AS INT):int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query 45 output
1 false false
1 true true

View file

@ -95,8 +95,8 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
// plan should show the rewritten aggregate expression.
val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k")
checkKeywordsExistsInExplain(df,
"Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS any(v)#x, " +
"max(v#x) AS any(v)#x]")
"Aggregate [k#x], [k#x, min(v#x) AS bool_and(v)#x, max(v#x) AS bool_or(v)#x, " +
"max(v#x) AS bool_or(v)#x]")
}
}