[MINOR][SQL] Rename config name to spark.sql.analyzer.failAmbiguousSelfJoin.enabled
### What changes were proposed in this pull request?
Add the `.enabled` suffix to `spark.sql.analyzer.failAmbiguousSelfJoin`, renaming it to `spark.sql.analyzer.failAmbiguousSelfJoin.enabled`.

### Why are the changes needed?
To follow the existing naming style for boolean configs.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Not needed.

Closes #26694 from cloud-fan/conf.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
4e073f3c50
commit
e271664a01
|
@@ -111,7 +111,7 @@ license: |
|
|||
|
||||
- The result of `java.lang.Math`'s `log`, `log1p`, `exp`, `expm1`, and `pow` may vary across platforms. In Spark 3.0, the result of the equivalent SQL functions (including related SQL functions like `LOG10`) return values consistent with `java.lang.StrictMath`. In virtually all cases this makes no difference in the return value, and the difference is very small, but may not exactly match `java.lang.Math` on x86 platforms in cases like, for example, `log(3.0)`, whose value varies between `Math.log()` and `StrictMath.log()`.
|
||||
|
||||
- Since Spark 3.0, Dataset query fails if it contains ambiguous column reference that is caused by self join. A typical example: `val df1 = ...; val df2 = df1.filter(...);`, then `df1.join(df2, df1("a") > df2("a"))` returns an empty result which is quite confusing. This is because Spark cannot resolve Dataset column references that point to tables being self joined, and `df1("a")` is exactly the same as `df2("a")` in Spark. To restore the behavior before Spark 3.0, you can set `spark.sql.analyzer.failAmbiguousSelfJoin` to `false`.
|
||||
- Since Spark 3.0, Dataset query fails if it contains ambiguous column reference that is caused by self join. A typical example: `val df1 = ...; val df2 = df1.filter(...);`, then `df1.join(df2, df1("a") > df2("a"))` returns an empty result which is quite confusing. This is because Spark cannot resolve Dataset column references that point to tables being self joined, and `df1("a")` is exactly the same as `df2("a")` in Spark. To restore the behavior before Spark 3.0, you can set `spark.sql.analyzer.failAmbiguousSelfJoin.enabled` to `false`.
|
||||
|
||||
- Since Spark 3.0, `Cast` function processes string literals such as 'Infinity', '+Infinity', '-Infinity', 'NaN', 'Inf', '+Inf', '-Inf' in case insensitive manner when casting the literals to `Double` or `Float` type to ensure greater compatibility with other database systems. This behaviour change is illustrated in the table below:
|
||||
<table class="table">
|
||||
|
|
|
@@ -875,8 +875,8 @@ object SQLConf {
|
|||
.booleanConf
|
||||
.createWithDefault(true)
|
||||
|
||||
val FAIL_AMBIGUOUS_SELF_JOIN =
|
||||
buildConf("spark.sql.analyzer.failAmbiguousSelfJoin")
|
||||
val FAIL_AMBIGUOUS_SELF_JOIN_ENABLED =
|
||||
buildConf("spark.sql.analyzer.failAmbiguousSelfJoin.enabled")
|
||||
.doc("When true, fail the Dataset query if it contains ambiguous self-join.")
|
||||
.internal()
|
||||
.booleanConf
|
||||
|
|
|
@@ -229,7 +229,7 @@ class Dataset[T] private[sql](
|
|||
case _ =>
|
||||
queryExecution.analyzed
|
||||
}
|
||||
if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) {
|
||||
if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) {
|
||||
plan.setTagValue(Dataset.DATASET_ID_TAG, id)
|
||||
}
|
||||
plan
|
||||
|
@@ -1337,7 +1337,7 @@ class Dataset[T] private[sql](
|
|||
private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = {
|
||||
val newExpr = expr transform {
|
||||
case a: AttributeReference
|
||||
if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN) =>
|
||||
if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) =>
|
||||
val metadata = new MetadataBuilder()
|
||||
.withMetadata(a.metadata)
|
||||
.putLong(Dataset.DATASET_ID_KEY, id)
|
||||
|
|
|
@@ -71,7 +71,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] {
|
|||
}
|
||||
|
||||
override def apply(plan: LogicalPlan): LogicalPlan = {
|
||||
if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) return plan
|
||||
if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) return plan
|
||||
|
||||
// We always remove the special metadata from `AttributeReference` at the end of this rule, so
|
||||
// Dataset column reference only exists in the root node via Dataset transformations like
|
||||
|
@@ -149,7 +149,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] {
|
|||
"to figure out which one. Please alias the Datasets with different names via " +
|
||||
"`Dataset.as` before joining them, and specify the column using qualified name, e.g. " +
|
||||
"""`df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. You can also set """ +
|
||||
s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key} to false to disable this check.")
|
||||
s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key} to false to disable this check.")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -96,7 +96,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = df1.filter($"id" > 0)
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
// `df1("id") > df2("id")` is always false.
|
||||
checkAnswer(df1.join(df2, df1("id") > df2("id")), Nil)
|
||||
|
@@ -110,7 +110,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
}
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
assertAmbiguousSelfJoin(df1.join(df2, df1("id") > df2("id")))
|
||||
}
|
||||
|
@@ -121,7 +121,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = df1.filter($"id" > 0)
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
assertAmbiguousSelfJoin(df1.join(df2, df1.colRegex("id") > df2.colRegex("id")))
|
||||
}
|
||||
|
@@ -132,7 +132,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = df1.filter($"a.b" > 0)
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
assertAmbiguousSelfJoin(df1.join(df2, df1("a.b") > df2("a.c")))
|
||||
}
|
||||
|
@@ -143,7 +143,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
val df2 = df1.filter($"id" > 0)
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
// `df2("id")` actually points to the column of `df1`.
|
||||
checkAnswer(df1.join(df2).select(df2("id")), Seq(0, 0, 1, 1, 2, 2).map(Row(_)))
|
||||
|
@@ -157,7 +157,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
}
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
assertAmbiguousSelfJoin(df1.join(df2).select(df2("id")))
|
||||
}
|
||||
|
@@ -170,7 +170,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
val df4 = spark.range(1)
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
// `df2("id") < df3("id")` is always false
|
||||
checkAnswer(df1.join(df2).join(df3, df2("id") < df3("id")), Nil)
|
||||
|
@@ -196,7 +196,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
|
|||
}
|
||||
|
||||
withSQLConf(
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true",
|
||||
SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true",
|
||||
SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
|
||||
assertAmbiguousSelfJoin(df1.join(df2).join(df3, df2("id") < df3("id")))
|
||||
assertAmbiguousSelfJoin(df1.join(df4).join(df2).select(df2("id")))
|
||||
|
|
Loading…
Reference in a new issue