[SPARK-35381][R] Fix lambda variable name issues in nested higher order functions in R APIs
### What changes were proposed in this pull request?

This PR fixes the same issue as https://github.com/apache/spark/pull/32424.

```r
df <- sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters")
collect(select(
  df,
  array_transform("numbers", function(number) {
    array_transform("letters", function(latter) {
      struct(alias(number, "n"), alias(latter, "l"))
    })
  })
))
```

**Before:**

```
... a, a, b, b, c, c, a, a, b, b, c, c, a, a, b, b, c, c
```

**After:**

```
... 1, a, 1, b, 1, c, 2, a, 2, b, 2, c, 3, a, 3, b, 3, c
```

### Why are the changes needed?

To produce the correct results.

### Does this PR introduce _any_ user-facing change?

Yes, it fixes the results to be correct as mentioned above.

### How was this patch tested?

Manually tested as above, and a unit test was added.

Closes #32517 from HyukjinKwon/SPARK-35381.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
parent
7e3446a204
commit
ecb48ccb7d
|
@ -3670,7 +3670,12 @@ unresolved_named_lambda_var <- function(...) {
|
|||
"org.apache.spark.sql.Column",
|
||||
newJObject(
|
||||
"org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
|
||||
list(...)
|
||||
lapply(list(...), function(x) {
|
||||
handledCallJStatic(
|
||||
"org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
|
||||
"freshVarName",
|
||||
x)
|
||||
})
|
||||
)
|
||||
)
|
||||
column(jc)
|
||||
|
|
|
@ -2161,6 +2161,20 @@ test_that("higher order functions", {
|
|||
expect_error(array_transform("xs", function(...) 42))
|
||||
})
|
||||
|
||||
# Regression test for nested higher order functions: the variable bound by the
# outer lambda must not be confused with the one bound by the inner lambda.
test_that("SPARK-34794: lambda vars must be resolved properly in nested higher order functions", {
  df <- sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters")

  # For a given number, pair it with every letter as a struct with
  # fields "n" (the number) and "l" (the letter).
  pair_with_letters <- function(number) {
    array_transform("letters", function(letter) {
      struct(alias(number, "n"), alias(letter, "l"))
    })
  }
  nested <- array_transform("numbers", pair_with_letters)
  ret <- first(select(df, nested))

  # The very first struct must carry the outer element (1) in its "n" field,
  # not a value leaked from the inner lambda.
  expect_equal(1, ret[[1]][[1]][[1]][[1]]$n)
})
|
||||
|
||||
test_that("group by, agg functions", {
|
||||
df <- read.json(jsonPath)
|
||||
df1 <- agg(df, name = "max", age = "sum")
|
||||
|
|
Loading…
Reference in a new issue