f54b65c15a
## What changes were proposed in this pull request? A fix to https://issues.apache.org/jira/browse/SPARK-21727, "Operating on an ArrayType in a SparkR DataFrame throws error" ## How was this patch tested? - Ran tests at R\pkg\tests\run-all.R (see below attached results) - Tested the following lines in SparkR, which now seem to execute without error: ``` indices <- 1:4 myDf <- data.frame(indices) myDf$data <- list(rep(0, 20)) mySparkDf <- as.DataFrame(myDf) collect(mySparkDf) ``` [2018-01-22 SPARK-21727 Test Results.txt](https://github.com/apache/spark/files/1653535/2018-01-22.SPARK-21727.Test.Results.txt) felixcheung yanboliang sun-rui shivaram _The contribution is my original work and I license the work to the project under the project’s open source license_ Author: neilalex <neil@neilalex.com> Closes #20352 from neilalex/neilalex-sparkr-arraytype.
127 lines
4.2 KiB
R
127 lines
4.2 KiB
R
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
context("SerDe functionality")
|
|
|
|
sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
|
|
|
|
test_that("SerDe of primitive types", {
|
|
x <- callJStatic("SparkRHandler", "echo", 1L)
|
|
expect_equal(x, 1L)
|
|
expect_equal(class(x), "integer")
|
|
|
|
x <- callJStatic("SparkRHandler", "echo", 1)
|
|
expect_equal(x, 1)
|
|
expect_equal(class(x), "numeric")
|
|
|
|
x <- callJStatic("SparkRHandler", "echo", TRUE)
|
|
expect_true(x)
|
|
expect_equal(class(x), "logical")
|
|
|
|
x <- callJStatic("SparkRHandler", "echo", "abc")
|
|
expect_equal(x, "abc")
|
|
expect_equal(class(x), "character")
|
|
})
|
|
|
|
test_that("SerDe of multi-element primitive vectors inside R data.frame", {
|
|
# vector of integers embedded in R data.frame
|
|
indices <- 1L:3L
|
|
myDf <- data.frame(indices)
|
|
myDf$data <- list(rep(0L, 3L))
|
|
mySparkDf <- as.DataFrame(myDf)
|
|
myResultingDf <- collect(mySparkDf)
|
|
myDfListedData <- data.frame(indices)
|
|
myDfListedData$data <- list(as.list(rep(0L, 3L)))
|
|
expect_equal(myResultingDf, myDfListedData)
|
|
expect_equal(class(myResultingDf[["data"]][[1]]), "list")
|
|
expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "integer")
|
|
|
|
# vector of numeric embedded in R data.frame
|
|
myDf <- data.frame(indices)
|
|
myDf$data <- list(rep(0, 3L))
|
|
mySparkDf <- as.DataFrame(myDf)
|
|
myResultingDf <- collect(mySparkDf)
|
|
myDfListedData <- data.frame(indices)
|
|
myDfListedData$data <- list(as.list(rep(0, 3L)))
|
|
expect_equal(myResultingDf, myDfListedData)
|
|
expect_equal(class(myResultingDf[["data"]][[1]]), "list")
|
|
expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "numeric")
|
|
|
|
# vector of logical embedded in R data.frame
|
|
myDf <- data.frame(indices)
|
|
myDf$data <- list(rep(TRUE, 3L))
|
|
mySparkDf <- as.DataFrame(myDf)
|
|
myResultingDf <- collect(mySparkDf)
|
|
myDfListedData <- data.frame(indices)
|
|
myDfListedData$data <- list(as.list(rep(TRUE, 3L)))
|
|
expect_equal(myResultingDf, myDfListedData)
|
|
expect_equal(class(myResultingDf[["data"]][[1]]), "list")
|
|
expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "logical")
|
|
|
|
# vector of character embedded in R data.frame
|
|
myDf <- data.frame(indices)
|
|
myDf$data <- list(rep("abc", 3L))
|
|
mySparkDf <- as.DataFrame(myDf)
|
|
myResultingDf <- collect(mySparkDf)
|
|
myDfListedData <- data.frame(indices)
|
|
myDfListedData$data <- list(as.list(rep("abc", 3L)))
|
|
expect_equal(myResultingDf, myDfListedData)
|
|
expect_equal(class(myResultingDf[["data"]][[1]]), "list")
|
|
expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "character")
|
|
})
|
|
|
|
test_that("SerDe of list of primitive types", {
|
|
x <- list(1L, 2L, 3L)
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
expect_equal(class(y[[1]]), "integer")
|
|
|
|
x <- list(1, 2, 3)
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
expect_equal(class(y[[1]]), "numeric")
|
|
|
|
x <- list(TRUE, FALSE)
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
expect_equal(class(y[[1]]), "logical")
|
|
|
|
x <- list("a", "b", "c")
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
expect_equal(class(y[[1]]), "character")
|
|
|
|
# Empty list
|
|
x <- list()
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
})
|
|
|
|
test_that("SerDe of list of lists", {
|
|
x <- list(list(1L, 2L, 3L), list(1, 2, 3),
|
|
list(TRUE, FALSE), list("a", "b", "c"))
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
|
|
# List of empty lists
|
|
x <- list(list(), list())
|
|
y <- callJStatic("SparkRHandler", "echo", x)
|
|
expect_equal(x, y)
|
|
})
|
|
|
|
sparkR.session.stop()
|