spark-instrumented-optimizer/R/pkg/tests/fulltests/test_Serde.R
neilalex f54b65c15a [SPARK-21727][R] Allow multi-element atomic vector as column type in SparkR DataFrame
## What changes were proposed in this pull request?

A fix to https://issues.apache.org/jira/browse/SPARK-21727, "Operating on an ArrayType in a SparkR DataFrame throws error"

## How was this patch tested?

- Ran the tests in R\pkg\tests\run-all.R (see the attached results below)
- Tested the following lines in SparkR, which now seem to execute without error:

```
indices <- 1:4
myDf <- data.frame(indices)
myDf$data <- list(rep(0, 20))
mySparkDf <- as.DataFrame(myDf)
collect(mySparkDf)
```

[2018-01-22 SPARK-21727 Test Results.txt](https://github.com/apache/spark/files/1653535/2018-01-22.SPARK-21727.Test.Results.txt)

felixcheung yanboliang sun-rui shivaram

_The contribution is my original work and I license the work to the project under the project’s open source license_

Author: neilalex <neil@neilalex.com>

Closes #20352 from neilalex/neilalex-sparkr-arraytype.
2018-01-23 22:31:14 -08:00

127 lines
4.2 KiB
R

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Label under which the tests in this file are reported.
context("SerDe functionality")

# Start a SparkR session with Hive support switched off.
# NOTE(review): `sparkRTestMaster` is not defined in this file -- presumably it
# is supplied by the test runner; confirm.
sparkSession <- sparkR.session(
  master = sparkRTestMaster,
  enableHiveSupport = FALSE
)
test_that("SerDe of primitive types", {
  # Pass one value to the static "echo" method of SparkRHandler and hand back
  # whatever the call returns.
  echo <- function(value) {
    callJStatic("SparkRHandler", "echo", value)
  }

  # Integer scalar: value and class must both survive the round trip.
  int_back <- echo(1L)
  expect_equal(int_back, 1L)
  expect_equal(class(int_back), "integer")

  # Double scalar.
  dbl_back <- echo(1)
  expect_equal(dbl_back, 1)
  expect_equal(class(dbl_back), "numeric")

  # Logical scalar.
  lgl_back <- echo(TRUE)
  expect_true(lgl_back)
  expect_equal(class(lgl_back), "logical")

  # Character scalar.
  chr_back <- echo("abc")
  expect_equal(chr_back, "abc")
  expect_equal(class(chr_back), "character")
})
test_that("SerDe of multi-element primitive vectors inside R data.frame", {
  indices <- 1L:3L

  # Build a local data.frame whose `data` column holds a three-element atomic
  # vector made by repeating `element`, convert it with as.DataFrame(), and
  # collect it back. The collected frame must equal a frame in which the same
  # values are stored as a list, and the first cell must be a list whose first
  # element has class `element_class`.
  expect_vector_column <- function(element, element_class) {
    payload <- rep(element, 3L)

    input_df <- data.frame(indices)
    input_df$data <- list(payload)

    collected_df <- collect(as.DataFrame(input_df))

    # Same values, but held as a list rather than an atomic vector.
    expected_df <- data.frame(indices)
    expected_df$data <- list(as.list(payload))

    expect_equal(collected_df, expected_df)

    first_cell <- collected_df[["data"]][[1]]
    expect_equal(class(first_cell), "list")
    expect_equal(class(first_cell[[1]]), element_class)
  }

  # vector of integers embedded in R data.frame
  expect_vector_column(0L, "integer")

  # vector of numeric embedded in R data.frame
  expect_vector_column(0, "numeric")

  # vector of logical embedded in R data.frame
  expect_vector_column(TRUE, "logical")

  # vector of character embedded in R data.frame
  expect_vector_column("abc", "character")
})
test_that("SerDe of list of primitive types", {
  # Each case pairs a list to send through the "echo" method of SparkRHandler
  # with the class its first element is expected to have when it comes back.
  cases <- list(
    list(input = list(1L, 2L, 3L), element_class = "integer"),
    list(input = list(1, 2, 3), element_class = "numeric"),
    list(input = list(TRUE, FALSE), element_class = "logical"),
    list(input = list("a", "b", "c"), element_class = "character")
  )

  for (case in cases) {
    sent <- case[["input"]]
    echoed <- callJStatic("SparkRHandler", "echo", sent)
    expect_equal(sent, echoed)
    expect_equal(class(echoed[[1]]), case[["element_class"]])
  }

  # Empty list: there is no first element, so only equality is checked.
  empty <- list()
  echoed_empty <- callJStatic("SparkRHandler", "echo", empty)
  expect_equal(empty, echoed_empty)
})
test_that("SerDe of list of lists", {
  # One inner list per primitive type: integer, numeric, logical, character.
  nested <- list(
    list(1L, 2L, 3L),
    list(1, 2, 3),
    list(TRUE, FALSE),
    list("a", "b", "c")
  )
  nested_back <- callJStatic("SparkRHandler", "echo", nested)
  expect_equal(nested, nested_back)

  # List of empty lists
  hollow <- list(list(), list())
  hollow_back <- callJStatic("SparkRHandler", "echo", hollow)
  expect_equal(hollow, hollow_back)
})
# Stop the SparkR session once all tests in this file have run.
sparkR.session.stop()