dc4c351837
## What changes were proposed in this pull request? Move all existing tests to a non-installed directory so that they are never run as a result of installing the SparkR package. For a follow-up PR: - remove all skip_on_cran() calls in tests - clean up the test timer - improve or change the basic tests that do run on CRAN (if anyone has suggestions). It looks like `R CMD build pkg` will still put pkg\tests (i.e. the full tests) into the source package, but `R CMD INSTALL` on such a source package does not install these tests (and so `R CMD check` does not run them). ## How was this patch tested? - [x] unit tests, Jenkins - [x] AppVeyor - [x] make a source package, install it, `R CMD check` it - verify the full tests are not installed or run. Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #18264 from felixcheung/rtestset.
91 lines
3.4 KiB
R
91 lines
3.4 KiB
R
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
# Context label for the tests defined below.
context("basic tests for CRAN")
|
|
|
|
# Builds SparkDataFrames from several kinds of local R data (a one-column
# data.frame, a list of rows, a two-column data.frame, the mtcars data set and
# a raw vector) and checks row counts, column names, column types and the
# values that come back from collect().
test_that("create DataFrame from list or data.frame", {
  # Start a session against the test master with Hive support disabled.
  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)

  # One-column data.frame: only the row count is verified.
  nRows <- 4
  countedDF <- createDataFrame(data.frame(dummy = seq_len(nRows)))
  expect_equal(count(countedDF), nRows)

  # List of named lists: the element names are expected to become the columns.
  rowList <- list(
    list(a = 1, b = 2),
    list(a = 3, b = 4)
  )
  fromListDF <- createDataFrame(rowList)
  expect_equal(columns(fromListDF), c("a", "b"))

  # Integer + character data.frame: schema, size and a collect() round trip.
  localDF <- data.frame(a = 1:3, b = c("a", "b", "c"))
  typedDF <- createDataFrame(localDF)
  expect_equal(columns(typedDF), c("a", "b"))
  expect_equal(dtypes(typedDF), list(c("a", "int"), c("b", "string")))
  expect_equal(count(typedDF), 3)
  expect_equal(localDF[["a"]], collect(typedDF)[["a"]])

  # Built-in data set: collected values must match, attributes are ignored.
  expect_equivalent(collect(createDataFrame(mtcars)), mtcars)

  # Raw vector stored as a single cell must come back unchanged.
  rawCell <- as.raw(1:3)
  rawDF <- createDataFrame(list(list(rawCell)))
  collectedRaw <- collect(rawDF)
  expect_equal(collectedRaw[[1]][[1]], rawCell)

  sparkR.session.stop()
})
|
|
|
|
# Fits generalized linear models through SparkR for three families and checks
# the results:
#   * gaussian - predictions are compared with base R glm() on iris;
#   * Gamma    - the printed model summary must mention the gamma dispersion;
#   * tweedie  - predictions are compared with values derived from hard-coded
#                reference coefficients.
test_that("spark.glm and predict", {
  # Start a session against the test master with Hive support disabled.
  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)

  # Warnings raised while converting iris are deliberately silenced.
  irisDF <- suppressWarnings(createDataFrame(iris))

  # ---- gaussian family ----
  gaussianModel <- spark.glm(irisDF, Sepal_Width ~ Sepal_Length + Species)
  gaussianPred <- predict(gaussianModel, irisDF)
  firstGaussian <- take(select(gaussianPred, "prediction"), 1)
  expect_equal(typeof(firstGaussian$prediction), "double")
  sparkGaussian <- collect(select(gaussianPred, "prediction"))
  localGaussianFit <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris)
  localGaussian <- predict(localGaussianFit, iris)
  gaussianDiff <- localGaussian - sparkGaussian
  # The differences are passed as `info` so they show up if the check fails.
  expect_true(all(abs(gaussianDiff) < 1e-6), gaussianDiff)

  # ---- Gamma family ----
  # Simulated data; only the summary text is inspected, not the estimates.
  nObs <- 100
  x <- runif(nObs, min = -1, max = 1)
  y <- rgamma(nObs, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
  gammaDF <- as.DataFrame(data.frame(x = x, y = y))
  gammaModel <- glm(y ~ x, family = Gamma, gammaDF)
  summaryText <- capture.output(print(summary(gammaModel)))
  mentionsDispersion <- grepl("Dispersion parameter for gamma family", summaryText)
  expect_true(any(mentionsDispersion))

  # ---- tweedie family ----
  tweedieModel <- spark.glm(
    irisDF, Sepal_Width ~ Sepal_Length + Species,
    family = "tweedie", var.power = 1.2, link.power = 0.0
  )
  tweediePred <- predict(tweedieModel, irisDF)
  firstTweedie <- take(select(tweediePred, "prediction"), 1)
  expect_equal(typeof(firstTweedie$prediction), "double")
  sparkTweedie <- collect(select(tweediePred, "prediction"))

  # The R-side predictions are computed by hand so the test does not depend on
  # the statmod package. The coefficients below were obtained with:
  #   library(statmod)
  #   rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
  #                 family = tweedie(var.power = 1.2, link.power = 0.0))
  #   print(coef(rModel))
  referenceCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
  designMatrix <- model.matrix(Sepal.Width ~ Sepal.Length + Species, data = iris)
  linearPredictor <- as.numeric(designMatrix %*% referenceCoef)
  localTweedie <- exp(linearPredictor)
  tweedieDiff <- localTweedie - sparkTweedie
  expect_true(all(abs(tweedieDiff) < 1e-5), tweedieDiff)

  sparkR.session.stop()
})
|