[SPARK-12034][SPARKR] Eliminate warnings in SparkR test cases.

This PR:
1. Suppress all known warnings.
2. Clean up test cases and fix some errors in them.
3. Fix errors in HiveContext-related test cases. These test cases were previously not actually run, due to a bug in creating TestHiveContext.
4. Support 'testthat' package version 0.11.0, which prefers that test cases be under 'tests/testthat'.
5. Make sure the default Hadoop file system is local when running test cases.
6. Turn warnings into errors.

Author: Sun Rui <rui.sun@intel.com>

Closes #10030 from sun-rui/SPARK-12034.
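
Items 1 and 6 work as a pair: warnings are promoted to errors globally so that any new warning fails the tests, while the handful of known, acceptable warnings are silenced at the call site. A minimal base-R sketch of how the two mechanisms interact (nothing here is SparkR-specific):

# options(warn = 2) promotes every warning to an error, so an unexpected
# warning aborts the enclosing test instead of scrolling past unnoticed.
options(warn = 2)
caught <- tryCatch(log(-1),  # warns "NaNs produced", now raised as an error
                   error = function(e) conditionMessage(e))
# suppressWarnings() muffles the warning before the warn = 2 promotion applies,
# which is why the test changes below wrap known-noisy calls in it:
okay <- suppressWarnings(log(-1))  # returns NaN quietly, no error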


@@ -20,7 +20,7 @@ runScript <- function() {
   sparkHome <- Sys.getenv("SPARK_HOME")
   sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
   jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
-  scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+  scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/testthat/jarTest.R")
   submitPath <- file.path(sparkHome, "bin/spark-submit")
   res <- system2(command = submitPath,
                  args = c(jarPath, scriptPath),


@@ -26,7 +26,7 @@ sc <- sparkR.init()
 sqlContext <- sparkRSQL.init(sc)

 test_that("glm and predict", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   test <- select(training, "Sepal_Length")
   model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
   prediction <- predict(model, test)
@@ -39,7 +39,7 @@ test_that("glm and predict", {
 })

 test_that("glm should work with long formula", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   training$LongLongLongLongLongName <- training$Sepal_Width
   training$VeryLongLongLongLonLongName <- training$Sepal_Length
   training$AnotherLongLongLongLongName <- training$Species
@@ -51,7 +51,7 @@ test_that("glm should work with long formula", {
 })

 test_that("predictions match with native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
@@ -59,7 +59,7 @@ test_that("predictions match with native glm", {
 })

 test_that("dot minus and intercept vs native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ . - Species + 0, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
@@ -67,7 +67,7 @@ test_that("dot minus and intercept vs native glm", {
 })

 test_that("feature interaction vs native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   model <- glm(Sepal_Width ~ Species:Sepal_Length, data = training)
   vals <- collect(select(predict(model, training), "prediction"))
   rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
@@ -75,7 +75,7 @@ test_that("feature interaction vs native glm", {
 })

 test_that("summary coefficients match with native glm", {
-  training <- createDataFrame(sqlContext, iris)
+  training <- suppressWarnings(createDataFrame(sqlContext, iris))
   stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training, solver = "normal"))
   coefs <- unlist(stats$coefficients)
   devianceResiduals <- unlist(stats$devianceResiduals)
@@ -92,7 +92,7 @@ test_that("summary coefficients match with native glm", {
 })

 test_that("summary coefficients match with native glm of family 'binomial'", {
-  df <- createDataFrame(sqlContext, iris)
+  df <- suppressWarnings(createDataFrame(sqlContext, iris))
   training <- filter(df, df$Species != "setosa")
   stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
                        family = "binomial"))
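
All of the suppressWarnings() wrappers above guard the same known warning: iris has dotted column names (Sepal.Length and so on), which SparkR rewrites to underscore form when building a DataFrame, warning as it does so; under options(warn = 2) that warning would abort the test. A short sketch of the effect, assuming a running SparkR 1.6 session with sqlContext initialized:

# SparkR warns while renaming the dotted iris columns to valid SQL names.
training <- suppressWarnings(createDataFrame(sqlContext, iris))
columns(training)  # "Sepal_Length" "Sepal_Width" ... -- hence the formulas above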


@@ -133,7 +133,32 @@ test_that("create DataFrame from RDD", {
   expect_equal(columns(df), c("a", "b"))
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))

   df <- jsonFile(sqlContext, jsonPathNa)
+  schema <- structType(structField("name", "string"), structField("age", "integer"),
+                       structField("height", "float"))
+  df <- read.df(sqlContext, jsonPathNa, "json", schema)
+  df2 <- createDataFrame(sqlContext, toRDD(df), schema)
+  df2AsDF <- as.DataFrame(sqlContext, toRDD(df), schema)
+  expect_equal(columns(df2), c("name", "age", "height"))
+  expect_equal(columns(df2AsDF), c("name", "age", "height"))
+  expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+  expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
+  expect_equal(as.list(collect(where(df2, df2$name == "Bob"))),
+               list(name = "Bob", age = 16, height = 176.5))
+  expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))),
+               list(name = "Bob", age = 16, height = 176.5))
+  localDF <- data.frame(name=c("John", "Smith", "Sarah"),
+                        age=c(19L, 23L, 18L),
+                        height=c(176.5, 181.4, 173.7))
+  df <- createDataFrame(sqlContext, localDF, schema)
+  expect_is(df, "DataFrame")
+  expect_equal(count(df), 3)
+  expect_equal(columns(df), c("name", "age", "height"))
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
+  expect_equal(as.list(collect(where(df, df$name == "John"))),
+               list(name = "John", age = 19L, height = 176.5))
+
+  ssc <- callJMethod(sc, "sc")
   hiveCtx <- tryCatch({
     newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
   },
@@ -141,30 +166,12 @@ test_that("create DataFrame from RDD", {
     skip("Hive is not build with SparkSQL, skipped")
   })

   sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
-  insertInto(df, "people")
-  expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
-  expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))
-
-  schema <- structType(structField("name", "string"), structField("age", "integer"),
-                       structField("height", "float"))
-  df2 <- createDataFrame(sqlContext, df.toRDD, schema)
-  df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
-  expect_equal(columns(df2), c("name", "age", "height"))
-  expect_equal(columns(df2AsDF), c("name", "age", "height"))
-  expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
-  expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
-  expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
-  expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
-
-  localDF <- data.frame(name=c("John", "Smith", "Sarah"),
-                        age=c(19, 23, 18),
-                        height=c(164.10, 181.4, 173.7))
-  df <- createDataFrame(sqlContext, localDF, schema)
-  expect_is(df, "DataFrame")
-  expect_equal(count(df), 3)
-  expect_equal(columns(df), c("name", "age", "height"))
-  expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
-  expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
+  df <- read.df(hiveCtx, jsonPathNa, "json", schema)
+  invisible(insertInto(df, "people"))
+  expect_equal(collect(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"))$age,
+               c(16))
+  expect_equal(collect(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"))$height,
+               c(176.5))
 })

 test_that("convert NAs to null type in DataFrames", {
@@ -250,7 +257,7 @@ test_that("create DataFrame from list or data.frame", {
   ldf2 <- collect(df)
   expect_equal(ldf$a, ldf2$a)

-  irisdf <- createDataFrame(sqlContext, iris)
+  irisdf <- suppressWarnings(createDataFrame(sqlContext, iris))
   iris_collected <- collect(irisdf)
   expect_equivalent(iris_collected[,-5], iris[,-5])
   expect_equal(iris_collected$Species, as.character(iris$Species))
@@ -463,7 +470,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", {
   RDD2 <- toRDD(df)
   unioned <- unionRDD(RDD1, RDD2)
   expect_is(unioned, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unioned), "byte")
+  expect_equal(getSerializedMode(unioned), "byte")
   expect_equal(collect(unioned)[[2]]$name, "Andy")
 })
@@ -485,13 +492,13 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", {
   unionByte <- unionRDD(rdd, dfRDD)
   expect_is(unionByte, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unionByte), "byte")
+  expect_equal(getSerializedMode(unionByte), "byte")
   expect_equal(collect(unionByte)[[1]], 1)
   expect_equal(collect(unionByte)[[12]]$name, "Andy")

   unionString <- unionRDD(textRDD, dfRDD)
   expect_is(unionString, "RDD")
-  expect_equal(SparkR:::getSerializedMode(unionString), "byte")
+  expect_equal(getSerializedMode(unionString), "byte")
   expect_equal(collect(unionString)[[1]], "Michael")
   expect_equal(collect(unionString)[[5]]$name, "Andy")
 })
@@ -504,7 +511,7 @@ test_that("objectFile() works with row serialization", {
   objectIn <- objectFile(sc, objectPath)
   expect_is(objectIn, "RDD")
-  expect_equal(SparkR:::getSerializedMode(objectIn), "byte")
+  expect_equal(getSerializedMode(objectIn), "byte")
   expect_equal(collect(objectIn)[[2]]$age, 30)
 })
@@ -849,6 +856,7 @@ test_that("write.df() as parquet file", {
 })

 test_that("test HiveContext", {
+  ssc <- callJMethod(sc, "sc")
   hiveCtx <- tryCatch({
     newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
   },
@@ -863,10 +871,10 @@ test_that("test HiveContext", {
   expect_equal(count(df2), 3)

   jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp")
-  saveAsTable(df, "json", "json", "append", path = jsonPath2)
-  df3 <- sql(hiveCtx, "select * from json")
+  invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
+  df3 <- sql(hiveCtx, "select * from json2")
   expect_is(df3, "DataFrame")
-  expect_equal(count(df3), 6)
+  expect_equal(count(df3), 3)
 })

 test_that("column operators", {
@@ -1311,7 +1319,7 @@ test_that("toJSON() returns an RDD of the correct values", {
   df <- jsonFile(sqlContext, jsonPath)
   testRDD <- toJSON(df)
   expect_is(testRDD, "RDD")
-  expect_equal(SparkR:::getSerializedMode(testRDD), "string")
+  expect_equal(getSerializedMode(testRDD), "string")
   expect_equal(collect(testRDD)[[1]], mockLines[1])
 })
@@ -1641,7 +1649,7 @@ test_that("SQL error message is returned from JVM", {
   expect_equal(grepl("Table not found: blah", retError), TRUE)
 })

-irisDF <- createDataFrame(sqlContext, iris)
+irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))

 test_that("Method as.data.frame as a synonym for collect()", {
   expect_equal(as.data.frame(irisDF), collect(irisDF))
@@ -1670,7 +1678,7 @@ test_that("attach() on a DataFrame", {
 })

 test_that("with() on a DataFrame", {
-  df <- createDataFrame(sqlContext, iris)
+  df <- suppressWarnings(createDataFrame(sqlContext, iris))
   expect_error(Sepal_Length)
   sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
   expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")


@@ -18,4 +18,7 @@
 library(testthat)
 library(SparkR)

+# Turn all warnings into errors
+options("warn" = 2)
+
 test_package("SparkR")


@@ -23,7 +23,7 @@ FAILED=0
 LOGFILE=$FWDIR/unit-tests.out
 rm -f $LOGFILE

-SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
 FAILED=$((PIPESTATUS[0]||$FAILED))

 if [[ $FAILED != 0 ]]; then
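
The added --conf is item 5 of the commit message: Spark forwards any spark.hadoop.* property into the Hadoop Configuration, and fs.default.name (the legacy spelling of fs.defaultFS) pins the default filesystem to file:///, so test temp files stay local even when the environment's Hadoop config points at an HDFS cluster. The same pinning could be expressed from R when building a context; a hedged sketch, assuming SparkR 1.6's sparkR.init() signature:

# Pass the Hadoop override as a Spark property when initializing the context.
sc <- sparkR.init(master = "local[2]", appName = "SparkR-tests",
                  sparkEnvir = list("spark.hadoop.fs.default.name" = "file:///"))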