[SPARK-15929] Fix portability of DataFrameSuite path globbing tests

The DataFrameSuite regression tests for SPARK-13774 fail in my environment because they attempt to glob over all of `/mnt`, and some of its subdirectories have restrictive permissions which cause the tests to fail.

This patch rewrites those tests to remove all environment-specific assumptions; the tests now create their own unique temporary paths for use in the tests.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #13649 from JoshRosen/SPARK-15929.
This commit is contained in:
Josh Rosen 2016-06-13 17:06:22 -07:00 committed by Cheng Lian
parent ced8d669b3
commit a6babca1bf

View file

@ -19,6 +19,7 @@ package org.apache.spark.sql
import java.io.File import java.io.File
import java.nio.charset.StandardCharsets import java.nio.charset.StandardCharsets
import java.util.UUID
import scala.language.postfixOps import scala.language.postfixOps
import scala.util.Random import scala.util.Random
@ -35,6 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSQLContext} import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSQLContext}
import org.apache.spark.sql.test.SQLTestData.TestData2 import org.apache.spark.sql.test.SQLTestData.TestData2
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
class DataFrameSuite extends QueryTest with SharedSQLContext { class DataFrameSuite extends QueryTest with SharedSQLContext {
import testImplicits._ import testImplicits._
@ -1495,18 +1497,43 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
} }
test("SPARK-13774: Check error message for non existent path without globbed paths") { test("SPARK-13774: Check error message for non existent path without globbed paths") {
val e = intercept[AnalysisException] (spark.read.format("csv"). val uuid = UUID.randomUUID().toString
load("/xyz/file2", "/xyz/file21", "/abc/files555", "a")).getMessage() val baseDir = Utils.createTempDir()
assert(e.startsWith("Path does not exist")) try {
val e = intercept[AnalysisException] {
spark.read.format("csv").load(
new File(baseDir, "file").getAbsolutePath,
new File(baseDir, "file2").getAbsolutePath,
new File(uuid, "file3").getAbsolutePath,
uuid).rdd
}
assert(e.getMessage.startsWith("Path does not exist"))
} finally {
  Utils.deleteRecursively(baseDir)
}
} }
test("SPARK-13774: Check error message for not existent globbed paths") { test("SPARK-13774: Check error message for not existent globbed paths") {
val e = intercept[AnalysisException] (spark.read.format("text"). // Non-existent initial path component:
load( "/xyz/*")).getMessage() val nonExistentBasePath = "/" + UUID.randomUUID().toString
assert(e.startsWith("Path does not exist")) assert(!new File(nonExistentBasePath).exists())
val e = intercept[AnalysisException] {
spark.read.format("text").load(s"$nonExistentBasePath/*")
}
assert(e.getMessage.startsWith("Path does not exist"))
val e1 = intercept[AnalysisException] (spark.read.json("/mnt/*/*-xyz.json").rdd). // Existent initial path component, but no matching files:
getMessage() val baseDir = Utils.createTempDir()
assert(e1.startsWith("Path does not exist")) val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
assert(childDir.exists())
try {
val e1 = intercept[AnalysisException] {
spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
}
assert(e1.getMessage.startsWith("Path does not exist"))
} finally {
Utils.deleteRecursively(baseDir)
}
} }
} }