[SPARK-8498] [SQL] Add regression test for SPARK-8470
**Summary of the problem in SPARK-8470.** When using `HiveContext` to create a data frame of a user case class, Spark throws `scala.reflect.internal.MissingRequirementError` when it tries to infer the schema using reflection. This is caused by `HiveContext` silently overwriting the context class loader containing the user classes.
**What this issue is about.** This issue adds regression tests for SPARK-8470, which is already fixed in #6891. We closed SPARK-8470 as a duplicate because it is a different manifestation of the same problem in SPARK-8368. Due to the complexity of the reproduction, this requires us to pre-package a special test jar and include it in the Spark project itself.
I tested this with and without the fix in #6891 and verified that it passes only if the fix is present.
Author: Andrew Or <andrew@databricks.com>
Closes #6909 from andrewor14/SPARK-8498 and squashes the following commits:
5e9d688 [Andrew Or] Add regression test for SPARK-8470
(cherry picked from commit 093c34838d)
Signed-off-by: Yin Huai <yhuai@databricks.com>
This commit is contained in:
parent
2510365faa
commit
2248ad8b70
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.spark.{SparkConf, SparkContext}
|
||||||
|
import org.apache.spark.sql.hive.HiveContext
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Entry point in test application for SPARK-8498.
|
||||||
|
*
|
||||||
|
* This file is not meant to be compiled during tests. It is already included
|
||||||
|
* in a pre-built "test.jar" located in the same directory as this file.
|
||||||
|
* This is included here for reference only and should NOT be modified without
|
||||||
|
* rebuilding the test jar itself.
|
||||||
|
*
|
||||||
|
* This is used in org.apache.spark.sql.hive.HiveSparkSubmitSuite.
|
||||||
|
*/
|
||||||
|
object Main {
|
||||||
|
// Creates a SparkContext (master "local", app name "testing"), wraps it in a HiveContext,
// then builds and collects a data frame from a single MyCoolClass instance.
// `args` is not read anywhere in the body.
def main(args: Array[String]) {
|
||||||
|
println("Running regression test for SPARK-8498.")
|
||||||
|
val sc = new SparkContext("local", "testing")
|
||||||
|
// NOTE(review): the HiveContext is constructed before the data frame is created;
// per SPARK-8470 this construction is what used to replace the context class loader.
val hc = new HiveContext(sc)
|
||||||
|
// This line should not throw scala.reflect.internal.MissingRequirementError.
|
||||||
|
// See SPARK-8470 for more detail.
|
||||||
|
val df = hc.createDataFrame(Seq(MyCoolClass("1", "2", "3")))
|
||||||
|
// The collected rows are discarded; only completing without an exception matters here.
df.collect()
|
||||||
|
println("Regression test for SPARK-8498 success!")
|
||||||
|
// NOTE(review): `sc` is never stopped in this method; presumably cleanup is left to
// process exit -- confirm before reusing this pattern elsewhere.
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Dummy case class with three String fields, used in regression test SPARK-8498; Main creates a data frame from one instance of it. */
|
||||||
|
case class MyCoolClass(past: String, present: String, future: String)
|
||||||
|
|
BIN
sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar
Normal file
BIN
sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar
Normal file
Binary file not shown.
|
@ -35,6 +35,8 @@ class HiveSparkSubmitSuite
|
||||||
with ResetSystemProperties
|
with ResetSystemProperties
|
||||||
with Timeouts {
|
with Timeouts {
|
||||||
|
|
||||||
|
// TODO: rewrite these or mark them as slow tests to be run sparingly
|
||||||
|
|
||||||
def beforeAll() {
|
def beforeAll() {
|
||||||
System.setProperty("spark.testing", "true")
|
System.setProperty("spark.testing", "true")
|
||||||
}
|
}
|
||||||
|
@ -65,6 +67,17 @@ class HiveSparkSubmitSuite
|
||||||
runSparkSubmit(args)
|
runSparkSubmit(args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("SPARK-8498: MissingRequirementError during reflection") {
|
||||||
|
// This test uses a pre-built jar to test SPARK-8498. In a nutshell, the application in
|
||||||
|
// the jar creates a HiveContext and uses it to create a data frame from a local Seq of
|
||||||
|
// case class instances using reflection. Before the fix in SPARK-8470, this results in a
|
||||||
|
// MissingRequirementError because the HiveContext code mistakenly overrides the class
|
||||||
|
// loader that contains user classes. For more detail, see
// sql/hive/src/test/resources/regression-test-SPARK-8498/*.scala.
|
||||||
|
// NOTE(review): this is a relative path; presumably it is resolved against the working
// directory of the test run -- confirm for every build tool that runs this suite.
val testJar = "sql/hive/src/test/resources/regression-test-SPARK-8498/test.jar"
|
||||||
|
// "Main" is the entry-point object packaged in the test jar.
val args = Seq("--class", "Main", testJar)
|
||||||
|
// NOTE(review): there are no assertions here; presumably runSparkSubmit fails the test
// when the submitted application does not finish successfully -- verify in its body.
runSparkSubmit(args)
|
||||||
|
}
|
||||||
|
|
||||||
// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
|
// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
|
||||||
// This is copied from org.apache.spark.deploy.SparkSubmitSuite
|
// This is copied from org.apache.spark.deploy.SparkSubmitSuite
|
||||||
private def runSparkSubmit(args: Seq[String]): Unit = {
|
private def runSparkSubmit(args: Seq[String]): Unit = {
|
||||||
|
|
Loading…
Reference in a new issue