[SPARK-34789][TEST] Introduce Jetty based construct for integration tests where HTTP server is used
### What changes were proposed in this pull request? Introducing a new test construct: ``` withHttpServer() { baseURL => ... } ``` Which starts and stops a Jetty server to serve files via HTTP. Moreover this PR uses this new construct in the test `Run SparkRemoteFileTest using a remote data file`. ### Why are the changes needed? Before this PR, GitHub URLs were used, like "https://raw.githubusercontent.com/apache/spark/master/data/mllib/pagerank_data.txt". This connects two Spark versions in an unhealthy way: the "master" branch, which is a moving target, is coupled to the committed test code, which is fixed (it might even be released). This way a test running for an earlier version of Spark expects something (filename, content, path) from the later release, and, what is worse, when the moving version changes, the earlier test will break. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit test. Closes #31935 from attilapiros/SPARK-34789. Authored-by: “attilapiros” <piros.attila.zsolt@gmail.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
7ff9d2e3ee
commit
8a3815f722
|
@ -18,7 +18,7 @@
|
|||
package org.apache.spark
|
||||
|
||||
import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream}
|
||||
import java.net.{HttpURLConnection, URI, URL}
|
||||
import java.net.{HttpURLConnection, InetSocketAddress, URI, URL}
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.nio.file.{Files => JavaFiles, Paths}
|
||||
import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE}
|
||||
|
@ -41,6 +41,11 @@ import scala.util.Try
|
|||
import com.google.common.io.{ByteStreams, Files}
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.apache.log4j.PropertyConfigurator
|
||||
import org.eclipse.jetty.server.Handler
|
||||
import org.eclipse.jetty.server.Server
|
||||
import org.eclipse.jetty.server.handler.DefaultHandler
|
||||
import org.eclipse.jetty.server.handler.HandlerList
|
||||
import org.eclipse.jetty.server.handler.ResourceHandler
|
||||
import org.json4s.JsonAST.JValue
|
||||
import org.json4s.jackson.JsonMethods.{compact, render}
|
||||
|
||||
|
@ -364,6 +369,22 @@ private[spark] object TestUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Runs `body` against a temporary Jetty HTTP server serving files from `resBaseDir`.
 *
 * The server binds to an ephemeral port on the local canonical host, is started before
 * `body` runs, and is always stopped afterwards — even if startup or `body` throws.
 *
 * @param resBaseDir directory whose contents are served over HTTP (defaults to the
 *                   current working directory)
 * @param body       callback invoked with the server's base URL
 */
def withHttpServer(resBaseDir: String = ".")(body: URL => Unit): Unit = {
  // Port 0 asks the OS for a free ephemeral port, avoiding clashes between parallel test runs.
  val server = new Server(new InetSocketAddress(Utils.localCanonicalHostName, 0))
  val resHandler = new ResourceHandler()
  resHandler.setResourceBase(resBaseDir)
  val handlers = new HandlerList()
  // DefaultHandler answers (e.g. with 404s) any request the resource handler cannot serve.
  handlers.setHandlers(Array[Handler](resHandler, new DefaultHandler()))
  server.setHandler(handlers)
  try {
    // start() inside the try: if startup fails part-way, the finally block still
    // releases whatever the server already acquired (stop() is safe on a
    // non-started or partially-started Jetty server).
    server.start()
    body(server.getURI.toURL)
  } finally {
    server.stop()
  }
}
|
||||
|
||||
/**
|
||||
* Wait until at least `numExecutors` executors are up, or throw `TimeoutException` if the waiting
|
||||
* time elapsed before `numExecutors` executors up. Exposed for testing.
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.spark.deploy.k8s.integrationtest
|
|||
|
||||
import io.fabric8.kubernetes.api.model.Pod
|
||||
|
||||
import org.apache.spark.TestUtils
|
||||
import org.apache.spark.launcher.SparkLauncher
|
||||
|
||||
private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
|
||||
|
@ -99,9 +100,12 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
|
|||
}
|
||||
|
||||
test("Run SparkRemoteFileTest using a remote data file", k8sTestTag) {
|
||||
sparkAppConf
|
||||
.set("spark.files", REMOTE_PAGE_RANK_DATA_FILE)
|
||||
runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME))
|
||||
assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
|
||||
TestUtils.withHttpServer(sys.props("spark.test.home")) { baseURL =>
|
||||
sparkAppConf
|
||||
.set("spark.files", baseURL.toString + REMOTE_PAGE_RANK_DATA_FILE)
|
||||
runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,7 +114,6 @@ private[spark] object BasicTestsSuite {
|
|||
val CONTAINER_LOCAL_FILE_DOWNLOAD_PATH = "/var/spark-data/spark-files"
|
||||
val CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE =
|
||||
s"$CONTAINER_LOCAL_FILE_DOWNLOAD_PATH/pagerank_data.txt"
|
||||
val REMOTE_PAGE_RANK_DATA_FILE =
|
||||
"https://raw.githubusercontent.com/apache/spark/master/data/mllib/pagerank_data.txt"
|
||||
val REMOTE_PAGE_RANK_DATA_FILE = "data/mllib/pagerank_data.txt"
|
||||
val REMOTE_PAGE_RANK_FILE_NAME = "pagerank_data.txt"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue