# [MINOR] Avoid hardcoded `py4j-0.10.8.1-src.zip` in Scala
## What changes were proposed in this pull request?

This PR deduplicates the hardcoded `py4j-0.10.8.1-src.zip` file name in the Scala code in order to make future Py4J upgrades easier: the archive name now lives in a single constant, `PythonUtils.PY4J_ZIP_NAME`, which every other call site references.

## How was this patch tested?

N/A

Closes #24770 from HyukjinKwon/minor-py4j-dedup.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
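For illustration, here is a minimal, self-contained Scala sketch of the pattern this commit applies. The object and method names below are hypothetical stand-ins rather than Spark's actual code: the archive name is declared once and every path is derived from that constant, so a Py4J upgrade only touches one line.

```scala
import java.io.File

// Hypothetical stand-in for the pattern used in PythonUtils: declare the
// Py4J archive name once and derive every filesystem path from it.
object Py4jPathSketch {
  // Bump this single constant when upgrading Py4J.
  val PY4J_ZIP_NAME = "py4j-0.10.8.1-src.zip"

  // Builds <sparkHome>/python/lib/<PY4J_ZIP_NAME> in an OS-independent way.
  def py4jArchivePath(sparkHome: String): String =
    Seq(sparkHome, "python", "lib", PY4J_ZIP_NAME).mkString(File.separator)

  def main(args: Array[String]): Unit = {
    // "/opt/spark" is an illustrative SPARK_HOME value.
    println(py4jArchivePath("/opt/spark"))
    // On Unix prints: /opt/spark/python/lib/py4j-0.10.8.1-src.zip
  }
}
```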
Parent: `809821a283` · Commit: `8b18ef5c7b`
**core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala**

```diff
@@ -27,13 +27,15 @@ import org.apache.spark.SparkContext
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 
 private[spark] object PythonUtils {
+  val PY4J_ZIP_NAME = "py4j-0.10.8.1-src.zip"
+
   /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */
   def sparkPythonPath: String = {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
       pythonPath +=
-        Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator)
+        Seq(sparkHome, "python", "lib", PY4J_ZIP_NAME).mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
```
**resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala**

```diff
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier
 import org.apache.hadoop.yarn.util.Records
 
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
+import org.apache.spark.api.python.PythonUtils
 import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil}
 import org.apache.spark.deploy.security.HadoopDelegationTokenManager
 import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._
@@ -1201,7 +1202,7 @@ private[spark] class Client(
     val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
     require(pyArchivesFile.exists(),
       s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
-    val py4jFile = new File(pyLibPath, "py4j-0.10.8.1-src.zip")
+    val py4jFile = new File(pyLibPath, PythonUtils.PY4J_ZIP_NAME)
     require(py4jFile.exists(),
       s"$py4jFile not found; cannot run pyspark application in YARN mode.")
     Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
```
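The call-site change above follows one pattern: look the archive up under the shared constant and fail fast if it is missing. Below is a hedged, self-contained sketch of that pattern, with a local copy of the constant standing in for `PythonUtils.PY4J_ZIP_NAME`.

```scala
import java.io.File

// Self-contained sketch of the lookup pattern; in Spark the constant comes
// from org.apache.spark.api.python.PythonUtils rather than a local val.
object Py4jLookupSketch {
  val PY4J_ZIP_NAME = "py4j-0.10.8.1-src.zip" // mirrors PythonUtils.PY4J_ZIP_NAME

  // Returns the Py4J archive under pyLibPath, failing fast when it is absent.
  def requirePy4j(pyLibPath: File): File = {
    val py4jFile = new File(pyLibPath, PY4J_ZIP_NAME)
    require(py4jFile.exists(),
      s"$py4jFile not found; cannot run pyspark application in YARN mode.")
    py4jFile
  }
}
```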
**resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala**

```diff
@@ -32,6 +32,7 @@ import org.scalatest.Matchers
 import org.scalatest.concurrent.Eventually._
 
 import org.apache.spark._
+import org.apache.spark.api.python.PythonUtils
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.deploy.yarn.config._
 import org.apache.spark.internal.Logging
@@ -269,7 +270,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     // needed locations.
     val sparkHome = sys.props("spark.test.home")
     val pythonPath = Seq(
-      s"$sparkHome/python/lib/py4j-0.10.8.1-src.zip",
+      s"$sparkHome/python/lib/${PythonUtils.PY4J_ZIP_NAME}",
       s"$sparkHome/python")
     val extraEnvVars = Map(
       "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
```
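For context on the test change, here is a small illustrative sketch of how the `PYSPARK_ARCHIVES_PATH` value is assembled from the shared constant; the `local:` prefix marks each entry as a path already present on the YARN nodes. Again, a local val stands in for `PythonUtils.PY4J_ZIP_NAME`, and `/opt/spark` is a hypothetical Spark home.

```scala
import java.io.File

// Illustrative sketch: derive the PYSPARK_ARCHIVES_PATH value from one
// shared archive-name constant instead of a repeated string literal.
object ArchivesPathSketch {
  val PY4J_ZIP_NAME = "py4j-0.10.8.1-src.zip" // mirrors PythonUtils.PY4J_ZIP_NAME

  def pysparkArchivesPath(sparkHome: String): String = {
    val pythonPath = Seq(
      s"$sparkHome/python/lib/$PY4J_ZIP_NAME",
      s"$sparkHome/python")
    // "local:" tells YARN each archive is node-local, not to be distributed.
    pythonPath.map("local:" + _).mkString(File.pathSeparator)
  }

  def main(args: Array[String]): Unit =
    println(pysparkArchivesPath("/opt/spark"))
}
```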
**sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala**

```diff
@@ -23,7 +23,7 @@ import scala.collection.JavaConverters._
 import scala.util.Try
 
 import org.apache.spark.TestUtils
-import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction}
+import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction, PythonUtils}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.config.Tests
 import org.apache.spark.sql.catalyst.plans.SQLHelper
@@ -74,7 +74,7 @@ object IntegratedUDFTestUtils extends SQLHelper {
   // It is possible the test is being ran without the build.
   private lazy val sourcePath = Paths.get(sparkHome, "python").toAbsolutePath
   private lazy val py4jPath = Paths.get(
-    sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").toAbsolutePath
+    sparkHome, "python", "lib", PythonUtils.PY4J_ZIP_NAME).toAbsolutePath
   private lazy val pysparkPythonPath = s"$py4jPath:$sourcePath"
 
   private lazy val isPythonAvailable: Boolean = TestUtils.testCommandAvailable(pythonExec)
```