[SPARK-25891][PYTHON] Upgrade to Py4J 0.10.8.1

## What changes were proposed in this pull request?

Py4J 0.10.8.1 was released on October 21st and is the first Py4J release to officially support Python 3.7, so we should upgrade to get that official support. The release also includes several patches related to garbage collection.

https://www.py4j.org/changelog.html#py4j-0-10-8-and-py4j-0-10-8-1
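
As a quick runtime sanity check (illustrative, not part of this patch), the Py4J version the PySpark driver actually loads can be printed from a PySpark shell:

```python
# Sanity check (illustrative only): confirm the bundled Py4J version
# after the upgrade; the py4j.version module ships with the py4j package.
from py4j.version import __version__ as py4j_version

print(py4j_version)  # expected: 0.10.8.1
```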

## How was this patch tested?

Pass the Jenkins tests.

Closes #22901 from dongjoon-hyun/SPARK-25891.

Authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
Dongjoon Hyun 2018-10-31 09:55:03 -07:00
parent b3af917e76
commit e4cb42ad89
14 changed files with 13 additions and 12 deletions

@@ -57,7 +57,7 @@ export PYSPARK_PYTHON
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:$PYTHONPATH"
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
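
With the updated `bin/pyspark` environment in effect, the new source zip should be importable straight off `PYTHONPATH`; a minimal sketch to verify this (illustrative, not part of the patch):

```python
# Illustrative check: the py4j-0.10.8.1-src.zip entry should now appear
# on sys.path, and py4j should import directly from it.
import sys

print([p for p in sys.path if "py4j" in p])
import py4j  # resolves from the -src.zip on PYTHONPATH
```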

@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.8.1-src.zip;%PYTHONPATH%
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.7</version>
+      <version>0.10.8.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

@@ -32,7 +32,8 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
+      pythonPath +=
+        Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
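
For reference, a minimal Python sketch (not Spark code; assumes `SPARK_HOME` is set) of the path list this Scala snippet assembles, minus the Spark jar appended afterwards:

```python
# Minimal sketch mirroring the Scala code above: build the PySpark
# PYTHONPATH entries under SPARK_HOME (assumption: SPARK_HOME is set).
import os

spark_home = os.environ["SPARK_HOME"]
paths = [
    os.path.join(spark_home, "python", "lib", "pyspark.zip"),
    os.path.join(spark_home, "python", "lib", "py4j-0.10.8.1-src.zip"),
]
print(os.pathsep.join(paths))
```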

@@ -168,7 +168,7 @@ parquet-hadoop-1.10.0.jar
 parquet-hadoop-bundle-1.6.0.jar
 parquet-jackson-1.10.0.jar
 protobuf-java-2.5.0.jar
-py4j-0.10.7.jar
+py4j-0.10.8.1.jar
 pyrolite-4.13.jar
 scala-compiler-2.11.12.jar
 scala-library-2.11.12.jar

@@ -186,7 +186,7 @@ parquet-hadoop-1.10.0.jar
 parquet-hadoop-bundle-1.6.0.jar
 parquet-jackson-1.10.0.jar
 protobuf-java-2.5.0.jar
-py4j-0.10.7.jar
+py4j-0.10.8.1.jar
 pyrolite-4.13.jar
 re2j-1.1.jar
 scala-compiler-2.11.12.jar

@@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c
 ## Python Requirements
-At its core PySpark depends on Py4J (currently version 0.10.7), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).
+At its core PySpark depends on Py4J (currently version 0.10.8.1), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).

@@ -37,7 +37,7 @@ BUILDDIR ?= _build
 # 2. If both are set, SPHINXBUILD has a higher priority over SPHINXPYTHON
 # 3. By default, SPHINXBUILD is used as 'sphinx-build'.
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.7-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.8.1-src.zip)
 # Internal variables.
 PAPEROPT_a4 = -D latex_paper_size=a4

Binary file not shown.

Binary file not shown.

@@ -201,7 +201,7 @@ try:
             'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
         scripts=scripts,
         license='http://www.apache.org/licenses/LICENSE-2.0',
-        install_requires=['py4j==0.10.7'],
+        install_requires=['py4j==0.10.8.1'],
         setup_requires=['pypandoc'],
         extras_require={
             'ml': ['numpy>=1.7'],
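
Since `setup.py` pins Py4J exactly, a pip-installed PySpark must resolve exactly this version; a hedged way to assert that in a given environment (illustrative only):

```python
# Illustrative assertion: the installed py4j distribution should match
# the exact pin in PySpark's setup.py.
import pkg_resources

assert pkg_resources.get_distribution("py4j").version == "0.10.8.1"
```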

@@ -1169,7 +1169,7 @@ private[spark] class Client(
     val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
     require(pyArchivesFile.exists(),
       s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
-    val py4jFile = new File(pyLibPath, "py4j-0.10.7-src.zip")
+    val py4jFile = new File(pyLibPath, "py4j-0.10.8.1-src.zip")
     require(py4jFile.exists(),
       s"$py4jFile not found; cannot run pyspark application in YARN mode.")
     Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())

@@ -265,7 +265,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     // needed locations.
     val sparkHome = sys.props("spark.test.home")
     val pythonPath = Seq(
-      s"$sparkHome/python/lib/py4j-0.10.7-src.zip",
+      s"$sparkHome/python/lib/py4j-0.10.8.1-src.zip",
       s"$sparkHome/python")
     val extraEnvVars = Map(
       "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),

@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
   export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:${PYTHONPATH}"
   export PYSPARK_PYTHONPATH_SET=1
 fi