[SPARK-21278][PYSPARK] Upgrade to Py4J 0.10.6
## What changes were proposed in this pull request?

This PR aims to bump Py4J in order to fix the following float/double bug.
Py4J 0.10.5 fixes this (https://github.com/bartdag/py4j/issues/272) and the latest Py4J is 0.10.6.

**BEFORE**
```
>>> df = spark.range(1)
>>> df.select(df['id'] + 17.133574204226083).show()
+--------------------+
|(id + 17.1335742042)|
+--------------------+
|       17.1335742042|
+--------------------+
```

**AFTER**
```
>>> df = spark.range(1)
>>> df.select(df['id'] + 17.133574204226083).show()
+-------------------------+
|(id + 17.133574204226083)|
+-------------------------+
|       17.133574204226083|
+-------------------------+
```

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #18546 from dongjoon-hyun/SPARK-21278.
This commit is contained in:
parent
c8e7f445b9
commit
c8d0aba198
2
LICENSE
2
LICENSE
|
@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
|
|||
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
|
||||
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
|
||||
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
|
||||
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/)
|
||||
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/)
|
||||
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
|
||||
(BSD licence) sbt and sbt-launch-lib.bash
|
||||
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
|
||||
|
|
|
@ -57,7 +57,7 @@ export PYSPARK_PYTHON
|
|||
|
||||
# Add the PySpark classes to the Python path:
|
||||
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
|
||||
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH"
|
||||
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:$PYTHONPATH"
|
||||
|
||||
# Load the PySpark shell.py script when ./pyspark is used interactively:
|
||||
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
|
||||
|
|
|
@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
|
|||
)
|
||||
|
||||
set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
|
||||
set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.4-src.zip;%PYTHONPATH%
|
||||
set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.6-src.zip;%PYTHONPATH%
|
||||
|
||||
set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
|
||||
set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
|
||||
|
|
|
@ -335,7 +335,7 @@
|
|||
<dependency>
|
||||
<groupId>net.sf.py4j</groupId>
|
||||
<artifactId>py4j</artifactId>
|
||||
<version>0.10.4</version>
|
||||
<version>0.10.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
|
|
@ -32,7 +32,7 @@ private[spark] object PythonUtils {
|
|||
val pythonPath = new ArrayBuffer[String]
|
||||
for (sparkHome <- sys.env.get("SPARK_HOME")) {
|
||||
pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
|
||||
pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.4-src.zip").mkString(File.separator)
|
||||
pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.6-src.zip").mkString(File.separator)
|
||||
}
|
||||
pythonPath ++= SparkContext.jarOfObject(this)
|
||||
pythonPath.mkString(File.pathSeparator)
|
||||
|
|
|
@ -156,7 +156,7 @@ parquet-jackson-1.8.2.jar
|
|||
pmml-model-1.2.15.jar
|
||||
pmml-schema-1.2.15.jar
|
||||
protobuf-java-2.5.0.jar
|
||||
py4j-0.10.4.jar
|
||||
py4j-0.10.6.jar
|
||||
pyrolite-4.13.jar
|
||||
scala-compiler-2.11.8.jar
|
||||
scala-library-2.11.8.jar
|
||||
|
|
|
@ -157,7 +157,7 @@ parquet-jackson-1.8.2.jar
|
|||
pmml-model-1.2.15.jar
|
||||
pmml-schema-1.2.15.jar
|
||||
protobuf-java-2.5.0.jar
|
||||
py4j-0.10.4.jar
|
||||
py4j-0.10.6.jar
|
||||
pyrolite-4.13.jar
|
||||
scala-compiler-2.11.8.jar
|
||||
scala-library-2.11.8.jar
|
||||
|
|
|
@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c
|
|||
|
||||
## Python Requirements
|
||||
|
||||
At its core PySpark depends on Py4J (currently version 0.10.4), but additional sub-packages have their own requirements (including numpy and pandas).
|
||||
At its core PySpark depends on Py4J (currently version 0.10.6), but additional sub-packages have their own requirements (including numpy and pandas).
|
||||
|
|
|
@ -7,7 +7,7 @@ SPHINXBUILD ?= sphinx-build
|
|||
PAPER ?=
|
||||
BUILDDIR ?= _build
|
||||
|
||||
export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.4-src.zip)
|
||||
export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.6-src.zip)
|
||||
|
||||
# User-friendly check for sphinx-build
|
||||
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||
|
|
Binary file not shown.
BIN
python/lib/py4j-0.10.6-src.zip
Normal file
BIN
python/lib/py4j-0.10.6-src.zip
Normal file
Binary file not shown.
|
@ -194,7 +194,7 @@ try:
|
|||
'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
|
||||
scripts=scripts,
|
||||
license='http://www.apache.org/licenses/LICENSE-2.0',
|
||||
install_requires=['py4j==0.10.4'],
|
||||
install_requires=['py4j==0.10.6'],
|
||||
setup_requires=['pypandoc'],
|
||||
extras_require={
|
||||
'ml': ['numpy>=1.7'],
|
||||
|
|
|
@ -1124,7 +1124,7 @@ private[spark] class Client(
|
|||
val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
|
||||
require(pyArchivesFile.exists(),
|
||||
s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
|
||||
val py4jFile = new File(pyLibPath, "py4j-0.10.4-src.zip")
|
||||
val py4jFile = new File(pyLibPath, "py4j-0.10.6-src.zip")
|
||||
require(py4jFile.exists(),
|
||||
s"$py4jFile not found; cannot run pyspark application in YARN mode.")
|
||||
Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
|
||||
|
|
|
@ -249,7 +249,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
|
|||
// needed locations.
|
||||
val sparkHome = sys.props("spark.test.home")
|
||||
val pythonPath = Seq(
|
||||
s"$sparkHome/python/lib/py4j-0.10.4-src.zip",
|
||||
s"$sparkHome/python/lib/py4j-0.10.6-src.zip",
|
||||
s"$sparkHome/python")
|
||||
val extraEnvVars = Map(
|
||||
"PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
|
||||
|
|
|
@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
|
|||
# Add the PySpark classes to the PYTHONPATH:
|
||||
if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
|
||||
export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
|
||||
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH}"
|
||||
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:${PYTHONPATH}"
|
||||
export PYSPARK_PYTHONPATH_SET=1
|
||||
fi
|
||||
|
|
Loading…
Reference in a new issue