[SPARK-15061][PYSPARK] Upgrade to Py4J 0.10.1

## What changes were proposed in this pull request?

This upgrades to Py4J 0.10.1, which reduces syscall overhead in the Java gateway (see https://github.com/bartdag/py4j/issues/201 ). Related: https://issues.apache.org/jira/browse/SPARK-6728 .

## How was this patch tested?

Existing doctests and unit tests pass.

Author: Holden Karau <holden@us.ibm.com>

Closes #13064 from holdenk/SPARK-15061-upgrade-to-py4j-0.10.1.
This commit is contained in:
Holden Karau 2016-05-13 08:59:18 +01:00 committed by Sean Owen
parent bdff299f9e
commit 382dbc12bb
16 changed files with 15 additions and 15 deletions

View file

@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
(The New BSD License) Py4J (net.sf.py4j:py4j:0.9.2 - http://py4j.sourceforge.net/) (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/)
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
(BSD licence) sbt and sbt-launch-lib.bash (BSD licence) sbt and sbt-launch-lib.bash
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

View file

@ -63,7 +63,7 @@ export PYSPARK_PYTHON
# Add the PySpark classes to the Python path: # Add the PySpark classes to the Python path:
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:$PYTHONPATH" export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:$PYTHONPATH"
# Load the PySpark shell.py script when ./pyspark is used interactively: # Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

View file

@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
) )
set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.9.2-src.zip;%PYTHONPATH% set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.1-src.zip;%PYTHONPATH%
set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

View file

@ -328,7 +328,7 @@
<dependency> <dependency>
<groupId>net.sf.py4j</groupId> <groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId> <artifactId>py4j</artifactId>
<version>0.9.2</version> <version>0.10.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View file

@ -32,7 +32,7 @@ private[spark] object PythonUtils {
val pythonPath = new ArrayBuffer[String] val pythonPath = new ArrayBuffer[String]
for (sparkHome <- sys.env.get("SPARK_HOME")) { for (sparkHome <- sys.env.get("SPARK_HOME")) {
pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.9.2-src.zip").mkString(File.separator) pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.1-src.zip").mkString(File.separator)
} }
pythonPath ++= SparkContext.jarOfObject(this) pythonPath ++= SparkContext.jarOfObject(this)
pythonPath.mkString(File.pathSeparator) pythonPath.mkString(File.pathSeparator)

View file

@ -140,7 +140,7 @@ pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar protobuf-java-2.5.0.jar
py4j-0.9.2.jar py4j-0.10.1.jar
pyrolite-4.9.jar pyrolite-4.9.jar
scala-compiler-2.11.8.jar scala-compiler-2.11.8.jar
scala-library-2.11.8.jar scala-library-2.11.8.jar

View file

@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar protobuf-java-2.5.0.jar
py4j-0.9.2.jar py4j-0.10.1.jar
pyrolite-4.9.jar pyrolite-4.9.jar
scala-compiler-2.11.8.jar scala-compiler-2.11.8.jar
scala-library-2.11.8.jar scala-library-2.11.8.jar

View file

@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar protobuf-java-2.5.0.jar
py4j-0.9.2.jar py4j-0.10.1.jar
pyrolite-4.9.jar pyrolite-4.9.jar
scala-compiler-2.11.8.jar scala-compiler-2.11.8.jar
scala-library-2.11.8.jar scala-library-2.11.8.jar

View file

@ -155,7 +155,7 @@ pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar protobuf-java-2.5.0.jar
py4j-0.9.2.jar py4j-0.10.1.jar
pyrolite-4.9.jar pyrolite-4.9.jar
scala-compiler-2.11.8.jar scala-compiler-2.11.8.jar
scala-library-2.11.8.jar scala-library-2.11.8.jar

View file

@ -156,7 +156,7 @@ pmml-agent-1.2.7.jar
pmml-model-1.2.7.jar pmml-model-1.2.7.jar
pmml-schema-1.2.7.jar pmml-schema-1.2.7.jar
protobuf-java-2.5.0.jar protobuf-java-2.5.0.jar
py4j-0.9.2.jar py4j-0.10.1.jar
pyrolite-4.9.jar pyrolite-4.9.jar
scala-compiler-2.11.8.jar scala-compiler-2.11.8.jar
scala-library-2.11.8.jar scala-library-2.11.8.jar

View file

@ -7,7 +7,7 @@ SPHINXBUILD ?= sphinx-build
PAPER ?= PAPER ?=
BUILDDIR ?= _build BUILDDIR ?= _build
export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.9.2-src.zip) export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.1-src.zip)
# User-friendly check for sphinx-build # User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)

Binary file not shown.

Binary file not shown.

View file

@ -27,4 +27,4 @@ fi
export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
# Add the PySpark classes to the PYTHONPATH: # Add the PySpark classes to the PYTHONPATH:
export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:${PYTHONPATH}" export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:${PYTHONPATH}"

View file

@ -1138,9 +1138,9 @@ private[spark] class Client(
val pyArchivesFile = new File(pyLibPath, "pyspark.zip") val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
require(pyArchivesFile.exists(), require(pyArchivesFile.exists(),
"pyspark.zip not found; cannot run pyspark application in YARN mode.") "pyspark.zip not found; cannot run pyspark application in YARN mode.")
val py4jFile = new File(pyLibPath, "py4j-0.9.2-src.zip") val py4jFile = new File(pyLibPath, "py4j-0.10.1-src.zip")
require(py4jFile.exists(), require(py4jFile.exists(),
"py4j-0.9.2-src.zip not found; cannot run pyspark application in YARN mode.") "py4j-0.10.1-src.zip not found; cannot run pyspark application in YARN mode.")
Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath()) Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
} }
} }

View file

@ -197,7 +197,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
// needed locations. // needed locations.
val sparkHome = sys.props("spark.test.home") val sparkHome = sys.props("spark.test.home")
val pythonPath = Seq( val pythonPath = Seq(
s"$sparkHome/python/lib/py4j-0.9.2-src.zip", s"$sparkHome/python/lib/py4j-0.10.1-src.zip",
s"$sparkHome/python") s"$sparkHome/python")
val extraEnv = Map( val extraEnv = Map(
"PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator), "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),