[SPARK-26803][PYTHON] Add sbin subdirectory to pyspark

## What changes were proposed in this pull request?

Modifies `setup.py` so that `sbin` subdirectory is included in pyspark

## How was this patch tested?

Manually tested with python 2.7 and python 3.7

```sh
$ ./build/mvn -D skipTests -P hive -P hive-thriftserver -P yarn -P mesos clean package
$ cd python
$ python setup.py sdist
$ pip install  dist/pyspark-2.1.0.dev0.tar.gz
```

Checked manually that `sbin` is now present in install directory.

srowen holdenk

Closes #23715 from oulenz/pyspark_sbin.

Authored-by: Oliver Urs Lenz <oliver.urs.lenz@gmail.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
This commit is contained in:
Oliver Urs Lenz 2019-02-27 08:39:55 -06:00 committed by Sean Owen
parent 7912dbb88f
commit 28e1695e17
2 changed files with 12 additions and 0 deletions

View file

@ -17,6 +17,7 @@
global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
graft deps/bin
recursive-include deps/sbin spark-config.sh spark-daemon.sh start-history-server.sh stop-history-server.sh
recursive-include deps/data *.data *.txt
recursive-include deps/licenses *.txt
recursive-include deps/examples *.py

View file

@ -69,10 +69,12 @@ elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
USER_SCRIPTS_PATH = os.path.join(SPARK_HOME, "sbin")
DATA_PATH = os.path.join(SPARK_HOME, "data")
LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
USER_SCRIPTS_TARGET = os.path.join(TEMP_PATH, "sbin")
JARS_TARGET = os.path.join(TEMP_PATH, "jars")
EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
DATA_TARGET = os.path.join(TEMP_PATH, "data")
@ -122,6 +124,7 @@ try:
if _supports_symlinks():
os.symlink(JARS_PATH, JARS_TARGET)
os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
os.symlink(USER_SCRIPTS_PATH, USER_SCRIPTS_TARGET)
os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
os.symlink(DATA_PATH, DATA_TARGET)
os.symlink(LICENSES_PATH, LICENSES_TARGET)
@ -129,6 +132,7 @@ try:
# For windows fall back to the slower copytree
copytree(JARS_PATH, JARS_TARGET)
copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
copytree(USER_SCRIPTS_PATH, USER_SCRIPTS_TARGET)
copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
copytree(DATA_PATH, DATA_TARGET)
copytree(LICENSES_PATH, LICENSES_TARGET)
@ -177,6 +181,7 @@ try:
'pyspark.sql',
'pyspark.streaming',
'pyspark.bin',
'pyspark.sbin',
'pyspark.jars',
'pyspark.python.pyspark',
'pyspark.python.lib',
@ -187,6 +192,7 @@ try:
package_dir={
'pyspark.jars': 'deps/jars',
'pyspark.bin': 'deps/bin',
'pyspark.sbin': 'deps/sbin',
'pyspark.python.lib': 'lib',
'pyspark.data': 'deps/data',
'pyspark.licenses': 'deps/licenses',
@ -195,6 +201,9 @@ try:
package_data={
'pyspark.jars': ['*.jar'],
'pyspark.bin': ['*'],
'pyspark.sbin': ['spark-config.sh', 'spark-daemon.sh',
'start-history-server.sh',
'stop-history-server.sh', ],
'pyspark.python.lib': ['*.zip'],
'pyspark.data': ['*.txt', '*.data'],
'pyspark.licenses': ['*.txt'],
@ -231,12 +240,14 @@ finally:
if _supports_symlinks():
os.remove(os.path.join(TEMP_PATH, "jars"))
os.remove(os.path.join(TEMP_PATH, "bin"))
os.remove(os.path.join(TEMP_PATH, "sbin"))
os.remove(os.path.join(TEMP_PATH, "examples"))
os.remove(os.path.join(TEMP_PATH, "data"))
os.remove(os.path.join(TEMP_PATH, "licenses"))
else:
rmtree(os.path.join(TEMP_PATH, "jars"))
rmtree(os.path.join(TEMP_PATH, "bin"))
rmtree(os.path.join(TEMP_PATH, "sbin"))
rmtree(os.path.join(TEMP_PATH, "examples"))
rmtree(os.path.join(TEMP_PATH, "data"))
rmtree(os.path.join(TEMP_PATH, "licenses"))