#!/usr/bin/env python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import glob
import os
import sys
from setuptools import setup, find_packages
from shutil import copyfile, copytree, rmtree

if sys.version_info < (2, 7):
    print("Python versions prior to 2.7 are not supported for pip installed PySpark.",
          file=sys.stderr)
    sys.exit(-1)

try:
    exec(open('pyspark/version.py').read())
except IOError:
    print("Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
          file=sys.stderr)
    sys.exit(-1)
VERSION = __version__  # noqa
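# Note: pyspark/version.py is expected to define a module-level __version__ string, which the
# exec call above brings into this namespace (hence the `# noqa`, since static checkers cannot
# see that __version__ is defined).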

# A temporary path so we can access above the Python project root and fetch scripts and jars we need
TEMP_PATH = "deps"
SPARK_HOME = os.path.abspath("../")

# Provide guidance about how to use setup.py
incorrect_invocation_message = """
If you are installing pyspark from spark source, you must first build Spark and
run sdist.

    To build Spark with maven you can run:
      ./build/mvn -DskipTests clean package
    Building the source dist is done in the Python directory:
      cd python
      python setup.py sdist
      pip install dist/*.tar.gz"""

# Figure out where the jars that we need to package with PySpark are located.
JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/"))

if len(JARS_PATH) == 1:
    JARS_PATH = JARS_PATH[0]
elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1):
    # Release mode puts the jars in a jars directory
    JARS_PATH = os.path.join(SPARK_HOME, "jars")
elif len(JARS_PATH) > 1:
    print("Assembly jars exist for multiple Scala versions ({0}), please clean up assembly/target".format(
        JARS_PATH), file=sys.stderr)
    sys.exit(-1)
elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
    print(incorrect_invocation_message, file=sys.stderr)
    sys.exit(-1)
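# To recap the branches above: exactly one assembly/target/scala-*/jars/ match means a locally
# built Spark, a RELEASE file plus ../jars/ means a binary distribution, multiple matches mean a
# stale assembly/target that needs cleaning, and no match at all (with no existing deps/ dir)
# means Spark has not been built yet.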

EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
DATA_PATH = os.path.join(SPARK_HOME, "data")
LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")

SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
JARS_TARGET = os.path.join(TEMP_PATH, "jars")
EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
DATA_TARGET = os.path.join(TEMP_PATH, "data")
LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")

# Check and see if we are under the spark path, in which case we need to build the symlink farm.
# This is important because we only want to build the symlink farm while under Spark; otherwise we
# want to use the symlink farm. And if the symlink farm already exists while we are under Spark
# (e.g. a partially built sdist) we should error and have the user sort it out.
in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or
            (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1))


def _supports_symlinks():
    """Check if the system supports symlinks (e.g. *nix) or not."""
    return getattr(os, "symlink", None) is not None
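# Note: os.symlink is missing on Windows under Python 2 and may require elevated privileges under
# Python 3, which is why the copytree() fallback below exists.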

if (in_spark):
    # Construct links for setup
    try:
        os.mkdir(TEMP_PATH)
    except:
        print("Temp path for symlink to parent already exists {0}".format(TEMP_PATH),
              file=sys.stderr)
        sys.exit(-1)

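# A leftover deps/ directory usually means an earlier packaging run exited before its cleanup
# step; removing it manually lets setup.py rebuild the symlink farm.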
# If you are changing the versions here, please also change ./python/pyspark/sql/utils.py and
# ./python/run-tests.py. In case of Arrow, you should also check ./pom.xml.
_minimum_pandas_version = "0.19.2"
_minimum_pyarrow_version = "0.8.0"
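# These minimums are reused below for the 'sql' entry in extras_require, so pip installs of
# pyspark[sql] should pull in compatible pandas and pyarrow versions.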

try:
    # We copy the shell script to be under pyspark/python/pyspark so that the launcher scripts
    # find it where expected. The rest of the files aren't copied because they are accessed
    # using Python imports instead which will be resolved correctly.
    try:
        os.makedirs("pyspark/python/pyspark")
    except OSError:
        # Don't worry if the directory already exists.
        pass
    copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py")

    if (in_spark):
        # Construct the symlink farm - this is necessary since we can't refer to the path above the
        # package root and we need to copy the jars and scripts which are up above the python root.
        if _supports_symlinks():
            os.symlink(JARS_PATH, JARS_TARGET)
            os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
            os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
            os.symlink(DATA_PATH, DATA_TARGET)
            os.symlink(LICENSES_PATH, LICENSES_TARGET)
        else:
            # For Windows fall back to the slower copytree
            copytree(JARS_PATH, JARS_TARGET)
            copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
            copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
            copytree(DATA_PATH, DATA_TARGET)
            copytree(LICENSES_PATH, LICENSES_TARGET)
    else:
        # If we are not inside of SPARK_HOME, verify we have the required symlink farm
        if not os.path.exists(JARS_TARGET):
            print("To build the packaging you must be in the python directory under SPARK_HOME.",
                  file=sys.stderr)

    if not os.path.isdir(SCRIPTS_TARGET):
        print(incorrect_invocation_message, file=sys.stderr)
        sys.exit(-1)

    # The scripts directive requires a list of each script path and does not take wildcards.
    script_names = os.listdir(SCRIPTS_TARGET)
    scripts = list(map(lambda script: os.path.join(SCRIPTS_TARGET, script), script_names))
    # We add find_spark_home.py to the bin directory we install so that pip installed PySpark
    # will search for SPARK_HOME with Python.
    scripts.append("pyspark/find_spark_home.py")

    # Parse the README markdown file into rst for PyPI
    long_description = "!!!!! missing pandoc do not upload to PyPI !!!!"
    try:
        import pypandoc
        long_description = pypandoc.convert('README.md', 'rst')
    except ImportError:
        print("Could not import pypandoc - required to package PySpark", file=sys.stderr)
    except OSError:
        print("Could not convert - pandoc is not installed", file=sys.stderr)

    setup(
        name='pyspark',
        version=VERSION,
        description='Apache Spark Python API',
        long_description=long_description,
        author='Spark Developers',
        author_email='dev@spark.apache.org',
        url='https://github.com/apache/spark/tree/master/python',
        packages=['pyspark',
                  'pyspark.mllib',
                  'pyspark.mllib.linalg',
                  'pyspark.mllib.stat',
                  'pyspark.ml',
                  'pyspark.ml.linalg',
                  'pyspark.ml.param',
                  'pyspark.sql',
                  'pyspark.streaming',
                  'pyspark.bin',
                  'pyspark.jars',
                  'pyspark.python.pyspark',
                  'pyspark.python.lib',
                  'pyspark.data',
                  'pyspark.licenses',
                  'pyspark.examples.src.main.python'],
        include_package_data=True,
        package_dir={
            'pyspark.jars': 'deps/jars',
            'pyspark.bin': 'deps/bin',
            'pyspark.python.lib': 'lib',
            'pyspark.data': 'deps/data',
            'pyspark.licenses': 'deps/licenses',
            'pyspark.examples.src.main.python': 'deps/examples',
        },
        package_data={
            'pyspark.jars': ['*.jar'],
            'pyspark.bin': ['*'],
            'pyspark.python.lib': ['*.zip'],
            'pyspark.data': ['*.txt', '*.data'],
            'pyspark.licenses': ['*.txt'],
            'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
        scripts=scripts,
        license='http://www.apache.org/licenses/LICENSE-2.0',
        install_requires=['py4j==0.10.7'],
        setup_requires=['pypandoc'],
        extras_require={
            'ml': ['numpy>=1.7'],
            'mllib': ['numpy>=1.7'],
            'sql': [
                'pandas>=%s' % _minimum_pandas_version,
                'pyarrow>=%s' % _minimum_pyarrow_version,
            ]
        },
        classifiers=[
            'Development Status :: 5 - Production/Stable',
            'License :: OSI Approved :: Apache Software License',
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.4',
            'Programming Language :: Python :: 3.5',
            'Programming Language :: Python :: 3.6',
            'Programming Language :: Python :: 3.7',
            'Programming Language :: Python :: Implementation :: CPython',
            'Programming Language :: Python :: Implementation :: PyPy']
    )
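    # The extras declared above can be pulled in at install time with pip's extras syntax,
    # e.g. `pip install pyspark[sql]` for pandas/pyarrow or `pip install pyspark[ml]` for numpy
    # (illustrative usage; the exact version floors come from the lists above).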
finally:
    # We only clean up the symlink farm if we were in Spark; otherwise we are installing rather
    # than packaging.
    if (in_spark):
        # Clean up either the symlink farm or the copied tree, depending on what was built above
        if _supports_symlinks():
            os.remove(os.path.join(TEMP_PATH, "jars"))
            os.remove(os.path.join(TEMP_PATH, "bin"))
            os.remove(os.path.join(TEMP_PATH, "examples"))
            os.remove(os.path.join(TEMP_PATH, "data"))
            os.remove(os.path.join(TEMP_PATH, "licenses"))
        else:
            rmtree(os.path.join(TEMP_PATH, "jars"))
            rmtree(os.path.join(TEMP_PATH, "bin"))
            rmtree(os.path.join(TEMP_PATH, "examples"))
            rmtree(os.path.join(TEMP_PATH, "data"))
            rmtree(os.path.join(TEMP_PATH, "licenses"))
        os.rmdir(TEMP_PATH)