[SPARK-33983][PYTHON] Update cloudpickle to v1.6.0

### What changes were proposed in this pull request?

This PR proposes to upgrade cloudpickle from 1.5.0 to 1.6.0.
It virtually contains one fix:

4510be850d

From a cursory look, this isn't a regression, and not even properly supported in Python:

```python
>>> import pickle
>>> pickle.dumps({}.keys())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: cannot pickle 'dict_keys' object
```

So it seems fine not to backport.

### Why are the changes needed?

To leverage bug fixes from the cloudpickle upstream.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Jenkins build and GitHub actions build will test it out.

Closes #31007 from HyukjinKwon/cloudpickle-upgrade.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
HyukjinKwon 2021-01-04 10:36:31 -08:00 committed by Dongjoon Hyun
parent 414d323d6c
commit d6322bf70c
3 changed files with 52 additions and 13 deletions

View file

@ -4,4 +4,8 @@ from __future__ import absolute_import
from pyspark.cloudpickle.cloudpickle import * # noqa
from pyspark.cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa
__version__ = '1.5.0'
# Conform to the convention used by python serialization libraries, which
# expose their Pickler subclass at top-level under the "Pickler" name.
Pickler = CloudPickler
__version__ = '1.6.0'

View file

@ -88,7 +88,7 @@ else:
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL
# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# reconstruct instances from the matching singleton class definition when
# appropriate and preserve the usual "isinstance" semantics of Python objects.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
@ -236,7 +236,7 @@ def _extract_code_globals(co):
out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
# Declaring a function inside another one using the "def ..."
# syntax generates a constant code object corresponding to the one
# syntax generates a constant code object corresponding to the one
# of the nested function's As the nested function may itself need
# global variables, we need to introspect its code, extract its
# globals, (look for code object in it's co_consts attribute..) and
@ -457,7 +457,7 @@ if sys.version_info[:2] < (3, 7): # pragma: no branch
is_typing = getattr(obj, '__origin__', None) is not None
# typing_extensions.Literal
is_literal = getattr(obj, '__values__', None) is not None
is_literal = getattr(obj, '__values__', None) is not None
# typing_extensions.Final
is_final = getattr(obj, '__type__', None) is not None
@ -469,7 +469,7 @@ if sys.version_info[:2] < (3, 7): # pragma: no branch
getattr(obj, '__result__', None) is not None and
getattr(obj, '__args__', None) is not None
)
return any((is_typing, is_literal, is_final, is_union, is_tuple,
return any((is_typing, is_literal, is_final, is_union, is_tuple,
is_callable))
def _create_parametrized_type_hint(origin, args):
@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None):
"""
# This function is deprecated and should be removed in cloudpickle 1.7
warnings.warn(
"A pickle file created using an old (<=1.4.1) version of cloudpickle "
"A pickle file created using an old (<=1.4.1) version of cloudpickle "
"is currently being loaded. This is not supported by cloudpickle and "
"will break in cloudpickle 1.7", category=UserWarning
)
@ -828,3 +828,15 @@ def _get_bases(typ):
# For regular class objects
bases_attr = '__bases__'
return getattr(typ, bases_attr)
def _make_dict_keys(obj):
return dict.fromkeys(obj).keys()
def _make_dict_values(obj):
return {i: _ for i, _ in enumerate(obj)}.values()
def _make_dict_items(obj):
return obj.items()

View file

@ -6,10 +6,11 @@ previous Python implementation of the Pickler class. Because this functionality
is only available for Python versions 3.8+, a lot of backward-compatibility
code is also removed.
Note that the C Pickler subclassing API is CPython-specific. Therefore, some
Note that the C Pickler subclassing API is CPython-specific. Therefore, some
guards present in cloudpickle.py that were written to handle PyPy specificities
are not present in cloudpickle_fast.py
"""
import _collections_abc
import abc
import copyreg
import io
@ -33,8 +34,8 @@ from .cloudpickle import (
_typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType,
_is_parametrized_type_hint, PYPY, cell_set,
parametrized_type_hint_getinitargs, _create_parametrized_type_hint,
builtin_code_type
builtin_code_type,
_make_dict_keys, _make_dict_values, _make_dict_items,
)
@ -179,7 +180,7 @@ def _class_getstate(obj):
clsdict.pop('__weakref__', None)
if issubclass(type(obj), abc.ABCMeta):
# If obj is an instance of an ABCMeta subclass, don't pickle the
# If obj is an instance of an ABCMeta subclass, don't pickle the
# cache/negative caches populated during isinstance/issubclass
# checks, but pickle the list of registered subclasses of obj.
clsdict.pop('_abc_cache', None)
@ -400,6 +401,24 @@ def _class_reduce(obj):
return NotImplemented
def _dict_keys_reduce(obj):
    """Reducer for ``dict_keys`` views.

    Only the keys themselves are serialized: shipping the full backing
    dict as well might be unintended and could potentially leak
    sensitive information held in the values.
    """
    keys = list(obj)
    return _make_dict_keys, (keys, )
def _dict_values_reduce(obj):
    """Reducer for ``dict_values`` views.

    Only the values themselves are serialized: shipping the full backing
    dict as well might be unintended and could potentially leak
    sensitive information held in the keys.
    """
    values = list(obj)
    return _make_dict_values, (values, )
def _dict_items_reduce(obj):
    """Reducer for ``dict_items`` views: rebuild from a plain dict copy."""
    backing = dict(obj)
    return _make_dict_items, (backing, )
# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created and
@ -407,7 +426,7 @@ def _class_reduce(obj):
def _function_setstate(obj, state):
"""Update the state of a dynamic function.
"""Update the state of a dynamic function.
As __closure__ and __globals__ are readonly attributes of a function, we
cannot rely on the native setstate routine of pickle.load_build, that calls
@ -473,6 +492,10 @@ class CloudPickler(Pickler):
_dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
_dispatch_table[weakref.WeakSet] = _weakset_reduce
_dispatch_table[typing.TypeVar] = _typevar_reduce
_dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
_dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
_dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)
@ -556,7 +579,7 @@ class CloudPickler(Pickler):
# `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler
# used `CloudPickler.dispatch` as a class-level attribute storing all
# reducers implemented by cloudpickle, but the attribute name was not a
# great choice given the meaning of `CloudPickler.dispatch` when
# great choice given the meaning of `CloudPickler.dispatch` when
# `CloudPickler` extends the pure-python pickler.
dispatch = dispatch_table
@ -630,7 +653,7 @@ class CloudPickler(Pickler):
return self._function_reduce(obj)
else:
# fallback to save_global, including the Pickler's
# dispatch_table
# dispatch_table
return NotImplemented
else: