[SPARK-33983][PYTHON] Update cloudpickle to v1.6.0
### What changes were proposed in this pull request?
This PR proposes to upgrade cloudpickle from 1.5.0 to 1.6.0.
It virtually contains one fix:
4510be850d
From a cursory look, this isn't a regression, and not even properly supported in Python:
```python
>>> import pickle
>>> pickle.dumps({}.keys())
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: cannot pickle 'dict_keys' object
```
So it seems fine not to backport.
### Why are the changes needed?
To leverage bug fixes from the cloudpickle upstream.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Jenkins build and GitHub actions build will test it out.
Closes #31007 from HyukjinKwon/cloudpickle-upgrade.
Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
This commit is contained in:
parent
414d323d6c
commit
d6322bf70c
|
@ -4,4 +4,8 @@ from __future__ import absolute_import
|
|||
from pyspark.cloudpickle.cloudpickle import * # noqa
|
||||
from pyspark.cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa
|
||||
|
||||
__version__ = '1.5.0'
|
||||
# Conform to the convention used by python serialization libraries, which
|
||||
# expose their Pickler subclass at top-level under the "Pickler" name.
|
||||
Pickler = CloudPickler
|
||||
|
||||
__version__ = '1.6.0'
|
||||
|
|
|
@ -88,7 +88,7 @@ else:
|
|||
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL
|
||||
|
||||
# Track the provenance of reconstructed dynamic classes to make it possible to
|
||||
# reconstruct instances from the matching singleton class definition when
|
||||
# recontruct instances from the matching singleton class definition when
|
||||
# appropriate and preserve the usual "isinstance" semantics of Python objects.
|
||||
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
|
||||
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
|
||||
|
@ -236,7 +236,7 @@ def _extract_code_globals(co):
|
|||
out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
|
||||
|
||||
# Declaring a function inside another one using the "def ..."
|
||||
# syntax generates a constant code object corresponding to the one
|
||||
# syntax generates a constant code object corresonding to the one
|
||||
# of the nested function's As the nested function may itself need
|
||||
# global variables, we need to introspect its code, extract its
|
||||
# globals, (look for code object in it's co_consts attribute..) and
|
||||
|
@ -457,7 +457,7 @@ if sys.version_info[:2] < (3, 7): # pragma: no branch
|
|||
is_typing = getattr(obj, '__origin__', None) is not None
|
||||
|
||||
# typing_extensions.Literal
|
||||
is_literal = getattr(obj, '__values__', None) is not None
|
||||
is_litteral = getattr(obj, '__values__', None) is not None
|
||||
|
||||
# typing_extensions.Final
|
||||
is_final = getattr(obj, '__type__', None) is not None
|
||||
|
@ -469,7 +469,7 @@ if sys.version_info[:2] < (3, 7): # pragma: no branch
|
|||
getattr(obj, '__result__', None) is not None and
|
||||
getattr(obj, '__args__', None) is not None
|
||||
)
|
||||
return any((is_typing, is_literal, is_final, is_union, is_tuple,
|
||||
return any((is_typing, is_litteral, is_final, is_union, is_tuple,
|
||||
is_callable))
|
||||
|
||||
def _create_parametrized_type_hint(origin, args):
|
||||
|
@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None):
|
|||
"""
|
||||
# This function is deprecated and should be removed in cloudpickle 1.7
|
||||
warnings.warn(
|
||||
"A pickle file created using an old (<=1.4.1) version of cloudpickle "
|
||||
"A pickle file created using an old (<=1.4.1) version of cloudpicke "
|
||||
"is currently being loaded. This is not supported by cloudpickle and "
|
||||
"will break in cloudpickle 1.7", category=UserWarning
|
||||
)
|
||||
|
@ -828,3 +828,15 @@ def _get_bases(typ):
|
|||
# For regular class objects
|
||||
bases_attr = '__bases__'
|
||||
return getattr(typ, bases_attr)
|
||||
|
||||
|
||||
def _make_dict_keys(obj):
|
||||
return dict.fromkeys(obj).keys()
|
||||
|
||||
|
||||
def _make_dict_values(obj):
|
||||
return {i: _ for i, _ in enumerate(obj)}.values()
|
||||
|
||||
|
||||
def _make_dict_items(obj):
|
||||
return obj.items()
|
||||
|
|
|
@ -6,10 +6,11 @@ previous Python implementation of the Pickler class. Because this functionality
|
|||
is only available for Python versions 3.8+, a lot of backward-compatibility
|
||||
code is also removed.
|
||||
|
||||
Note that the C Pickler subclassing API is CPython-specific. Therefore, some
|
||||
Note that the C Pickler sublassing API is CPython-specific. Therefore, some
|
||||
guards present in cloudpickle.py that were written to handle PyPy specificities
|
||||
are not present in cloudpickle_fast.py
|
||||
"""
|
||||
import _collections_abc
|
||||
import abc
|
||||
import copyreg
|
||||
import io
|
||||
|
@ -33,8 +34,8 @@ from .cloudpickle import (
|
|||
_typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType,
|
||||
_is_parametrized_type_hint, PYPY, cell_set,
|
||||
parametrized_type_hint_getinitargs, _create_parametrized_type_hint,
|
||||
builtin_code_type
|
||||
|
||||
builtin_code_type,
|
||||
_make_dict_keys, _make_dict_values, _make_dict_items,
|
||||
)
|
||||
|
||||
|
||||
|
@ -179,7 +180,7 @@ def _class_getstate(obj):
|
|||
clsdict.pop('__weakref__', None)
|
||||
|
||||
if issubclass(type(obj), abc.ABCMeta):
|
||||
# If obj is an instance of an ABCMeta subclass, don't pickle the
|
||||
# If obj is an instance of an ABCMeta subclass, dont pickle the
|
||||
# cache/negative caches populated during isinstance/issubclass
|
||||
# checks, but pickle the list of registered subclasses of obj.
|
||||
clsdict.pop('_abc_cache', None)
|
||||
|
@ -400,6 +401,24 @@ def _class_reduce(obj):
|
|||
return NotImplemented
|
||||
|
||||
|
||||
def _dict_keys_reduce(obj):
|
||||
# Safer not to ship the full dict as sending the rest might
|
||||
# be unintended and could potentially cause leaking of
|
||||
# sensitive information
|
||||
return _make_dict_keys, (list(obj), )
|
||||
|
||||
|
||||
def _dict_values_reduce(obj):
|
||||
# Safer not to ship the full dict as sending the rest might
|
||||
# be unintended and could potentially cause leaking of
|
||||
# sensitive information
|
||||
return _make_dict_values, (list(obj), )
|
||||
|
||||
|
||||
def _dict_items_reduce(obj):
|
||||
return _make_dict_items, (dict(obj), )
|
||||
|
||||
|
||||
# COLLECTIONS OF OBJECTS STATE SETTERS
|
||||
# ------------------------------------
|
||||
# state setters are called at unpickling time, once the object is created and
|
||||
|
@ -407,7 +426,7 @@ def _class_reduce(obj):
|
|||
|
||||
|
||||
def _function_setstate(obj, state):
|
||||
"""Update the state of a dynamic function.
|
||||
"""Update the state of a dynaamic function.
|
||||
|
||||
As __closure__ and __globals__ are readonly attributes of a function, we
|
||||
cannot rely on the native setstate routine of pickle.load_build, that calls
|
||||
|
@ -473,6 +492,10 @@ class CloudPickler(Pickler):
|
|||
_dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
|
||||
_dispatch_table[weakref.WeakSet] = _weakset_reduce
|
||||
_dispatch_table[typing.TypeVar] = _typevar_reduce
|
||||
_dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
|
||||
_dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
|
||||
_dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
|
||||
|
||||
|
||||
dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)
|
||||
|
||||
|
@ -556,7 +579,7 @@ class CloudPickler(Pickler):
|
|||
# `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler
|
||||
# used `CloudPickler.dispatch` as a class-level attribute storing all
|
||||
# reducers implemented by cloudpickle, but the attribute name was not a
|
||||
# great choice given the meaning of `CloudPickler.dispatch` when
|
||||
# great choice given the meaning of `Cloudpickler.dispatch` when
|
||||
# `CloudPickler` extends the pure-python pickler.
|
||||
dispatch = dispatch_table
|
||||
|
||||
|
@ -630,7 +653,7 @@ class CloudPickler(Pickler):
|
|||
return self._function_reduce(obj)
|
||||
else:
|
||||
# fallback to save_global, including the Pickler's
|
||||
# dispatch_table
|
||||
# distpatch_table
|
||||
return NotImplemented
|
||||
|
||||
else:
|
||||
|
|
Loading…
Reference in a new issue