[SPARK-36370][PYTHON] _builtin_table directly imported from pandas instead of being redefined

### What changes were proposed in this pull request? This PR proposes refactoring the way `_builtin_table` is defined in the `python/pyspark/pandas/groupby.py` module. pandas has recently changed where `_builtin_table` lives: it is now part of the `pandas.core.common` module instead of being an attribute of the `pandas.core.base.SelectionMixin` class. ### Why are the changes needed? This change is not strictly required, but the current implementation redefines this table within pyspark, so any change made to the table in the pandas library would also need to be mirrored in the pyspark repository. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Ran the following command successfully: ```sh python/run-tests --testnames 'pyspark.pandas.tests.test_groupby' ``` Tests passed in 327 seconds. Closes #33687 from Cedric-Magnan/_builtin_table_from_pandas. Authored-by: Cedric-Magnan <cedric.magnan@artefact.com> Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
2021-08-17 10:46:49 -07:00 · 2021-08-17 10:46:49 -07:00 · 964dfe254f
parent c0441bb7e8
commit 964dfe254f
1 changed files with 8 additions and 8 deletions
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@ -20,13 +20,13 @@ A wrapper for GroupedData to behave similar to pandas GroupBy.
 """
 from abc import ABCMeta, abstractmethod
 import builtins
 import sys
 import inspect
 from collections import OrderedDict, namedtuple
 from distutils.version import LooseVersion
 from functools import partial
 from itertools import product
 from pkg_resources import parse_version  # type: ignore
 from typing import (
    Any,
    Callable,
@ -44,10 +44,16 @@ from typing import (
    TYPE_CHECKING,
 )
 import numpy as np
 import pandas as pd
 from pandas.api.types import is_hashable, is_list_like
 if parse_version(pd.__version__) >= parse_version("1.3.0"):
    from pandas.core.common import _builtin_table
 else:
    from pandas.core.base import SelectionMixin
    _builtin_table = SelectionMixin._builtin_table
 from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
 from pyspark.sql.types import (  # noqa: F401
    DataType,
@ -97,12 +103,6 @@ if TYPE_CHECKING:
 # to keep it the same as pandas
 NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
 _builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.max,
    builtins.min: np.min,
 }  # type: Dict[Callable, Callable]
 class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
    """