From e15daa31b36669a7e29367e385f28b6ba25acf09 Mon Sep 17 00:00:00 2001
From: Cedric-Magnan <cedric.magnan@artefact.com>
Date: Tue, 17 Aug 2021 10:46:49 -0700
Subject: [PATCH] [SPARK-36370][PYTHON] _builtin_table directly imported from
 pandas instead of being redefined

### What changes were proposed in this pull request?
Suggesting to refactor the way the _builtin_table is defined in the `python/pyspark/pandas/groupby.py` module.
Pandas has recently refactored the way we import the _builtin_table and is now part of the pandas.core.common module instead of being an attribute of the pandas.core.base.SelectionMixin class.

### Why are the changes needed?
This change is not fully needed but the current implementation redefines this table within pyspark, so any changes of this table from the pandas library would need to be updated in the pyspark repository as well.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Ran the following command successfully :
```sh
python/run-tests --testnames 'pyspark.pandas.tests.test_groupby'
```
Tests passed in 327 seconds

Closes #33687 from Cedric-Magnan/_builtin_table_from_pandas.

Authored-by: Cedric-Magnan <cedric.magnan@artefact.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
(cherry picked from commit 964dfe254ff8ebf9d7f5c7115ff8f79da3f28261)
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
---
 python/pyspark/pandas/groupby.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 376592dee3..2daf80f5b0 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -20,13 +20,13 @@ A wrapper for GroupedData to behave similar to pandas GroupBy.
 """
 
 from abc import ABCMeta, abstractmethod
-import builtins
 import sys
 import inspect
 from collections import OrderedDict, namedtuple
 from distutils.version import LooseVersion
 from functools import partial
 from itertools import product
+from pkg_resources import parse_version  # type: ignore
 from typing import (
     Any,
     Callable,
@@ -44,10 +44,16 @@ from typing import (
     TYPE_CHECKING,
 )
 
-import numpy as np
 import pandas as pd
 from pandas.api.types import is_hashable, is_list_like
 
+if parse_version(pd.__version__) >= parse_version("1.3.0"):
+    from pandas.core.common import _builtin_table
+else:
+    from pandas.core.base import SelectionMixin
+
+    _builtin_table = SelectionMixin._builtin_table
+
 from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
 from pyspark.sql.types import (  # noqa: F401
     DataType,
@@ -97,12 +103,6 @@ if TYPE_CHECKING:
 # to keep it the same as pandas
 NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
 
-_builtin_table = {
-    builtins.sum: np.sum,
-    builtins.max: np.max,
-    builtins.min: np.min,
-}  # type: Dict[Callable, Callable]
-
 
 class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
     """