From 396b76466b588644d7e3c420681561ef57184e6c Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 18 Aug 2021 11:17:01 -0700 Subject: [PATCH] [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3 This PR is a follow-up to https://github.com/apache/spark/pull/33646 that adds the missing tests. Why the change is needed: some tests are missing. User-facing change: no. How it was tested: unit tests. Closes #33776 from itholic/SPARK-36388-followup. Authored-by: itholic Signed-off-by: Takuya UESHIN (cherry picked from commit c91ae544fdd44c67fe1e4c73825570dbe71a3206) Signed-off-by: Hyukjin Kwon --- .../tests/test_ops_on_diff_frames_groupby_expanding.py | 9 ++++++--- .../tests/test_ops_on_diff_frames_groupby_rolling.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py index 223adeaa48..634cbd7f0b 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py @@ -52,14 +52,17 @@ class OpsOnDiffFramesGroupByExpandingTest(PandasOnSparkTestCase, TestUtils): psdf = ps.from_pandas(pdf) kkey = ps.from_pandas(pkey) + # The behavior of GroupBy.expanding is changed from pandas 1.3. 
if LooseVersion(pd.__version__) >= LooseVersion("1.3"): - # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3 - pass - else: self.assert_eq( getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(), getattr(pdf.groupby(pkey).expanding(2), f)().sort_index(), ) + else: + self.assert_eq( + getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(), + getattr(pdf.groupby(pkey).expanding(2), f)().drop("a", axis=1).sort_index(), + ) self.assert_eq( getattr(psdf.groupby(kkey)["b"].expanding(2), f)().sort_index(), diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py index 4f97769b8e..04ea448d80 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py @@ -50,14 +50,17 @@ class OpsOnDiffFramesGroupByRollingTest(PandasOnSparkTestCase, TestUtils): psdf = ps.from_pandas(pdf) kkey = ps.from_pandas(pkey) + # The behavior of GroupBy.rolling is changed from pandas 1.3. if LooseVersion(pd.__version__) >= LooseVersion("1.3"): - # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3 - pass - else: self.assert_eq( getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(), getattr(pdf.groupby(pkey).rolling(2), f)().sort_index(), ) + else: + self.assert_eq( + getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(), + getattr(pdf.groupby(pkey).rolling(2), f)().drop("a", axis=1).sort_index(), + ) self.assert_eq( getattr(psdf.groupby(kkey)["b"].rolling(2), f)().sort_index(),