[SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3

This PR is a follow-up to https://github.com/apache/spark/pull/33646 that adds the missing tests.

Why are the changes needed? Some tests are missing.

Does this PR introduce any user-facing change? No.

How was this patch tested? Unit tests.

Closes #33776 from itholic/SPARK-36388-followup.

Authored-by: itholic <haejoon.lee@databricks.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
(cherry picked from commit c91ae544fd)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
This commit is contained in:
itholic 2021-08-18 11:17:01 -07:00 committed by Hyukjin Kwon
parent 786d773585
commit 396b76466b
2 changed files with 12 additions and 6 deletions

View file

@@ -52,14 +52,17 @@ class OpsOnDiffFramesGroupByExpandingTest(PandasOnSparkTestCase, TestUtils):
psdf = ps.from_pandas(pdf) psdf = ps.from_pandas(pdf)
kkey = ps.from_pandas(pkey) kkey = ps.from_pandas(pkey)
# The behavior of GroupBy.expanding is changed from pandas 1.3.
if LooseVersion(pd.__version__) >= LooseVersion("1.3"): if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
# TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
pass
else:
self.assert_eq( self.assert_eq(
getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(), getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
getattr(pdf.groupby(pkey).expanding(2), f)().sort_index(), getattr(pdf.groupby(pkey).expanding(2), f)().sort_index(),
) )
else:
self.assert_eq(
getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
getattr(pdf.groupby(pkey).expanding(2), f)().drop("a", axis=1).sort_index(),
)
self.assert_eq( self.assert_eq(
getattr(psdf.groupby(kkey)["b"].expanding(2), f)().sort_index(), getattr(psdf.groupby(kkey)["b"].expanding(2), f)().sort_index(),

View file

@@ -50,14 +50,17 @@ class OpsOnDiffFramesGroupByRollingTest(PandasOnSparkTestCase, TestUtils):
psdf = ps.from_pandas(pdf) psdf = ps.from_pandas(pdf)
kkey = ps.from_pandas(pkey) kkey = ps.from_pandas(pkey)
# The behavior of GroupBy.rolling is changed from pandas 1.3.
if LooseVersion(pd.__version__) >= LooseVersion("1.3"): if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
# TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
pass
else:
self.assert_eq( self.assert_eq(
getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(), getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
getattr(pdf.groupby(pkey).rolling(2), f)().sort_index(), getattr(pdf.groupby(pkey).rolling(2), f)().sort_index(),
) )
else:
self.assert_eq(
getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
getattr(pdf.groupby(pkey).rolling(2), f)().drop("a", axis=1).sort_index(),
)
self.assert_eq( self.assert_eq(
getattr(psdf.groupby(kkey)["b"].rolling(2), f)().sort_index(), getattr(psdf.groupby(kkey)["b"].rolling(2), f)().sort_index(),