[SPARK-36190][PYTHON] Improve the rest of DataTypeOps tests by avoiding joins
### What changes were proposed in this pull request?
Improve the rest of DataTypeOps tests by avoiding joins.

### Why are the changes needed?
The bool, string, and numeric DataTypeOps tests have already been improved by avoiding joins. The rest of the DataTypeOps tests should be improved in the same way.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Unit tests.

Closes #33546 from xinrong-databricks/test_no_join.

Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
This commit is contained in:
parent
eb4d1c0332
commit
9c5cb99d6e
|
@ -19,7 +19,6 @@ import pandas as pd
|
|||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
from pyspark import pandas as ps
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
@ -34,74 +33,75 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def other_pser(self):
|
||||
return pd.Series([b"2", b"3", b"4"])
|
||||
def byte_pdf(self):
|
||||
psers = {
|
||||
"this": self.pser,
|
||||
"that": pd.Series([b"2", b"3", b"4"]),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def other_psser(self):
|
||||
return ps.from_pandas(self.other_pser)
|
||||
def byte_psdf(self):
|
||||
return ps.from_pandas(self.byte_pdf)
|
||||
|
||||
def test_add(self):
|
||||
psser = self.psser
|
||||
pser = self.pser
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
pser, psser = byte_pdf["this"], byte_psdf["this"]
|
||||
other_pser, other_psser = byte_pdf["that"], byte_psdf["that"]
|
||||
|
||||
self.assert_eq(psser + b"1", pser + b"1")
|
||||
self.assert_eq(psser + psser, pser + pser)
|
||||
self.assert_eq(psser + psser.astype("bytes"), pser + pser.astype("bytes"))
|
||||
self.assertRaises(TypeError, lambda: psser + "x")
|
||||
self.assertRaises(TypeError, lambda: psser + 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
self.assert_eq(self.pser + self.pser, (self.psser + self.psser).sort_index())
|
||||
self.assert_eq(pser + pser, psser + psser)
|
||||
self.assert_eq(pser + other_pser, psser + other_psser)
|
||||
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser - 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
|
||||
def test_radd(self):
|
||||
self.assert_eq(b"1" + self.psser, b"1" + self.pser)
|
||||
|
@ -177,46 +177,34 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] == byte_pdf["that"], byte_psdf["this"] == byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] == byte_pdf["this"], byte_psdf["this"] == byte_psdf["this"])
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] != byte_pdf["that"], byte_psdf["this"] != byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] != byte_pdf["this"], byte_psdf["this"] != byte_psdf["this"])
|
||||
|
||||
def test_lt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] < byte_pdf["that"], byte_psdf["this"] < byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] < byte_pdf["this"], byte_psdf["this"] < byte_psdf["this"])
|
||||
|
||||
def test_le(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] <= byte_pdf["that"], byte_psdf["this"] <= byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] <= byte_pdf["this"], byte_psdf["this"] <= byte_psdf["this"])
|
||||
|
||||
def test_gt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] > byte_pdf["that"], byte_psdf["this"] > byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] > byte_pdf["this"], byte_psdf["this"] > byte_psdf["this"])
|
||||
|
||||
def test_ge(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
|
||||
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
|
||||
self.assert_eq(byte_pdf["this"] >= byte_pdf["that"], byte_psdf["this"] >= byte_psdf["that"])
|
||||
self.assert_eq(byte_pdf["this"] >= byte_pdf["this"], byte_psdf["this"] >= byte_psdf["this"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -21,50 +21,11 @@ import datetime
|
|||
import pandas as pd
|
||||
|
||||
from pyspark import pandas as ps
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
||||
class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
||||
@property
|
||||
def numeric_array_psers(self):
|
||||
return [
|
||||
pd.Series([[1, 2, 3]]),
|
||||
pd.Series([[0.1, 0.2, 0.3]]),
|
||||
pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
|
||||
]
|
||||
|
||||
@property
|
||||
def non_numeric_array_psers(self):
|
||||
return {
|
||||
"string": pd.Series([["x", "y", "z"]]),
|
||||
"date": pd.Series(
|
||||
[[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
|
||||
),
|
||||
"bool": pd.Series([[True, True, False]]),
|
||||
}
|
||||
|
||||
@property
|
||||
def numeric_array_pssers(self):
|
||||
return [ps.from_pandas(pser) for pser in self.numeric_array_psers]
|
||||
|
||||
@property
|
||||
def non_numeric_array_pssers(self):
|
||||
pssers = {}
|
||||
|
||||
for k, v in self.non_numeric_array_psers.items():
|
||||
pssers[k] = ps.from_pandas(v)
|
||||
return pssers
|
||||
|
||||
@property
|
||||
def psers(self):
|
||||
return self.numeric_array_psers + list(self.non_numeric_array_psers.values())
|
||||
|
||||
@property
|
||||
def pssers(self):
|
||||
return self.numeric_array_pssers + list(self.non_numeric_array_pssers.values())
|
||||
|
||||
@property
|
||||
def pser(self):
|
||||
return pd.Series([[1, 2, 3]])
|
||||
|
@ -74,116 +35,161 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def other_pser(self):
|
||||
return pd.Series([[2, 3, 4]])
|
||||
def numeric_array_pdf(self):
|
||||
psers = {
|
||||
"int": pd.Series([[1, 2, 3]]),
|
||||
"float": pd.Series([[0.1, 0.2, 0.3]]),
|
||||
"decimal": pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def other_psser(self):
|
||||
return ps.from_pandas(self.other_pser)
|
||||
def numeric_array_psdf(self):
|
||||
return ps.from_pandas(self.numeric_array_pdf)
|
||||
|
||||
@property
|
||||
def struct_pser(self):
|
||||
return pd.Series([("x", 1)])
|
||||
def numeric_array_df_cols(self):
|
||||
return self.numeric_array_pdf.columns
|
||||
|
||||
@property
|
||||
def struct_psser(self):
|
||||
return ps.Index([("x", 1)]).to_series().reset_index(drop=True)
|
||||
def non_numeric_array_pdf(self):
|
||||
psers = {
|
||||
"string": pd.Series([["x", "y", "z"]]),
|
||||
"date": pd.Series(
|
||||
[[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
|
||||
),
|
||||
"bool": pd.Series([[True, True, False]]),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def non_numeric_array_psdf(self):
|
||||
return ps.from_pandas(self.non_numeric_array_pdf)
|
||||
|
||||
@property
|
||||
def non_numeric_array_df_cols(self):
|
||||
return self.non_numeric_array_pdf.columns
|
||||
|
||||
@property
|
||||
def array_pdf(self):
|
||||
return pd.concat([self.numeric_array_pdf, self.non_numeric_array_pdf], axis=1)
|
||||
|
||||
@property
|
||||
def array_psdf(self):
|
||||
return ps.from_pandas(self.array_pdf)
|
||||
|
||||
@property
|
||||
def array_df_cols(self):
|
||||
return self.array_pdf.columns
|
||||
|
||||
@property
|
||||
def complex_pdf(self):
|
||||
psers = {
|
||||
"this_array": self.pser,
|
||||
"that_array": pd.Series([[2, 3, 4]]),
|
||||
"this_struct": pd.Series([("x", 1)]),
|
||||
"that_struct": pd.Series([("a", 2)]),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def complex_psdf(self):
|
||||
pssers = {
|
||||
"this_array": self.psser,
|
||||
"that_array": ps.Series([[2, 3, 4]]),
|
||||
"this_struct": ps.Index([("x", 1)]).to_series().reset_index(drop=True),
|
||||
"that_struct": ps.Index([("a", 2)]).to_series().reset_index(drop=True),
|
||||
}
|
||||
return ps.concat(pssers, axis=1)
|
||||
|
||||
def test_add(self):
|
||||
for pser, psser in zip(self.psers, self.pssers):
|
||||
pdf, psdf = self.array_pdf, self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
self.assert_eq(pdf[col] + pdf[col], psdf[col] + psdf[col])
|
||||
|
||||
# Numeric array + Numeric array
|
||||
for col in self.numeric_array_df_cols:
|
||||
pser1, psser1 = pdf[col], psdf[col]
|
||||
for other_col in self.numeric_array_df_cols:
|
||||
pser2, psser2 = pdf[other_col], psdf[other_col]
|
||||
self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
|
||||
|
||||
# Non-numeric array + Non-numeric array
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: psdf["string"] + psdf["bool"],
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: psdf["string"] + psdf["date"],
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: psdf["bool"] + psdf["date"],
|
||||
)
|
||||
|
||||
for col in self.non_numeric_array_df_cols:
|
||||
pser, psser = pdf[col], psdf[col]
|
||||
self.assert_eq(pser + pser, psser + psser)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
# Numeric array + Numeric array
|
||||
for pser1, psser1 in zip(self.numeric_array_psers, self.numeric_array_pssers):
|
||||
for pser2, psser2 in zip(self.numeric_array_psers, self.numeric_array_pssers):
|
||||
self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
|
||||
|
||||
# Non-numeric array + Non-numeric array
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: self.non_numeric_array_pssers["string"]
|
||||
+ self.non_numeric_array_pssers["bool"],
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: self.non_numeric_array_pssers["string"]
|
||||
+ self.non_numeric_array_pssers["date"],
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
lambda: self.non_numeric_array_pssers["bool"]
|
||||
+ self.non_numeric_array_pssers["date"],
|
||||
)
|
||||
|
||||
for data_type in self.non_numeric_array_psers.keys():
|
||||
self.assert_eq(
|
||||
self.non_numeric_array_psers.get(data_type)
|
||||
+ self.non_numeric_array_psers.get(data_type),
|
||||
(
|
||||
self.non_numeric_array_pssers.get(data_type)
|
||||
+ self.non_numeric_array_pssers.get(data_type)
|
||||
).sort_index(),
|
||||
)
|
||||
|
||||
# Numeric array + Non-numeric array
|
||||
for numeric_ppser in self.numeric_array_pssers:
|
||||
for non_numeric_ppser in self.non_numeric_array_pssers.values():
|
||||
self.assertRaises(TypeError, lambda: numeric_ppser + non_numeric_ppser)
|
||||
# Numeric array + Non-numeric array
|
||||
for numeric_col in self.numeric_array_df_cols:
|
||||
for non_numeric_col in self.non_numeric_array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[numeric_col] + psdf[non_numeric_col])
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser - 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 - psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] - psdf[other_col])
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 * psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] * psdf[other_col])
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 / psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] / psdf[other_col])
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 // psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] // psdf[other_col])
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 % psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] % psdf[other_col])
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser1 in self.pssers:
|
||||
for psser2 in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: psser1 ** psser2)
|
||||
psdf = self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
for other_col in self.array_df_cols:
|
||||
self.assertRaises(TypeError, lambda: psdf[col] ** psdf[other_col])
|
||||
|
||||
def test_radd(self):
|
||||
self.assertRaises(TypeError, lambda: "x" + self.psser)
|
||||
|
@ -231,12 +237,16 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: False | self.psser)
|
||||
|
||||
def test_from_to_pandas(self):
|
||||
for pser, psser in zip(self.psers, self.pssers):
|
||||
pdf, psdf = self.array_pdf, self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
pser, psser = pdf[col], psdf[col]
|
||||
self.assert_eq(pser, psser.to_pandas())
|
||||
self.assert_eq(ps.from_pandas(pser), psser)
|
||||
|
||||
def test_isnull(self):
|
||||
for pser, psser in zip(self.psers, self.pssers):
|
||||
pdf, psdf = self.array_pdf, self.array_psdf
|
||||
for col in self.array_df_cols:
|
||||
pser, psser = pdf[col], psdf[col]
|
||||
self.assert_eq(pser.isnull(), psser.isnull())
|
||||
|
||||
def test_astype(self):
|
||||
|
@ -252,70 +262,94 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
    """Check ``==`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] == pdf["that_array"], psdf["this_array"] == psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] == pdf["that_struct"], psdf["this_struct"] == psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] == pdf["this_array"], psdf["this_array"] == psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] == pdf["this_struct"], psdf["this_struct"] == psdf["this_struct"]
    )
|
||||
|
||||
def test_ne(self):
    """Check ``!=`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] != pdf["that_array"], psdf["this_array"] != psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] != pdf["that_struct"], psdf["this_struct"] != psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] != pdf["this_array"], psdf["this_array"] != psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] != pdf["this_struct"], psdf["this_struct"] != psdf["this_struct"]
    )
|
||||
|
||||
def test_lt(self):
    """Check ``<`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] < pdf["that_array"], psdf["this_array"] < psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] < pdf["that_struct"], psdf["this_struct"] < psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] < pdf["this_array"], psdf["this_array"] < psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] < pdf["this_struct"], psdf["this_struct"] < psdf["this_struct"]
    )
|
||||
|
||||
def test_le(self):
    """Check ``<=`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] <= pdf["that_array"], psdf["this_array"] <= psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] <= pdf["that_struct"], psdf["this_struct"] <= psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] <= pdf["this_array"], psdf["this_array"] <= psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] <= pdf["this_struct"], psdf["this_struct"] <= psdf["this_struct"]
    )
|
||||
|
||||
def test_gt(self):
    """Check ``>`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] > pdf["that_array"], psdf["this_array"] > psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] > pdf["that_struct"], psdf["this_struct"] > psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] > pdf["this_array"], psdf["this_array"] > psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] > pdf["this_struct"], psdf["this_struct"] > psdf["this_struct"]
    )
|
||||
|
||||
def test_ge(self):
    """Check ``>=`` on array and struct columns against pandas.

    Both operands of every comparison are columns of the same frame, so the
    ``compute.ops_on_diff_frames`` option is not needed.
    """
    # The second frame must be the pandas-on-Spark one; binding both names to
    # ``complex_pdf`` would compare pandas with itself and always pass.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    self.assert_eq(
        pdf["this_array"] >= pdf["that_array"], psdf["this_array"] >= psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] >= pdf["that_struct"], psdf["this_struct"] >= psdf["that_struct"]
    )
    self.assert_eq(
        pdf["this_array"] >= pdf["this_array"], psdf["this_array"] >= psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] >= pdf["this_struct"], psdf["this_struct"] >= psdf["this_struct"]
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -20,10 +20,7 @@ import datetime
|
|||
import pandas as pd
|
||||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
from pyspark.sql.types import DateType
|
||||
|
||||
from pyspark import pandas as ps
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
@ -40,14 +37,18 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def other_pser(self):
|
||||
return pd.Series(
|
||||
[datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
|
||||
)
|
||||
def date_pdf(self):
|
||||
psers = {
|
||||
"this": self.pser,
|
||||
"that": pd.Series(
|
||||
[datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
|
||||
),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def other_psser(self):
|
||||
return ps.from_pandas(self.other_pser)
|
||||
def date_psdf(self):
|
||||
return ps.from_pandas(self.date_pdf)
|
||||
|
||||
@property
|
||||
def some_date(self):
|
||||
|
@ -58,9 +59,8 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: self.psser + 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser + self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
|
@ -69,57 +69,54 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
(self.pser - self.some_date).dt.days,
|
||||
self.psser - self.some_date,
|
||||
)
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.pser_psser_pairs:
|
||||
if isinstance(psser.spark.data_type, DateType):
|
||||
self.assert_eq((self.pser - pser).dt.days, (self.psser - psser).sort_index())
|
||||
else:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
for col in self.df_cols:
|
||||
if col == "date":
|
||||
self.assert_eq((pdf["date"] - pdf[col]).dt.days, psdf["date"] - psdf[col])
|
||||
else:
|
||||
self.assertRaises(TypeError, lambda: psdf["date"] - psdf[col])
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq((pdf["this"] - pdf["that"]).dt.days, psdf["this"] - psdf["that"])
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser * self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser / self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser // self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser % self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser ** self.some_date)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
|
||||
def test_radd(self):
|
||||
self.assertRaises(TypeError, lambda: "x" + self.psser)
|
||||
|
@ -204,46 +201,34 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
|
||||
self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
|
||||
self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
|
||||
|
||||
def test_lt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"])
|
||||
self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"])
|
||||
|
||||
def test_le(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"])
|
||||
self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"])
|
||||
|
||||
def test_gt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
|
||||
self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
|
||||
|
||||
def test_ge(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
|
||||
pdf, psdf = self.date_pdf, self.date_psdf
|
||||
self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"])
|
||||
self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -17,12 +17,10 @@
|
|||
|
||||
import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
from pyspark import pandas as ps
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
@ -30,19 +28,23 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
|||
class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
||||
@property
|
||||
def pser(self):
|
||||
return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="M"))
|
||||
return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D"))
|
||||
|
||||
@property
|
||||
def psser(self):
|
||||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def other_pser(self):
|
||||
return pd.Series(pd.date_range("1994-4-30 10:30:15", periods=3, freq="M"))
|
||||
def datetime_pdf(self):
|
||||
psers = {
|
||||
"this": self.pser,
|
||||
"that": pd.Series(pd.date_range("1994-2-1 10:30:15", periods=3, freq="D")),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def other_psser(self):
|
||||
return ps.from_pandas(self.other_pser)
|
||||
def datetime_psdf(self):
|
||||
return ps.from_pandas(self.datetime_pdf)
|
||||
|
||||
@property
|
||||
def some_datetime(self):
|
||||
|
@ -53,9 +55,8 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: self.psser + 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser + self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
|
@ -64,60 +65,62 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
(self.pser - self.some_datetime).dt.total_seconds().astype("int"),
|
||||
self.psser - self.some_datetime,
|
||||
)
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for pser, psser in self.pser_psser_pairs:
|
||||
if pser.dtype == np.dtype("<M8[ns]"):
|
||||
self.assert_eq(
|
||||
(self.pser - pser).dt.total_seconds().astype("int"),
|
||||
(self.psser - psser).sort_index(),
|
||||
)
|
||||
else:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
|
||||
pdf, psdf = self.pdf, self.psdf
|
||||
for col in self.df_cols:
|
||||
if col == "datetime":
|
||||
self.assert_eq(
|
||||
(pdf["datetime"] - pdf[col]).dt.total_seconds().astype("int"),
|
||||
psdf["datetime"] - psdf[col],
|
||||
)
|
||||
else:
|
||||
self.assertRaises(TypeError, lambda: psdf["datetime"] - psdf[col])
|
||||
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(
|
||||
(pdf["that"] - pdf["this"]).dt.total_seconds().astype("int"),
|
||||
psdf["that"] - psdf["this"],
|
||||
)
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser * self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser / self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser // self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser % self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
self.assertRaises(TypeError, lambda: self.psser ** self.some_datetime)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
|
||||
def test_radd(self):
|
||||
self.assertRaises(TypeError, lambda: "x" + self.psser)
|
||||
|
@ -202,46 +205,34 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
|
||||
self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
|
||||
self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
|
||||
|
||||
def test_lt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"])
|
||||
self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"])
|
||||
|
||||
def test_le(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"])
|
||||
self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"])
|
||||
|
||||
def test_gt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
|
||||
self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
|
||||
|
||||
def test_ge(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(
|
||||
self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
|
||||
)
|
||||
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
|
||||
pdf, psdf = self.datetime_pdf, self.datetime_psdf
|
||||
self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"])
|
||||
self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -19,7 +19,6 @@ import pandas as pd
|
|||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
import pyspark.pandas as ps
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
@ -37,57 +36,50 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: self.psser + "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser + 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser - 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
|
||||
def test_radd(self):
|
||||
self.assertRaises(TypeError, lambda: "x" + self.psser)
|
||||
|
@ -145,28 +137,28 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser == pser, psser == psser)
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser != pser, psser != psser)
|
||||
|
||||
def test_lt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser < pser, psser < psser)
|
||||
|
||||
def test_le(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser <= pser, psser <= psser)
|
||||
|
||||
def test_gt(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser > pser, psser > psser)
|
||||
|
||||
def test_ge(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
|
||||
pser, psser = self.pser, self.psser
|
||||
self.assert_eq(pser >= pser, psser >= psser)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -19,7 +19,6 @@ import pandas as pd
|
|||
|
||||
import pyspark.pandas as ps
|
||||
from pyspark.ml.linalg import SparseVector
|
||||
from pyspark.pandas.config import option_context
|
||||
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
|
||||
from pyspark.testing.pandasutils import PandasOnSparkTestCase
|
||||
|
||||
|
@ -34,61 +33,67 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
def psser(self):
|
||||
return ps.from_pandas(self.pser)
|
||||
|
||||
@property
|
||||
def udt_pdf(self):
|
||||
sparse_values = {0: 0.2, 1: 1.0}
|
||||
psers = {
|
||||
"this": self.pser,
|
||||
"that": pd.Series([SparseVector(len(sparse_values), sparse_values)]),
|
||||
}
|
||||
return pd.concat(psers, axis=1)
|
||||
|
||||
@property
|
||||
def udt_psdf(self):
|
||||
return ps.from_pandas(self.udt_pdf)
|
||||
|
||||
def test_add(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser + "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser + 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser + psser)
|
||||
|
||||
def test_sub(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser - "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser - 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser - psser)
|
||||
|
||||
def test_mul(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser * "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser * 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser * psser)
|
||||
|
||||
def test_truediv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser / "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser / 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser / psser)
|
||||
|
||||
def test_floordiv(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser // "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser // 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser // psser)
|
||||
|
||||
def test_mod(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser % "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser % 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser % psser)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertRaises(TypeError, lambda: self.psser ** "x")
|
||||
self.assertRaises(TypeError, lambda: self.psser ** 1)
|
||||
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
for psser in self.pssers:
|
||||
self.assertRaises(TypeError, lambda: self.psser ** psser)
|
||||
|
||||
def test_radd(self):
|
||||
self.assertRaises(TypeError, lambda: "x" + self.psser)
|
||||
|
@ -141,12 +146,14 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
|
|||
self.assertRaises(TypeError, lambda: ~self.psser)
|
||||
|
||||
def test_eq(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
|
||||
pdf, psdf = self.udt_pdf, self.udt_psdf
|
||||
self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
|
||||
self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
|
||||
|
||||
def test_ne(self):
|
||||
with option_context("compute.ops_on_diff_frames", True):
|
||||
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
|
||||
pdf, psdf = self.udt_pdf, self.udt_psdf
|
||||
self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
|
||||
self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
|
||||
|
||||
def test_lt(self):
|
||||
self.assertRaisesRegex(
|
||||
|
|
Loading…
Reference in a new issue