[SPARK-36190][PYTHON] Improve the rest of DataTypeOps tests by avoiding joins

### What changes were proposed in this pull request?
Improve the rest of DataTypeOps tests by avoiding joins.

### Why are the changes needed?
The bool, string, and numeric DataTypeOps tests have already been improved by avoiding joins.
The remaining DataTypeOps tests should be improved in the same way.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Unit tests.

Closes #33546 from xinrong-databricks/test_no_join.

Authored-by: Xinrong Meng <xinrong.meng@databricks.com>
Signed-off-by: Takuya UESHIN <ueshin@databricks.com>
This commit is contained in:
Xinrong Meng 2021-07-28 15:53:38 -07:00 committed by Takuya UESHIN
parent eb4d1c0332
commit 9c5cb99d6e
6 changed files with 408 additions and 411 deletions

View file

@ -19,7 +19,6 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@ -34,74 +33,75 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
return ps.from_pandas(self.pser)
@property
def other_pser(self):
return pd.Series([b"2", b"3", b"4"])
def byte_pdf(self):
psers = {
"this": self.pser,
"that": pd.Series([b"2", b"3", b"4"]),
}
return pd.concat(psers, axis=1)
@property
def other_psser(self):
return ps.from_pandas(self.other_pser)
def byte_psdf(self):
return ps.from_pandas(self.byte_pdf)
def test_add(self):
psser = self.psser
pser = self.pser
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
pser, psser = byte_pdf["this"], byte_psdf["this"]
other_pser, other_psser = byte_pdf["that"], byte_psdf["that"]
self.assert_eq(psser + b"1", pser + b"1")
self.assert_eq(psser + psser, pser + pser)
self.assert_eq(psser + psser.astype("bytes"), pser + pser.astype("bytes"))
self.assertRaises(TypeError, lambda: psser + "x")
self.assertRaises(TypeError, lambda: psser + 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
self.assert_eq(self.pser + self.pser, (self.psser + self.psser).sort_index())
self.assert_eq(pser + pser, psser + psser)
self.assert_eq(pser + other_pser, psser + other_psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser - psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser / psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser // psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser % psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser ** psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assert_eq(b"1" + self.psser, b"1" + self.pser)
@ -177,46 +177,34 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
)
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] == byte_pdf["that"], byte_psdf["this"] == byte_psdf["that"])
self.assert_eq(byte_pdf["this"] == byte_pdf["this"], byte_psdf["this"] == byte_psdf["this"])
def test_ne(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
)
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] != byte_pdf["that"], byte_psdf["this"] != byte_psdf["that"])
self.assert_eq(byte_pdf["this"] != byte_pdf["this"], byte_psdf["this"] != byte_psdf["this"])
def test_lt(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
)
self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] < byte_pdf["that"], byte_psdf["this"] < byte_psdf["that"])
self.assert_eq(byte_pdf["this"] < byte_pdf["this"], byte_psdf["this"] < byte_psdf["this"])
def test_le(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
)
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] <= byte_pdf["that"], byte_psdf["this"] <= byte_psdf["that"])
self.assert_eq(byte_pdf["this"] <= byte_pdf["this"], byte_psdf["this"] <= byte_psdf["this"])
def test_gt(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
)
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] > byte_pdf["that"], byte_psdf["this"] > byte_psdf["that"])
self.assert_eq(byte_pdf["this"] > byte_pdf["this"], byte_psdf["this"] > byte_psdf["this"])
def test_ge(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
)
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
self.assert_eq(byte_pdf["this"] >= byte_pdf["that"], byte_psdf["this"] >= byte_psdf["that"])
self.assert_eq(byte_pdf["this"] >= byte_pdf["this"], byte_psdf["this"] >= byte_psdf["this"])
if __name__ == "__main__":

View file

@ -21,50 +21,11 @@ import datetime
import pandas as pd
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@property
def numeric_array_psers(self):
return [
pd.Series([[1, 2, 3]]),
pd.Series([[0.1, 0.2, 0.3]]),
pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
]
@property
def non_numeric_array_psers(self):
return {
"string": pd.Series([["x", "y", "z"]]),
"date": pd.Series(
[[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
),
"bool": pd.Series([[True, True, False]]),
}
@property
def numeric_array_pssers(self):
return [ps.from_pandas(pser) for pser in self.numeric_array_psers]
@property
def non_numeric_array_pssers(self):
pssers = {}
for k, v in self.non_numeric_array_psers.items():
pssers[k] = ps.from_pandas(v)
return pssers
@property
def psers(self):
return self.numeric_array_psers + list(self.non_numeric_array_psers.values())
@property
def pssers(self):
return self.numeric_array_pssers + list(self.non_numeric_array_pssers.values())
@property
def pser(self):
return pd.Series([[1, 2, 3]])
@ -74,116 +35,161 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
return ps.from_pandas(self.pser)
@property
def other_pser(self):
return pd.Series([[2, 3, 4]])
def numeric_array_pdf(self):
psers = {
"int": pd.Series([[1, 2, 3]]),
"float": pd.Series([[0.1, 0.2, 0.3]]),
"decimal": pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
}
return pd.concat(psers, axis=1)
@property
def other_psser(self):
return ps.from_pandas(self.other_pser)
def numeric_array_psdf(self):
return ps.from_pandas(self.numeric_array_pdf)
@property
def struct_pser(self):
return pd.Series([("x", 1)])
def numeric_array_df_cols(self):
return self.numeric_array_pdf.columns
@property
def struct_psser(self):
return ps.Index([("x", 1)]).to_series().reset_index(drop=True)
def non_numeric_array_pdf(self):
psers = {
"string": pd.Series([["x", "y", "z"]]),
"date": pd.Series(
[[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
),
"bool": pd.Series([[True, True, False]]),
}
return pd.concat(psers, axis=1)
@property
def non_numeric_array_psdf(self):
return ps.from_pandas(self.non_numeric_array_pdf)
@property
def non_numeric_array_df_cols(self):
return self.non_numeric_array_pdf.columns
@property
def array_pdf(self):
return pd.concat([self.numeric_array_pdf, self.non_numeric_array_pdf], axis=1)
@property
def array_psdf(self):
return ps.from_pandas(self.array_pdf)
@property
def array_df_cols(self):
return self.array_pdf.columns
@property
def complex_pdf(self):
psers = {
"this_array": self.pser,
"that_array": pd.Series([[2, 3, 4]]),
"this_struct": pd.Series([("x", 1)]),
"that_struct": pd.Series([("a", 2)]),
}
return pd.concat(psers, axis=1)
@property
def complex_psdf(self):
pssers = {
"this_array": self.psser,
"that_array": ps.Series([[2, 3, 4]]),
"this_struct": ps.Index([("x", 1)]).to_series().reset_index(drop=True),
"that_struct": ps.Index([("a", 2)]).to_series().reset_index(drop=True),
}
return ps.concat(pssers, axis=1)
def test_add(self):
for pser, psser in zip(self.psers, self.pssers):
pdf, psdf = self.array_pdf, self.array_psdf
for col in self.array_df_cols:
self.assert_eq(pdf[col] + pdf[col], psdf[col] + psdf[col])
# Numeric array + Numeric array
for col in self.numeric_array_df_cols:
pser1, psser1 = pdf[col], psdf[col]
for other_col in self.numeric_array_df_cols:
pser2, psser2 = pdf[other_col], psdf[other_col]
self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
# Non-numeric array + Non-numeric array
self.assertRaises(
TypeError,
lambda: psdf["string"] + psdf["bool"],
)
self.assertRaises(
TypeError,
lambda: psdf["string"] + psdf["date"],
)
self.assertRaises(
TypeError,
lambda: psdf["bool"] + psdf["date"],
)
for col in self.non_numeric_array_df_cols:
pser, psser = pdf[col], psdf[col]
self.assert_eq(pser + pser, psser + psser)
with option_context("compute.ops_on_diff_frames", True):
# Numeric array + Numeric array
for pser1, psser1 in zip(self.numeric_array_psers, self.numeric_array_pssers):
for pser2, psser2 in zip(self.numeric_array_psers, self.numeric_array_pssers):
self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
# Non-numeric array + Non-numeric array
self.assertRaises(
TypeError,
lambda: self.non_numeric_array_pssers["string"]
+ self.non_numeric_array_pssers["bool"],
)
self.assertRaises(
TypeError,
lambda: self.non_numeric_array_pssers["string"]
+ self.non_numeric_array_pssers["date"],
)
self.assertRaises(
TypeError,
lambda: self.non_numeric_array_pssers["bool"]
+ self.non_numeric_array_pssers["date"],
)
for data_type in self.non_numeric_array_psers.keys():
self.assert_eq(
self.non_numeric_array_psers.get(data_type)
+ self.non_numeric_array_psers.get(data_type),
(
self.non_numeric_array_pssers.get(data_type)
+ self.non_numeric_array_pssers.get(data_type)
).sort_index(),
)
# Numeric array + Non-numeric array
for numeric_ppser in self.numeric_array_pssers:
for non_numeric_ppser in self.non_numeric_array_pssers.values():
self.assertRaises(TypeError, lambda: numeric_ppser + non_numeric_ppser)
# Numeric array + Non-numeric array
for numeric_col in self.numeric_array_df_cols:
for non_numeric_col in self.non_numeric_array_df_cols:
self.assertRaises(TypeError, lambda: psdf[numeric_col] + psdf[non_numeric_col])
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 - psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] - psdf[other_col])
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 * psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] * psdf[other_col])
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 / psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] / psdf[other_col])
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 // psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] // psdf[other_col])
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 % psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] % psdf[other_col])
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
with option_context("compute.ops_on_diff_frames", True):
for psser1 in self.pssers:
for psser2 in self.pssers:
self.assertRaises(TypeError, lambda: psser1 ** psser2)
psdf = self.array_psdf
for col in self.array_df_cols:
for other_col in self.array_df_cols:
self.assertRaises(TypeError, lambda: psdf[col] ** psdf[other_col])
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@ -231,12 +237,16 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: False | self.psser)
def test_from_to_pandas(self):
for pser, psser in zip(self.psers, self.pssers):
pdf, psdf = self.array_pdf, self.array_psdf
for col in self.array_df_cols:
pser, psser = pdf[col], psdf[col]
self.assert_eq(pser, psser.to_pandas())
self.assert_eq(ps.from_pandas(pser), psser)
def test_isnull(self):
for pser, psser in zip(self.psers, self.pssers):
pdf, psdf = self.array_pdf, self.array_psdf
for col in self.array_df_cols:
pser, psser = pdf[col], psdf[col]
self.assert_eq(pser.isnull(), psser.isnull())
def test_astype(self):
@ -252,70 +262,94 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
    """Check ``==`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] == pdf["that_array"], psdf["this_array"] == psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] == pdf["that_struct"], psdf["this_struct"] == psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] == pdf["this_array"], psdf["this_array"] == psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] == pdf["this_struct"], psdf["this_struct"] == psdf["this_struct"]
    )
def test_ne(self):
    """Check ``!=`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] != pdf["that_array"], psdf["this_array"] != psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] != pdf["that_struct"], psdf["this_struct"] != psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] != pdf["this_array"], psdf["this_array"] != psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] != pdf["this_struct"], psdf["this_struct"] != psdf["this_struct"]
    )
def test_lt(self):
    """Check ``<`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] < pdf["that_array"], psdf["this_array"] < psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] < pdf["that_struct"], psdf["this_struct"] < psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] < pdf["this_array"], psdf["this_array"] < psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] < pdf["this_struct"], psdf["this_struct"] < psdf["this_struct"]
    )
def test_le(self):
    """Check ``<=`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] <= pdf["that_array"], psdf["this_array"] <= psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] <= pdf["that_struct"], psdf["this_struct"] <= psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] <= pdf["this_array"], psdf["this_array"] <= psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] <= pdf["this_struct"], psdf["this_struct"] <= psdf["this_struct"]
    )
def test_gt(self):
    """Check ``>`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] > pdf["that_array"], psdf["this_array"] > psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] > pdf["that_struct"], psdf["this_struct"] > psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] > pdf["this_array"], psdf["this_array"] > psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] > pdf["this_struct"], psdf["this_struct"] > psdf["this_struct"]
    )
def test_ge(self):
    """Check ``>=`` on array and struct columns against pandas.

    Operands are columns of a single frame rather than separate Series.
    """
    # The pandas-on-Spark operand must come from ``complex_psdf``; binding
    # both names to ``complex_pdf`` would only compare pandas with itself.
    # NOTE(review): ``complex_psdf`` is assembled with ``ps.concat`` on a
    # dict and was not exercised before this fix - confirm it builds.
    pdf, psdf = self.complex_pdf, self.complex_psdf
    # Different operands.
    self.assert_eq(
        pdf["this_array"] >= pdf["that_array"], psdf["this_array"] >= psdf["that_array"]
    )
    self.assert_eq(
        pdf["this_struct"] >= pdf["that_struct"], psdf["this_struct"] >= psdf["that_struct"]
    )
    # Same operand on both sides.
    self.assert_eq(
        pdf["this_array"] >= pdf["this_array"], psdf["this_array"] >= psdf["this_array"]
    )
    self.assert_eq(
        pdf["this_struct"] >= pdf["this_struct"], psdf["this_struct"] >= psdf["this_struct"]
    )
if __name__ == "__main__":

View file

@ -20,10 +20,7 @@ import datetime
import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark.sql.types import DateType
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@ -40,14 +37,18 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
return ps.from_pandas(self.pser)
@property
def other_pser(self):
return pd.Series(
[datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
)
def date_pdf(self):
psers = {
"this": self.pser,
"that": pd.Series(
[datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
),
}
return pd.concat(psers, axis=1)
@property
def other_psser(self):
return ps.from_pandas(self.other_pser)
def date_psdf(self):
return ps.from_pandas(self.date_pdf)
@property
def some_date(self):
@ -58,9 +59,8 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + 1)
self.assertRaises(TypeError, lambda: self.psser + self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
@ -69,57 +69,54 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
(self.pser - self.some_date).dt.days,
self.psser - self.some_date,
)
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.pser_psser_pairs:
if isinstance(psser.spark.data_type, DateType):
self.assert_eq((self.pser - pser).dt.days, (self.psser - psser).sort_index())
else:
self.assertRaises(TypeError, lambda: self.psser - psser)
pdf, psdf = self.pdf, self.psdf
for col in self.df_cols:
if col == "date":
self.assert_eq((pdf["date"] - pdf[col]).dt.days, psdf["date"] - psdf[col])
else:
self.assertRaises(TypeError, lambda: psdf["date"] - psdf[col])
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq((pdf["this"] - pdf["that"]).dt.days, psdf["this"] - psdf["that"])
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
self.assertRaises(TypeError, lambda: self.psser * self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
self.assertRaises(TypeError, lambda: self.psser / self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser / psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
self.assertRaises(TypeError, lambda: self.psser // self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser // psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
self.assertRaises(TypeError, lambda: self.psser % self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser % psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
self.assertRaises(TypeError, lambda: self.psser ** self.some_date)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser ** psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@ -204,46 +201,34 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
)
self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
def test_ne(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
)
self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
def test_lt(self):
    """Check ``<`` between date columns against pandas.

    Operands are the "this" and "that" columns of a single frame.
    """
    pdf, psdf = self.date_pdf, self.date_psdf
    # Use ``<`` here: asserting with ``==`` would merely repeat ``test_eq``
    # and leave the less-than operator untested for dates.
    self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"])
    self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"])
def test_le(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
)
self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"])
self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"])
def test_gt(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
)
self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
def test_ge(self):
with option_context("compute.ops_on_diff_frames", True):
self.assert_eq(
self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
)
self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
pdf, psdf = self.date_pdf, self.date_psdf
self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"])
self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"])
if __name__ == "__main__":

View file

@ -17,12 +17,10 @@
import datetime
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@ -30,19 +28,23 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@property
def pser(self):
return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="M"))
return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D"))
@property
def psser(self):
return ps.from_pandas(self.pser)
@property
def other_pser(self):
return pd.Series(pd.date_range("1994-4-30 10:30:15", periods=3, freq="M"))
def datetime_pdf(self):
psers = {
"this": self.pser,
"that": pd.Series(pd.date_range("1994-2-1 10:30:15", periods=3, freq="D")),
}
return pd.concat(psers, axis=1)
@property
def other_psser(self):
return ps.from_pandas(self.other_pser)
def datetime_psdf(self):
return ps.from_pandas(self.datetime_pdf)
@property
def some_datetime(self):
@ -53,9 +55,8 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + 1)
self.assertRaises(TypeError, lambda: self.psser + self.some_datetime)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
@ -64,60 +65,62 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
(self.pser - self.some_datetime).dt.total_seconds().astype("int"),
self.psser - self.some_datetime,
)
with option_context("compute.ops_on_diff_frames", True):
for pser, psser in self.pser_psser_pairs:
if pser.dtype == np.dtype("<M8[ns]"):
self.assert_eq(
(self.pser - pser).dt.total_seconds().astype("int"),
(self.psser - psser).sort_index(),
)
else:
self.assertRaises(TypeError, lambda: self.psser - psser)
pdf, psdf = self.pdf, self.psdf
for col in self.df_cols:
if col == "datetime":
self.assert_eq(
(pdf["datetime"] - pdf[col]).dt.total_seconds().astype("int"),
psdf["datetime"] - psdf[col],
)
else:
self.assertRaises(TypeError, lambda: psdf["datetime"] - psdf[col])
pdf, psdf = self.datetime_pdf, self.datetime_psdf
self.assert_eq(
(pdf["that"] - pdf["this"]).dt.total_seconds().astype("int"),
psdf["that"] - psdf["this"],
)
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
self.assertRaises(TypeError, lambda: self.psser * self.some_datetime)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
    """Assert that ``/`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int, ``self.some_datetime`` and every
    series yielded by ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser / "x")
    self.assertRaises(TypeError, lambda: self.psser / 1)
    self.assertRaises(TypeError, lambda: self.psser / self.some_datetime)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
    """Assert that ``//`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int, ``self.some_datetime`` and every
    series yielded by ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser // "x")
    self.assertRaises(TypeError, lambda: self.psser // 1)
    self.assertRaises(TypeError, lambda: self.psser // self.some_datetime)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
    """Assert that ``%`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int, ``self.some_datetime`` and every
    series yielded by ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser % "x")
    self.assertRaises(TypeError, lambda: self.psser % 1)
    self.assertRaises(TypeError, lambda: self.psser % self.some_datetime)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
    """Assert that ``**`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int, ``self.some_datetime`` and every
    series yielded by ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser ** "x")
    self.assertRaises(TypeError, lambda: self.psser ** 1)
    self.assertRaises(TypeError, lambda: self.psser ** self.some_datetime)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@ -202,46 +205,34 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
    """Compare ``==`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser == self.other_pser
        actual = (self.psser == self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser == self.pser
        actual = (self.psser == self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] == pdf[other], psdf["this"] == psdf[other])
def test_ne(self):
    """Compare ``!=`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser != self.other_pser
        actual = (self.psser != self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser != self.pser
        actual = (self.psser != self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] != pdf[other], psdf["this"] != psdf[other])
def test_lt(self):
    """Compare ``<`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser < self.other_pser
        actual = (self.psser < self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser < self.pser
        actual = (self.psser < self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] < pdf[other], psdf["this"] < psdf[other])
def test_le(self):
    """Compare ``<=`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser <= self.other_pser
        actual = (self.psser <= self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser <= self.pser
        actual = (self.psser <= self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] <= pdf[other], psdf["this"] <= psdf[other])
def test_gt(self):
    """Compare ``>`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser > self.other_pser
        actual = (self.psser > self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser > self.pser
        actual = (self.psser > self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] > pdf[other], psdf["this"] > psdf[other])
def test_ge(self):
    """Compare ``>=`` results between pandas and pandas-on-Spark.

    Covers the standalone series (with ``compute.ops_on_diff_frames``
    enabled) and the ``this``/``that`` columns of the datetime frames.
    """
    with option_context("compute.ops_on_diff_frames", True):
        expected = self.pser >= self.other_pser
        actual = (self.psser >= self.other_psser).sort_index()
        self.assert_eq(expected, actual)

        expected = self.pser >= self.pser
        actual = (self.psser >= self.psser).sort_index()
        self.assert_eq(expected, actual)

    pdf, psdf = self.datetime_pdf, self.datetime_psdf
    for other in ("that", "this"):
        self.assert_eq(pdf["this"] >= pdf[other], psdf["this"] >= psdf[other])
if __name__ == "__main__":

View file

@ -19,7 +19,6 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
import pyspark.pandas as ps
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@ -37,57 +36,50 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + "x")
self.assertRaises(TypeError, lambda: self.psser + 1)
with option_context("compute.ops_on_diff_frames", True):
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
for psser in self.pssers:
self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
    """Assert that ``-`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser - "x")
    self.assertRaises(TypeError, lambda: self.psser - 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
    """Assert that ``*`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser * "x")
    self.assertRaises(TypeError, lambda: self.psser * 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
    """Assert that ``/`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser / "x")
    self.assertRaises(TypeError, lambda: self.psser / 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
    """Assert that ``//`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser // "x")
    self.assertRaises(TypeError, lambda: self.psser // 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
    """Assert that ``%`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser % "x")
    self.assertRaises(TypeError, lambda: self.psser % 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
    """Assert that ``**`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser ** "x")
    self.assertRaises(TypeError, lambda: self.psser ** 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@ -145,28 +137,28 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
    """Check that ``==`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser == pser, psser == psser)
def test_ne(self):
    """Check that ``!=`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser != pser, psser != psser)
def test_lt(self):
    """Check that ``<`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser < pser, psser < psser)
def test_le(self):
    """Check that ``<=`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser <= pser, psser <= psser)
def test_gt(self):
    """Check that ``>`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser > pser, psser > psser)
def test_ge(self):
    """Check that ``>=`` of the series with itself matches pandas."""
    # Both operands are the same series, so one direct comparison covers it.
    pser, psser = self.pser, self.psser
    self.assert_eq(pser >= pser, psser >= psser)
if __name__ == "__main__":

View file

@ -19,7 +19,6 @@ import pandas as pd
import pyspark.pandas as ps
from pyspark.ml.linalg import SparseVector
from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@ -34,61 +33,67 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
def psser(self):
return ps.from_pandas(self.pser)
@property
def udt_pdf(self):
    """Build a pandas DataFrame with columns ``this`` and ``that``.

    ``this`` is ``self.pser``; ``that`` is a one-element series holding a
    ``SparseVector`` built from a two-entry index-to-value mapping.
    """
    sparse_values = {0: 0.2, 1: 1.0}
    that = pd.Series([SparseVector(len(sparse_values), sparse_values)])
    return pd.concat({"this": self.pser, "that": that}, axis=1)
@property
def udt_psdf(self):
    """Return ``self.udt_pdf`` converted with ``ps.from_pandas``."""
    pdf = self.udt_pdf
    return ps.from_pandas(pdf)
def test_add(self):
    """Assert that ``+`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser + "x")
    self.assertRaises(TypeError, lambda: self.psser + 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
    """Assert that ``-`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser - "x")
    self.assertRaises(TypeError, lambda: self.psser - 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
    """Assert that ``*`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser * "x")
    self.assertRaises(TypeError, lambda: self.psser * 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
    """Assert that ``/`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser / "x")
    self.assertRaises(TypeError, lambda: self.psser / 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
    """Assert that ``//`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser // "x")
    self.assertRaises(TypeError, lambda: self.psser // 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
    """Assert that ``%`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser % "x")
    self.assertRaises(TypeError, lambda: self.psser % 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
    """Assert that ``**`` on ``self.psser`` raises ``TypeError``.

    Operands tried: a str, an int and every series yielded by
    ``self.pssers``.
    """
    self.assertRaises(TypeError, lambda: self.psser ** "x")
    self.assertRaises(TypeError, lambda: self.psser ** 1)

    # Each lambda runs immediately inside assertRaises, so capturing the
    # loop variable is safe here.
    for psser in self.pssers:
        self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@ -141,12 +146,14 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
    """Check ``==`` on the ``this``/``that`` UDT columns against pandas.

    Compares ``this`` with itself and with ``that``, using columns of a
    single frame (``udt_pdf`` / ``udt_psdf``).
    """
    pdf, psdf = self.udt_pdf, self.udt_psdf
    self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
    self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
def test_ne(self):
    """Check ``!=`` on the ``this``/``that`` UDT columns against pandas.

    Compares ``this`` with itself and with ``that``, using columns of a
    single frame (``udt_pdf`` / ``udt_psdf``).
    """
    pdf, psdf = self.udt_pdf, self.udt_psdf
    self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
    self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
def test_lt(self):
self.assertRaisesRegex(