Accept Python `bytes` values for BinaryType columns: add `bytes` to the
BinaryType entry of `_acceptable_types` in pyspark/sql/types.py, and add a
SerdeTests case that builds a DataFrame from b"abcd" with schema "col binary"
and expects the value to be read back as bytearray(b'abcd').

diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
index ea2a686cdd..052a5b2835 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -132,6 +132,10 @@ class SerdeTests(ReusedSQLTestCase):
         df = self.spark.createDataFrame(data, "array")
         self.assertEqual(len(list(filter(lambda r: None in r.value, df.collect()))), 0)
 
+    def test_bytes_as_binary_type(self):
+        df = self.spark.createDataFrame([[b"abcd"]], "col binary")
+        self.assertEqual(df.first().col, bytearray(b'abcd'))
+
 
 if __name__ == "__main__":
     import unittest
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 0c7f4ce3dd..81fdd41435 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1190,7 +1190,7 @@ _acceptable_types = {
     DoubleType: (float,),
     DecimalType: (decimal.Decimal,),
     StringType: (str, unicode),
-    BinaryType: (bytearray,),
+    BinaryType: (bytearray, bytes),
     DateType: (datetime.date, datetime.datetime),
     TimestampType: (datetime.datetime,),
     ArrayType: (list, tuple, array),