[MINOR][DOCS] Fix typos at python/pyspark/sql/types.py

### What changes were proposed in this pull request?

This PR fixes some typos in the `python/pyspark/sql/types.py` file.

### Why are the changes needed?

To deliver correct wording in the documentation and code.

### Does this PR introduce any user-facing change?

Yes, it fixes some typos in user-facing API documentation.

### How was this patch tested?

Tested locally by running the linter.

Closes #27475 from sharifahmad2061/master.

Lead-authored-by: sharif ahmad <sharifahmad2061@gmail.com>
Co-authored-by: Sharif ahmad <sharifahmad2061@users.noreply.github.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
Commit dd2f4431f5 (parent 4804445327)
Authored by sharif ahmad on 2020-02-07 18:42:16 +09:00; committed by HyukjinKwon

@@ -76,7 +76,7 @@ class DataType(object):
     def needConversion(self):
         """
-        Does this type need to conversion between Python object and internal SQL object.
+        Does this type needs conversion between Python object and internal SQL object.
         This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
         """
@@ -210,17 +210,17 @@ class DecimalType(FractionalType):
     The precision can be up to 38, the scale must be less or equal to precision.
-    When create a DecimalType, the default precision and scale is (10, 0). When infer
+    When creating a DecimalType, the default precision and scale is (10, 0). When inferring
     schema from decimal.Decimal objects, it will be DecimalType(38, 18).
-    :param precision: the maximum total number of digits (default: 10)
+    :param precision: the maximum (i.e. total) number of digits (default: 10)
     :param scale: the number of digits on right side of dot. (default: 0)
     """
     def __init__(self, precision=10, scale=0):
         self.precision = precision
         self.scale = scale
-        self.hasPrecisionInfo = True # this is public API
+        self.hasPrecisionInfo = True # this is a public API
     def simpleString(self):
         return "decimal(%d,%d)" % (self.precision, self.scale)
@@ -457,8 +457,8 @@ class StructType(DataType):
     This is the data type representing a :class:`Row`.
-    Iterating a :class:`StructType` will iterate its :class:`StructField`\\s.
-    A contained :class:`StructField` can be accessed by name or position.
+    Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
+    A contained :class:`StructField` can be accessed by its name or position.
     >>> struct1 = StructType([StructField("f1", StringType(), True)])
     >>> struct1["f1"]
@@ -492,8 +492,8 @@ class StructType(DataType):
     def add(self, field, data_type=None, nullable=True, metadata=None):
         """
-        Construct a StructType by adding new elements to it to define the schema. The method accepts
-        either:
+        Construct a StructType by adding new elements to it, to define the schema.
+        The method accepts either:
             a) A single parameter which is a StructField object.
             b) Between 2 and 4 parameters as (name, data_type, nullable (optional),
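
The two call forms listed under a) and b) above, as a small sketch (no Spark session needed; field names are made up):

```python
from pyspark.sql.types import StringType, StructField, StructType

# Form (a): pass a ready-made StructField.
schema1 = StructType().add(StructField("f1", StringType(), True))

# Form (b): pass name / data_type / nullable; data_type may also be given as a
# type-name string such as "integer".
schema2 = StructType().add("f1", StringType(), True).add("f2", "integer", False)

print(schema1)
print(schema2.simpleString())  # struct<f1:string,f2:int>
```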
@@ -676,7 +676,7 @@ class UserDefinedType(DataType):
     @classmethod
     def _cachedSqlType(cls):
         """
-        Cache the sqlType() into class, because it's heavy used in `toInternal`.
+        Cache the sqlType() into class, because it's heavily used in `toInternal`.
         """
         if not hasattr(cls, "_cached_sql_type"):
             cls._cached_sql_type = cls.sqlType()
@@ -693,7 +693,7 @@ class UserDefinedType(DataType):
     def serialize(self, obj):
         """
-        Converts the a user-type object into a SQL datum.
+        Converts a user-type object into a SQL datum.
         """
         raise NotImplementedError("UDT must implement toInternal().")
@@ -760,7 +760,7 @@ _FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
 def _parse_datatype_string(s):
     """
     Parses the given data type string to a :class:`DataType`. The data type string format equals
-    to :class:`DataType.simpleString`, except that top level struct type can omit
+    :class:`DataType.simpleString`, except that the top level struct type can omit
     the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use ``byte`` instead
     of ``tinyint`` for :class:`ByteType`. We can also use ``int`` as a short name
     for :class:`IntegerType`. Since Spark 2.3, this also supports a schema in a DDL-formatted
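
Since `_parse_datatype_string` is an internal helper that hands the actual parsing off to the JVM, the sketch below starts a local `SparkSession` first; the master and app name are arbitrary:

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import _parse_datatype_string

# An active Spark context is required because parsing is delegated to the JVM.
spark = SparkSession.builder.master("local[1]").appName("ddl-parse").getOrCreate()

print(_parse_datatype_string("int"))                        # IntegerType
print(_parse_datatype_string("a: byte, b: decimal(16,8)"))  # struct with ByteType and DecimalType(16,8) fields
print(_parse_datatype_string("array<short>"))               # ArrayType of ShortType

spark.stop()
```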
@@ -921,7 +921,7 @@ if sys.version >= "3":
 # We should be careful here. The size of these types in python depends on C
 # implementation. We need to make sure that this conversion does not lose any
 # precision. Also, JVM only support signed types, when converting unsigned types,
-# keep in mind that it required 1 more bit when stored as singed types.
+# keep in mind that it require 1 more bit when stored as signed types.
 #
 # Reference for C integer size, see:
 # ISO/IEC 9899:201x specification, chapter 5.2.4.2.1 Sizes of integer types <limits.h>.
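
A small plain-Python illustration of the "one more bit" remark above, using the `array` module's unsigned `'I'` typecode (integer sizes are platform-dependent, hence the hedged comments):

```python
import array

# 'I' is an unsigned C int (usually 32 bits). Its maximum value fits in 32
# unsigned bits, but a signed representation needs one extra bit for the sign,
# which is why such values must be stored as a 64-bit signed LongType on the JVM.
bits = array.array('I').itemsize * 8          # usually 32
max_unsigned = 2 ** bits - 1
print(array.array('I', [max_unsigned])[0])    # e.g. 4294967295
print(max_unsigned.bit_length())              # 32 magnitude bits; +1 sign bit when stored signed
```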
@@ -959,7 +959,7 @@ def _int_size_to_type(size):
     if size <= 64:
         return LongType
-# The list of all supported array typecodes is stored here
+# The list of all supported array typecodes, is stored here
 _array_type_mappings = {
     # Warning: Actual properties for float and double in C is not specified in C.
     # On almost every system supported by both python and JVM, they are IEEE 754
@@ -995,9 +995,9 @@ if sys.version_info[0] < 3:
     _array_type_mappings['c'] = StringType
 # SPARK-21465:
-# In python2, array of 'L' happened to be mistakenly partially supported. To
+# In python2, array of 'L' happened to be mistakenly, just partially supported. To
 # avoid breaking user's code, we should keep this partial support. Below is a
-# dirty hacking to keep this partial support and make the unit test passes
+# dirty hacking to keep this partial support and pass the unit test.
 import platform
 if sys.version_info[0] < 3 and platform.python_implementation() != 'PyPy':
     if 'L' not in _array_type_mappings.keys():
@@ -1071,7 +1071,7 @@ def _infer_schema(row, names=None):
 def _has_nulltype(dt):
-    """ Return whether there is NullType in `dt` or not """
+    """ Return whether there is a NullType in `dt` or not """
     if isinstance(dt, StructType):
         return any(_has_nulltype(f.dataType) for f in dt.fields)
     elif isinstance(dt, ArrayType):
@@ -1211,7 +1211,7 @@ def _make_type_verifier(dataType, nullable=True, name=None):
     This verifier also checks the value of obj against datatype and raises a ValueError if it's not
     within the allowed range, e.g. using 128 as ByteType will overflow. Note that, Python float is
-    not checked, so it will become infinity when cast to Java float if it overflows.
+    not checked, so it will become infinity when cast to Java float, if it overflows.
     >>> _make_type_verifier(StructType([]))(None)
     >>> _make_type_verifier(StringType())("")
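
A brief sketch of the internal verifier described above, including the ByteType overflow case the docstring mentions (plain Python, no Spark session needed):

```python
from pyspark.sql.types import ByteType, StringType, StructType, _make_type_verifier

# _make_type_verifier returns a callable that raises when a value does not fit
# the declared type.
verify_byte = _make_type_verifier(ByteType())
verify_byte(127)      # fine: within the signed 8-bit range
try:
    verify_byte(128)  # overflows ByteType, as noted in the docstring above
except ValueError as e:
    print("rejected:", e)

_make_type_verifier(StringType())("")      # empty string is fine
_make_type_verifier(StructType([]))(None)  # None is allowed when nullable=True (the default)
```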
@@ -1433,7 +1433,7 @@ class Row(tuple):
     ``key in row`` will search through row keys.
     Row can be used to create a row object by using named arguments.
-    It is not allowed to omit a named argument to represent the value is
+    It is not allowed to omit a named argument to represent that the value is
     None or missing. This should be explicitly set to None in this case.
     NOTE: As of Spark 3.0.0, Rows created from named arguments no longer have
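
To illustrate the rule about explicit `None` values in named-argument Rows, a small sketch (plain Python, no Spark session needed; names are made up):

```python
from pyspark.sql import Row

# A missing value must be passed explicitly as None; a named argument cannot
# simply be omitted to mean "missing".
alice = Row(name="Alice", age=None)
print(alice.name, alice.age)   # Alice None

# Row can also act like a simple row "class" built from field names.
Person = Row("name", "age")
print(Person("Bob", 25))       # Row(name='Bob', age=25)
```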
@@ -1524,9 +1524,9 @@ class Row(tuple):
     def asDict(self, recursive=False):
         """
-        Return as an dict
+        Return as a dict
-        :param recursive: turns the nested Row as dict (default: False).
+        :param recursive: turns the nested Rows to dict (default: False).
         >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
         True
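
And a short sketch of `asDict` with and without `recursive`, matching the behavior the docstring describes (plain Python, no Spark session needed):

```python
from pyspark.sql import Row

row = Row(key=1, value=Row(name="a", age=2))
print(row.asDict())                # nested Row kept as-is: {'key': 1, 'value': Row(name='a', age=2)}
print(row.asDict(recursive=True))  # nested Row converted too: {'key': 1, 'value': {'name': 'a', 'age': 2}}
```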