diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 51ac198305..040886f717 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -445,8 +445,10 @@ class SparseVector(Vector): values (sorted by index). :param size: Size of the vector. - :param args: Non-zero entries, as a dictionary, list of tupes, - or two sorted lists containing indices and values. + :param args: Active entries, as a dictionary {index: value, ...}, + a list of tuples [(index, value), ...], or a list of strictly i + ncreasing indices and a list of corresponding values [index, ...], + [value, ...]. Inactive entries are treated as zeros. >>> SparseVector(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) @@ -456,6 +458,7 @@ class SparseVector(Vector): SparseVector(4, {1: 1.0, 3: 5.5}) """ self.size = int(size) + """ Size of the vector. """ assert 1 <= len(args) <= 2, "must pass either 2 or 3 arguments" if len(args) == 1: pairs = args[0] @@ -463,7 +466,9 @@ class SparseVector(Vector): pairs = pairs.items() pairs = sorted(pairs) self.indices = np.array([p[0] for p in pairs], dtype=np.int32) + """ A list of indices corresponding to active entries. """ self.values = np.array([p[1] for p in pairs], dtype=np.float64) + """ A list of values corresponding to active entries. """ else: if isinstance(args[0], bytes): assert isinstance(args[1], bytes), "values should be string too"