01321bc0fe
### What changes were proposed in this pull request? This PR proposes migration of `pyspark.mllib` to NumPy documentation style. ### Why are the changes needed? To improve documentation style. Before: ![old](https://user-images.githubusercontent.com/1554276/100097941-90234980-2e5d-11eb-8b4d-c25d98d85191.png) After: ![new](https://user-images.githubusercontent.com/1554276/100097966-987b8480-2e5d-11eb-9e02-07b18c327624.png) ### Does this PR introduce _any_ user-facing change? Yes, this changes both rendered HTML docs and console representation (SPARK-33243). ### How was this patch tested? `dev/lint-python` and manual inspection. Closes #30413 from zero323/SPARK-33252. Authored-by: zero323 <mszymkiewicz@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
152 lines
5.3 KiB
Python
152 lines
5.3 KiB
Python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
|
|
|
|
from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union
|
|
from pyspark.rdd import RDD
|
|
from pyspark.storagelevel import StorageLevel
|
|
from pyspark.mllib.common import JavaModelWrapper
|
|
from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition
|
|
from pyspark.mllib.stat import MultivariateStatisticalSummary
|
|
import pyspark.sql.dataframe
|
|
from numpy import ndarray # noqa: F401
|
|
|
|
# Alias for vector-shaped input: either a ``Vector`` or a plain sequence of
# ``float``/``int`` values.
VectorLike = Union[Vector, Sequence[Union[float, int]]]
|
|
|
|
# Type parameters of ``SingularValueDecomposition``: ``UT`` is the type
# exposed by its ``U`` property and ``VT`` the type exposed by ``V``.
UT = TypeVar("UT")
VT = TypeVar("VT")
|
|
|
|
# Common base of the matrix stubs declared in this file. It only declares
# the two dimension accessors, both typed as returning ``int``.
# Signature-only stub: every body is elided with ``...``.
class DistributedMatrix:
    def numRows(self) -> int: ...
    def numCols(self) -> int: ...
|
|
|
|
# Stub for a ``DistributedMatrix`` built from rows of ``Vector``.
# Signature-only: every body is elided with ``...``.
class RowMatrix(DistributedMatrix):
    # ``rows`` may be an RDD of ``Vector`` or a DataFrame. ``numRows`` and
    # ``numCols`` are optional; their default values are elided in this stub.
    def __init__(
        self,
        rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame],
        numRows: int = ...,
        numCols: int = ...,
    ) -> None: ...
    # Read-only property, typed as an RDD of ``Vector`` regardless of which
    # input form the constructor accepted.
    @property
    def rows(self) -> RDD[Vector]: ...
    def numRows(self) -> int: ...
    def numCols(self) -> int: ...
    def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary: ...
    def computeCovariance(self) -> Matrix: ...
    def computeGramianMatrix(self) -> Matrix: ...
    # ``threshold`` is optional (default elided); the result is typed as a
    # ``CoordinateMatrix``.
    def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix: ...
    # ``computeQ`` is optional (default elided).
    def tallSkinnyQR(
        self, computeQ: bool = ...
    ) -> QRDecomposition[RowMatrix, Matrix]: ...
    # ``k`` is required; ``computeU`` and ``rCond`` are optional. With this
    # parameterization the result's ``U`` is ``Optional[RowMatrix]`` and its
    # ``V`` is ``Matrix``.
    def computeSVD(
        self, k: int, computeU: bool = ..., rCond: float = ...
    ) -> SingularValueDecomposition[RowMatrix, Matrix]: ...
    def computePrincipalComponents(self, k: int) -> Matrix: ...
    # Takes a local ``Matrix`` and is typed as returning another ``RowMatrix``.
    def multiply(self, matrix: Matrix) -> RowMatrix: ...
|
|
|
|
# Generic result wrapper parameterized by ``UT`` (type exposed by ``U``) and
# ``VT`` (type exposed by ``V``). All three members are read-only properties.
# Signature-only stub: every body is elided with ``...``.
class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
    # Typed ``Optional``: callers must be prepared for ``None``.
    @property
    def U(self) -> Optional[UT]: ...
    @property
    def s(self) -> Vector: ...
    @property
    def V(self) -> VT: ...
|
|
|
|
# Stub for a single row: an ``int`` index paired with a ``VectorLike`` value.
# The constructor takes the same two values, in that order.
# Signature-only stub: the body is elided with ``...``.
class IndexedRow:
    index: int
    vector: VectorLike
    def __init__(self, index: int, vector: VectorLike) -> None: ...
|
|
|
|
# Stub for a ``DistributedMatrix`` built from indexed rows.
# Signature-only: every body is elided with ``...``.
class IndexedRowMatrix(DistributedMatrix):
    # Each element of ``rows`` may be an ``(int, VectorLike)`` tuple or an
    # ``IndexedRow``. ``numRows`` and ``numCols`` are optional; their default
    # values are elided in this stub.
    def __init__(
        self,
        rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]],
        numRows: int = ...,
        numCols: int = ...,
    ) -> None: ...
    # Read-only property, typed as an RDD of ``IndexedRow`` only; the tuple
    # form accepted by the constructor does not appear in this type.
    @property
    def rows(self) -> RDD[IndexedRow]: ...
    def numRows(self) -> int: ...
    def numCols(self) -> int: ...
    # Unlike ``RowMatrix.columnSimilarities``, this takes no ``threshold``.
    def columnSimilarities(self) -> CoordinateMatrix: ...
    def computeGramianMatrix(self) -> Matrix: ...
    def toRowMatrix(self) -> RowMatrix: ...
    def toCoordinateMatrix(self) -> CoordinateMatrix: ...
    # Both block dimensions are optional (defaults elided).
    def toBlockMatrix(
        self, rowsPerBlock: int = ..., colsPerBlock: int = ...
    ) -> BlockMatrix: ...
    # ``k`` is required; ``computeU`` and ``rCond`` are optional. With this
    # parameterization the result's ``U`` is ``Optional[IndexedRowMatrix]``
    # and its ``V`` is ``Matrix``.
    def computeSVD(
        self, k: int, computeU: bool = ..., rCond: float = ...
    ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ...
    # Takes a local ``Matrix`` and is typed as returning another
    # ``IndexedRowMatrix``.
    def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ...
|
|
|
|
# Stub for a single matrix entry: two ``int`` attributes ``i`` and ``j`` plus
# a ``float`` ``value``. The constructor takes the same three values, in that
# order.
# Signature-only stub: the body is elided with ``...``.
class MatrixEntry:
    i: int
    j: int
    value: float
    def __init__(self, i: int, j: int, value: float) -> None: ...
|
|
|
|
# Stub for a ``DistributedMatrix`` built from individual entries.
# Signature-only: every body is elided with ``...``.
class CoordinateMatrix(DistributedMatrix):
    # Each element of ``entries`` may be an ``(int, int, float)`` tuple or a
    # ``MatrixEntry``. ``numRows`` and ``numCols`` are optional; their default
    # values are elided in this stub.
    def __init__(
        self,
        entries: RDD[Union[Tuple[int, int, float], MatrixEntry]],
        numRows: int = ...,
        numCols: int = ...,
    ) -> None: ...
    # Read-only property, typed as an RDD of ``MatrixEntry`` only; the tuple
    # form accepted by the constructor does not appear in this type.
    @property
    def entries(self) -> RDD[MatrixEntry]: ...
    def numRows(self) -> int: ...
    def numCols(self) -> int: ...
    def transpose(self) -> CoordinateMatrix: ...
    def toRowMatrix(self) -> RowMatrix: ...
    def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
    # Both block dimensions are optional (defaults elided).
    def toBlockMatrix(
        self, rowsPerBlock: int = ..., colsPerBlock: int = ...
    ) -> BlockMatrix: ...
|
|
|
|
# Stub for a ``DistributedMatrix`` built from sub-matrix blocks.
# Signature-only: every body is elided with ``...``.
class BlockMatrix(DistributedMatrix):
    # Each element of ``blocks`` is an ``((int, int), Matrix)`` pair.
    # ``rowsPerBlock`` and ``colsPerBlock`` are required; ``numRows`` and
    # ``numCols`` are optional, with default values elided in this stub.
    def __init__(
        self,
        blocks: RDD[Tuple[Tuple[int, int], Matrix]],
        rowsPerBlock: int,
        colsPerBlock: int,
        numRows: int = ...,
        numCols: int = ...,
    ) -> None: ...
    # The next five members are read-only properties, not methods.
    @property
    def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ...
    @property
    def rowsPerBlock(self) -> int: ...
    @property
    def colsPerBlock(self) -> int: ...
    @property
    def numRowBlocks(self) -> int: ...
    @property
    def numColBlocks(self) -> int: ...
    # Unlike the block counts above, the overall dimensions are plain methods.
    def numRows(self) -> int: ...
    def numCols(self) -> int: ...
    # Both are typed as returning a ``BlockMatrix``; ``storageLevel`` is
    # required by ``persist``.
    def cache(self) -> BlockMatrix: ...
    def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ...
    # Typed as returning ``None``.
    def validate(self) -> None: ...
    # The three binary operations take another ``BlockMatrix`` and are typed
    # as returning a ``BlockMatrix``.
    def add(self, other: BlockMatrix) -> BlockMatrix: ...
    def subtract(self, other: BlockMatrix) -> BlockMatrix: ...
    def multiply(self, other: BlockMatrix) -> BlockMatrix: ...
    def transpose(self) -> BlockMatrix: ...
    # Conversions to a local ``Matrix`` and to the other matrix types.
    def toLocalMatrix(self) -> Matrix: ...
    def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
    def toCoordinateMatrix(self) -> CoordinateMatrix: ...
|