[SPARK-8919] [DOCUMENTATION, MLLIB] Added @since tags to mllib.recommendation

Author: vinodkc <vinod.kc.in@gmail.com>

Closes #7325 from vinodkc/add_since_mllib.recommendation and squashes the following commits:

93156f2 [vinodkc] Changed 0.8.0 to 0.9.1
c413350 [vinodkc] Added @since
Authored by vinodkc on 2015-07-28 08:48:57 -07:00; committed by Xiangrui Meng
parent ac8c549e2f
commit 4af622c855
2 changed files with 47 additions and 1 deletion

mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala

@@ -26,6 +26,7 @@ import org.apache.spark.storage.StorageLevel
/**
* A more compact class to represent a rating than Tuple3[Int, Int, Double].
* @since 0.8.0
*/
case class Rating(user: Int, product: Int, rating: Double)
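For orientation, a minimal sketch of building the RDD[Rating] that the training methods below expect; the SparkContext sc and the sample triples are assumptions for illustration only:

import org.apache.spark.mllib.recommendation.Rating

// assumes an existing SparkContext named sc
val ratings = sc.parallelize(Seq(
  Rating(1, 10, 4.0),  // user 1 rated product 10 with 4.0
  Rating(1, 20, 3.5),
  Rating(2, 10, 5.0)))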
@@ -254,6 +255,7 @@ class ALS private (
/**
* Top-level methods for calling Alternating Least Squares (ALS) matrix factorization.
* @since 0.8.0
*/
object ALS {
/**
@@ -269,6 +271,7 @@ object ALS {
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
* @param seed random seed
* @since 0.9.1
*/
def train(
ratings: RDD[Rating],
@@ -293,6 +296,7 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
* @since 0.8.0
*/
def train(
ratings: RDD[Rating],
@@ -315,6 +319,7 @@ object ALS {
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
* @since 0.8.0
*/
def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double)
: MatrixFactorizationModel = {
@@ -331,6 +336,7 @@ object ALS {
* @param ratings RDD of (userID, productID, rating) pairs
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
* @since 0.8.0
*/
def train(ratings: RDD[Rating], rank: Int, iterations: Int)
: MatrixFactorizationModel = {
@@ -351,6 +357,7 @@ object ALS {
* @param blocks level of parallelism to split computation into
* @param alpha confidence parameter
* @param seed random seed
* @since 0.8.1
*/
def trainImplicit(
ratings: RDD[Rating],
@@ -377,6 +384,7 @@ object ALS {
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
* @param alpha confidence parameter
* @since 0.8.1
*/
def trainImplicit(
ratings: RDD[Rating],
@@ -401,6 +409,7 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
* @param alpha confidence parameter
* @since 0.8.1
*/
def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, alpha: Double)
: MatrixFactorizationModel = {
@@ -418,6 +427,7 @@ object ALS {
* @param ratings RDD of (userID, productID, rating) pairs
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
* @since 0.8.1
*/
def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int)
: MatrixFactorizationModel = {
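To tie the ALS entry points above together, a hedged usage sketch; ratings is the hypothetical RDD[Rating] from the earlier sketch, and the rank, iteration, lambda, and alpha values are illustrative rather than taken from this commit:

import org.apache.spark.mllib.recommendation.ALS

// explicit feedback: rank 10, 10 iterations, lambda 0.01
val explicitModel = ALS.train(ratings, 10, 10, 0.01)

// implicit feedback: rank 10, 10 iterations, lambda 0.01, alpha 1.0
val implicitModel = ALS.trainImplicit(ratings, 10, 10, 0.01, 1.0)

Both calls return a MatrixFactorizationModel, whose diff follows below.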

mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala

@@ -49,6 +49,7 @@ import org.apache.spark.storage.StorageLevel
* the features computed for this user.
* @param productFeatures RDD of tuples where each tuple represents the productId
* and the features computed for this product.
* @since 0.8.0
*/
class MatrixFactorizationModel(
val rank: Int,
@@ -73,7 +74,9 @@ class MatrixFactorizationModel(
}
}
/** Predict the rating of one user for one product. */
/** Predict the rating of one user for one product.
* @since 0.8.0
*/
def predict(user: Int, product: Int): Double = {
val userVector = userFeatures.lookup(user).head
val productVector = productFeatures.lookup(product).head
@@ -111,6 +114,7 @@ class MatrixFactorizationModel(
*
* @param usersProducts RDD of (user, product) pairs.
* @return RDD of Ratings.
* @since 0.9.0
*/
def predict(usersProducts: RDD[(Int, Int)]): RDD[Rating] = {
// Previously the partitions of ratings are only based on the given products.
@@ -142,6 +146,7 @@ class MatrixFactorizationModel(
/**
* Java-friendly version of [[MatrixFactorizationModel.predict]].
* @since 1.2.0
*/
def predict(usersProducts: JavaPairRDD[JavaInteger, JavaInteger]): JavaRDD[Rating] = {
predict(usersProducts.rdd.asInstanceOf[RDD[(Int, Int)]]).toJavaRDD()
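As a quick illustration of the RDD-based predict documented above (the Java-friendly variant simply wraps it), assuming sc and a trained model from the earlier ALS sketch:

// score a hypothetical batch of (user, product) pairs in one pass
val usersProducts = sc.parallelize(Seq((1, 10), (1, 20), (2, 10)))
val predictions = model.predict(usersProducts)  // RDD[Rating]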
@@ -157,6 +162,7 @@ class MatrixFactorizationModel(
* by score, decreasing. The first returned is the one predicted to be most strongly
* recommended to the user. The score is an opaque value that indicates how strongly
* recommended the product is.
* @since 1.1.0
*/
def recommendProducts(user: Int, num: Int): Array[Rating] =
MatrixFactorizationModel.recommend(userFeatures.lookup(user).head, productFeatures, num)
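For illustration, assuming a trained model, the per-user top-N call above might be used as:

// top 5 products for the hypothetical user 1, strongest recommendation first
val topProducts = model.recommendProducts(1, 5)
topProducts.foreach(r => println(s"product ${r.product}: score ${r.rating}"))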
@@ -173,6 +179,7 @@ class MatrixFactorizationModel(
* by score, decreasing. The first returned is the one predicted to be most strongly
* recommended to the product. The score is an opaque value that indicates how strongly
* recommended the user is.
* @since 1.1.0
*/
def recommendUsers(product: Int, num: Int): Array[Rating] =
MatrixFactorizationModel.recommend(productFeatures.lookup(product).head, userFeatures, num)
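And the symmetric per-product call, again with hypothetical ids:

// top 5 users for the hypothetical product 10
val topUsers = model.recommendUsers(10, 5)
topUsers.foreach(r => println(s"user ${r.user}: score ${r.rating}"))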
@@ -180,6 +187,20 @@ class MatrixFactorizationModel(
protected override val formatVersion: String = "1.0"
/**
* Save this model to the given path.
*
* This saves:
* - human-readable (JSON) model metadata to path/metadata/
* - Parquet formatted data to path/data/
*
* The model may be loaded using [[Loader.load]].
*
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
* @since 1.3.0
*/
override def save(sc: SparkContext, path: String): Unit = {
MatrixFactorizationModel.SaveLoadV1_0.save(this, path)
}
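A sketch of saving a trained model as described above; sc, model, and the output path are assumptions:

// writes JSON metadata to <path>/metadata/ and Parquet data to <path>/data/;
// throws if the directory already exists
model.save(sc, "hdfs:///tmp/als-model")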
@@ -191,6 +212,7 @@ class MatrixFactorizationModel(
* @return [(Int, Array[Rating])] objects, where every tuple contains a userID and an array of
* rating objects which contains the same userId, recommended productID and a "score" in the
* rating field. Semantics of score is same as recommendProducts API
* @since 1.4.0
*/
def recommendProductsForUsers(num: Int): RDD[(Int, Array[Rating])] = {
MatrixFactorizationModel.recommendForAll(rank, userFeatures, productFeatures, num).map {
@@ -208,6 +230,7 @@ class MatrixFactorizationModel(
* @return [(Int, Array[Rating])] objects, where every tuple contains a productID and an array
* of rating objects which contains the recommended userId, same productID and a "score" in the
* rating field. Semantics of score is same as recommendUsers API
* @since 1.4.0
*/
def recommendUsersForProducts(num: Int): RDD[(Int, Array[Rating])] = {
MatrixFactorizationModel.recommendForAll(rank, productFeatures, userFeatures, num).map {
@@ -218,6 +241,9 @@ class MatrixFactorizationModel(
}
}
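For completeness, a hedged sketch of the batch recommenders documented above (recommendProductsForUsers / recommendUsersForProducts), assuming a trained model:

// top 3 product recommendations for every user, keyed by userId
val productsForUsers = model.recommendProductsForUsers(3)  // RDD[(Int, Array[Rating])]
// top 3 user recommendations for every product, keyed by productId
val usersForProducts = model.recommendUsersForProducts(3)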
/**
* @since 1.3.0
*/
object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] {
import org.apache.spark.mllib.util.Loader._
@@ -292,6 +318,16 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] {
}
}
/**
* Load a model from the given path.
*
* The model should have been saved by [[Saveable.save]].
*
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
* @since 1.3.0
*/
override def load(sc: SparkContext, path: String): MatrixFactorizationModel = {
val (loadedClassName, formatVersion, _) = loadMetadata(sc, path)
val classNameV1_0 = SaveLoadV1_0.thisClassName
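Rounding out the save sketch earlier, a hedged example of loading a saved model; the path is an assumption:

import org.apache.spark.mllib.recommendation.MatrixFactorizationModel

val restored = MatrixFactorizationModel.load(sc, "hdfs:///tmp/als-model")
val score = restored.predict(1, 10)  // Double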