[SPARK-11548][DOCS] Replaced example code in mllib-collaborative-filtering.md using include_example
Kindly review the changes. Author: Rishabh Bhardwaj <rbnext29@gmail.com> Closes #9519 from rishabhbhardwaj/SPARK-11337.
This commit is contained in:
parent
51d41e4b1a
commit
b7720fa455
|
@ -66,43 +66,7 @@ recommendation model by measuring the Mean Squared Error of rating prediction.
|
|||
|
||||
Refer to the [`ALS` Scala docs](api/scala/index.html#org.apache.spark.mllib.recommendation.ALS) for details on the API.
|
||||
|
||||
{% highlight scala %}
|
||||
import org.apache.spark.mllib.recommendation.ALS
|
||||
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
|
||||
import org.apache.spark.mllib.recommendation.Rating
|
||||
|
||||
// Load and parse the data
|
||||
val data = sc.textFile("data/mllib/als/test.data")
|
||||
val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
|
||||
Rating(user.toInt, item.toInt, rate.toDouble)
|
||||
})
|
||||
|
||||
// Build the recommendation model using ALS
|
||||
val rank = 10
|
||||
val numIterations = 10
|
||||
val model = ALS.train(ratings, rank, numIterations, 0.01)
|
||||
|
||||
// Evaluate the model on rating data
|
||||
val usersProducts = ratings.map { case Rating(user, product, rate) =>
|
||||
(user, product)
|
||||
}
|
||||
val predictions =
|
||||
model.predict(usersProducts).map { case Rating(user, product, rate) =>
|
||||
((user, product), rate)
|
||||
}
|
||||
val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
|
||||
((user, product), rate)
|
||||
}.join(predictions)
|
||||
val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
|
||||
val err = (r1 - r2)
|
||||
err * err
|
||||
}.mean()
|
||||
println("Mean Squared Error = " + MSE)
|
||||
|
||||
// Save and load model
|
||||
model.save(sc, "myModelPath")
|
||||
val sameModel = MatrixFactorizationModel.load(sc, "myModelPath")
|
||||
{% endhighlight %}
|
||||
{% include_example scala/org/apache/spark/examples/mllib/RecommendationExample.scala %}
|
||||
|
||||
If the rating matrix is derived from another source of information (e.g., it is inferred from
|
||||
other signals), you can use the `trainImplicit` method to get better results.
|
||||
|
@ -123,81 +87,7 @@ that is equivalent to the provided example in Scala is given below:
|
|||
|
||||
Refer to the [`ALS` Java docs](api/java/org/apache/spark/mllib/recommendation/ALS.html) for details on the API.
|
||||
|
||||
{% highlight java %}
|
||||
import scala.Tuple2;
|
||||
|
||||
import org.apache.spark.api.java.*;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.mllib.recommendation.ALS;
|
||||
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
|
||||
import org.apache.spark.mllib.recommendation.Rating;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
||||
public class CollaborativeFiltering {
|
||||
public static void main(String[] args) {
|
||||
SparkConf conf = new SparkConf().setAppName("Collaborative Filtering Example");
|
||||
JavaSparkContext sc = new JavaSparkContext(conf);
|
||||
|
||||
// Load and parse the data
|
||||
String path = "data/mllib/als/test.data";
|
||||
JavaRDD<String> data = sc.textFile(path);
|
||||
JavaRDD<Rating> ratings = data.map(
|
||||
new Function<String, Rating>() {
|
||||
public Rating call(String s) {
|
||||
String[] sarray = s.split(",");
|
||||
return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]),
|
||||
Double.parseDouble(sarray[2]));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Build the recommendation model using ALS
|
||||
int rank = 10;
|
||||
int numIterations = 10;
|
||||
MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);
|
||||
|
||||
// Evaluate the model on rating data
|
||||
JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(
|
||||
new Function<Rating, Tuple2<Object, Object>>() {
|
||||
public Tuple2<Object, Object> call(Rating r) {
|
||||
return new Tuple2<Object, Object>(r.user(), r.product());
|
||||
}
|
||||
}
|
||||
);
|
||||
JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
|
||||
model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
|
||||
new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
|
||||
public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
|
||||
return new Tuple2<Tuple2<Integer, Integer>, Double>(
|
||||
new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
|
||||
}
|
||||
}
|
||||
));
|
||||
JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
|
||||
JavaPairRDD.fromJavaRDD(ratings.map(
|
||||
new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
|
||||
public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
|
||||
return new Tuple2<Tuple2<Integer, Integer>, Double>(
|
||||
new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
|
||||
}
|
||||
}
|
||||
)).join(predictions).values();
|
||||
double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
|
||||
new Function<Tuple2<Double, Double>, Object>() {
|
||||
public Object call(Tuple2<Double, Double> pair) {
|
||||
Double err = pair._1() - pair._2();
|
||||
return err * err;
|
||||
}
|
||||
}
|
||||
).rdd()).mean();
|
||||
System.out.println("Mean Squared Error = " + MSE);
|
||||
|
||||
// Save and load model
|
||||
model.save(sc.sc(), "myModelPath");
|
||||
MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(sc.sc(), "myModelPath");
|
||||
}
|
||||
}
|
||||
{% endhighlight %}
|
||||
{% include_example java/org/apache/spark/examples/mllib/JavaRecommendationExample.java %}
|
||||
</div>
|
||||
|
||||
<div data-lang="python" markdown="1">
|
||||
|
@ -207,29 +97,7 @@ recommendation by measuring the Mean Squared Error of rating prediction.
|
|||
|
||||
Refer to the [`ALS` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.recommendation.ALS) for more details on the API.
|
||||
|
||||
{% highlight python %}
|
||||
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
|
||||
|
||||
# Load and parse the data
|
||||
data = sc.textFile("data/mllib/als/test.data")
|
||||
ratings = data.map(lambda l: l.split(',')).map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
|
||||
|
||||
# Build the recommendation model using Alternating Least Squares
|
||||
rank = 10
|
||||
numIterations = 10
|
||||
model = ALS.train(ratings, rank, numIterations)
|
||||
|
||||
# Evaluate the model on training data
|
||||
testdata = ratings.map(lambda p: (p[0], p[1]))
|
||||
predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
|
||||
ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
|
||||
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
|
||||
print("Mean Squared Error = " + str(MSE))
|
||||
|
||||
# Save and load model
|
||||
model.save(sc, "myModelPath")
|
||||
sameModel = MatrixFactorizationModel.load(sc, "myModelPath")
|
||||
{% endhighlight %}
|
||||
{% include_example python/mllib/recommendation_example.py %}
|
||||
|
||||
If the rating matrix is derived from another source of information (i.e., it is inferred from other
signals), you can use the `trainImplicit` method to get better results.
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.examples.mllib;
|
||||
|
||||
// $example on$
|
||||
import scala.Tuple2;
|
||||
|
||||
import org.apache.spark.api.java.*;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.mllib.recommendation.ALS;
|
||||
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
|
||||
import org.apache.spark.mllib.recommendation.Rating;
|
||||
import org.apache.spark.SparkConf;
|
||||
// $example off$
|
||||
|
||||
public class JavaRecommendationExample {
|
||||
public static void main(String args[]) {
|
||||
// $example on$
|
||||
SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example");
|
||||
JavaSparkContext jsc = new JavaSparkContext(conf);
|
||||
|
||||
// Load and parse the data
|
||||
String path = "data/mllib/als/test.data";
|
||||
JavaRDD<String> data = jsc.textFile(path);
|
||||
JavaRDD<Rating> ratings = data.map(
|
||||
new Function<String, Rating>() {
|
||||
public Rating call(String s) {
|
||||
String[] sarray = s.split(",");
|
||||
return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]),
|
||||
Double.parseDouble(sarray[2]));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Build the recommendation model using ALS
|
||||
int rank = 10;
|
||||
int numIterations = 10;
|
||||
MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);
|
||||
|
||||
// Evaluate the model on rating data
|
||||
JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(
|
||||
new Function<Rating, Tuple2<Object, Object>>() {
|
||||
public Tuple2<Object, Object> call(Rating r) {
|
||||
return new Tuple2<Object, Object>(r.user(), r.product());
|
||||
}
|
||||
}
|
||||
);
|
||||
JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
|
||||
model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
|
||||
new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
|
||||
public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
|
||||
return new Tuple2<Tuple2<Integer, Integer>, Double>(
|
||||
new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
|
||||
}
|
||||
}
|
||||
));
|
||||
JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
|
||||
JavaPairRDD.fromJavaRDD(ratings.map(
|
||||
new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
|
||||
public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
|
||||
return new Tuple2<Tuple2<Integer, Integer>, Double>(
|
||||
new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
|
||||
}
|
||||
}
|
||||
)).join(predictions).values();
|
||||
double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
|
||||
new Function<Tuple2<Double, Double>, Object>() {
|
||||
public Object call(Tuple2<Double, Double> pair) {
|
||||
Double err = pair._1() - pair._2();
|
||||
return err * err;
|
||||
}
|
||||
}
|
||||
).rdd()).mean();
|
||||
System.out.println("Mean Squared Error = " + MSE);
|
||||
|
||||
// Save and load model
|
||||
model.save(jsc.sc(), "target/tmp/myCollaborativeFilter");
|
||||
MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(jsc.sc(),
|
||||
"target/tmp/myCollaborativeFilter");
|
||||
// $example off$
|
||||
}
|
||||
}
|
54
examples/src/main/python/mllib/recommendation_example.py
Normal file
54
examples/src/main/python/mllib/recommendation_example.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
Collaborative Filtering Classification Example.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
|
||||
from pyspark import SparkContext
|
||||
|
||||
# $example on$
|
||||
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
|
||||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
sc = SparkContext(appName="PythonCollaborativeFilteringExample")
|
||||
# $example on$
|
||||
# Load and parse the data
|
||||
data = sc.textFile("data/mllib/als/test.data")
|
||||
ratings = data.map(lambda l: l.split(','))\
|
||||
.map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
|
||||
|
||||
# Build the recommendation model using Alternating Least Squares
|
||||
rank = 10
|
||||
numIterations = 10
|
||||
model = ALS.train(ratings, rank, numIterations)
|
||||
|
||||
# Evaluate the model on training data
|
||||
testdata = ratings.map(lambda p: (p[0], p[1]))
|
||||
predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
|
||||
ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
|
||||
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
|
||||
print("Mean Squared Error = " + str(MSE))
|
||||
|
||||
# Save and load model
|
||||
model.save(sc, "target/tmp/myCollaborativeFilter")
|
||||
sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
|
||||
# $example off$
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// scalastyle:off println
|
||||
package org.apache.spark.examples.mllib
|
||||
|
||||
import org.apache.spark.{SparkContext, SparkConf}
|
||||
// $example on$
|
||||
import org.apache.spark.mllib.recommendation.ALS
|
||||
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
|
||||
import org.apache.spark.mllib.recommendation.Rating
|
||||
// $example off$
|
||||
|
||||
/**
 * Example: collaborative filtering with MLlib's ALS (alternating least squares).
 *
 * Trains a matrix-factorization model on "user,product,rating" triples from
 * data/mllib/als/test.data, reports the training MSE, then saves and reloads
 * the model. The $example on$/off$ markers delimit the snippet pulled into the
 * docs by the include_example Jekyll tag.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data; each input line is "user,product,rating"
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10           // number of latent factors
    val numIterations = 10  // ALS iterations
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data: predict every observed
    // (user, product) pair and join predictions back onto actual ratings
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    sc.stop()
  }
}
|
||||
// scalastyle:on println
|
Loading…
Reference in a new issue