[SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to use ClusteringEvaluator
## What changes were proposed in this pull request?

The PR updates the examples for `BisectingKMeans` so that they don't use the deprecated method `computeCost` (see SPARK-25758).

## How was this patch tested?

Running the examples.

Closes #22763 from mgaido91/SPARK-25764.

Authored-by: Marco Gaido <marcogaido91@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
This commit is contained in:
parent
f704ebe902
commit
d0ecff2854
|
@ -20,6 +20,7 @@ package org.apache.spark.examples.ml;
|
|||
// $example on$
|
||||
import org.apache.spark.ml.clustering.BisectingKMeans;
|
||||
import org.apache.spark.ml.clustering.BisectingKMeansModel;
|
||||
import org.apache.spark.ml.evaluation.ClusteringEvaluator;
|
||||
import org.apache.spark.ml.linalg.Vector;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
|
@ -50,9 +51,14 @@ public class JavaBisectingKMeansExample {
|
|||
BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1);
|
||||
BisectingKMeansModel model = bkm.fit(dataset);
|
||||
|
||||
// Evaluate clustering.
|
||||
double cost = model.computeCost(dataset);
|
||||
System.out.println("Within Set Sum of Squared Errors = " + cost);
|
||||
// Make predictions
|
||||
Dataset<Row> predictions = model.transform(dataset);
|
||||
|
||||
// Evaluate clustering by computing Silhouette score
|
||||
ClusteringEvaluator evaluator = new ClusteringEvaluator();
|
||||
|
||||
double silhouette = evaluator.evaluate(predictions);
|
||||
System.out.println("Silhouette with squared euclidean distance = " + silhouette);
|
||||
|
||||
// Shows the result.
|
||||
System.out.println("Cluster Centers: ");
|
||||
|
|
|
@ -24,6 +24,7 @@ from __future__ import print_function
|
|||
|
||||
# $example on$
|
||||
from pyspark.ml.clustering import BisectingKMeans
|
||||
from pyspark.ml.evaluation import ClusteringEvaluator
|
||||
# $example off$
|
||||
from pyspark.sql import SparkSession
|
||||
|
||||
|
@ -41,9 +42,14 @@ if __name__ == "__main__":
|
|||
bkm = BisectingKMeans().setK(2).setSeed(1)
|
||||
model = bkm.fit(dataset)
|
||||
|
||||
# Evaluate clustering.
|
||||
cost = model.computeCost(dataset)
|
||||
print("Within Set Sum of Squared Errors = " + str(cost))
|
||||
# Make predictions
|
||||
predictions = model.transform(dataset)
|
||||
|
||||
# Evaluate clustering by computing Silhouette score
|
||||
evaluator = ClusteringEvaluator()
|
||||
|
||||
silhouette = evaluator.evaluate(predictions)
|
||||
print("Silhouette with squared euclidean distance = " + str(silhouette))
|
||||
|
||||
# Shows the result.
|
||||
print("Cluster Centers: ")
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.spark.examples.ml
|
|||
|
||||
// $example on$
|
||||
import org.apache.spark.ml.clustering.BisectingKMeans
|
||||
import org.apache.spark.ml.evaluation.ClusteringEvaluator
|
||||
// $example off$
|
||||
import org.apache.spark.sql.SparkSession
|
||||
|
||||
|
@ -48,9 +49,14 @@ object BisectingKMeansExample {
|
|||
val bkm = new BisectingKMeans().setK(2).setSeed(1)
|
||||
val model = bkm.fit(dataset)
|
||||
|
||||
// Evaluate clustering.
|
||||
val cost = model.computeCost(dataset)
|
||||
println(s"Within Set Sum of Squared Errors = $cost")
|
||||
// Make predictions
|
||||
val predictions = model.transform(dataset)
|
||||
|
||||
// Evaluate clustering by computing Silhouette score
|
||||
val evaluator = new ClusteringEvaluator()
|
||||
|
||||
val silhouette = evaluator.evaluate(predictions)
|
||||
println(s"Silhouette with squared euclidean distance = $silhouette")
|
||||
|
||||
// Shows the result.
|
||||
println("Cluster Centers: ")
|
||||
|
|
Loading…
Reference in a new issue