[SPARK-15031][SPARK-15134][EXAMPLE][DOC] Use SparkSession and update indent in examples

## What changes were proposed in this pull request?
1. Use `SparkSession` according to [SPARK-15031](https://issues.apache.org/jira/browse/SPARK-15031); a minimal sketch of the pattern is shown below.
2. Update the indentation of the `SparkSession` builder chains according to [SPARK-15134](https://issues.apache.org/jira/browse/SPARK-15134).
3. Also remove some duplicated spaces and add missing periods ('.') in comments.
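
For reference, a minimal sketch of the pattern applied across the examples (the object name and the checkpoint path below are illustrative only, not taken from any file in this diff):

```scala
import org.apache.spark.sql.SparkSession

object SparkSessionPatternSketch {
  def main(args: Array[String]): Unit = {
    // Previously the examples created a SparkConf and SparkContext directly:
    //   val conf = new SparkConf().setAppName("SomeExample")
    //   val sc = new SparkContext(conf)
    // Now a SparkSession is built instead, with the builder chain
    // indented one call per line.
    val spark = SparkSession
      .builder
      .appName("SparkSessionPatternSketch")
      .getOrCreate()

    // The underlying SparkContext is still reachable through the session
    // when needed, e.g. for setting a checkpoint directory.
    spark.sparkContext.setCheckpointDir("/tmp/checkpoints")

    spark.stop()
  }
}
```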

## How was this patch tested?
Manual tests.
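(For reference, the touched examples can be exercised locally via the `bin/run-example` launcher mentioned in their scaladoc headers, e.g. `./bin/run-example ml.DecisionTreeExample [options]`.)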

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13050 from zhengruifeng/use_sparksession.
Zheng RuiFeng 2016-05-11 22:45:30 -07:00 committed by Andrew Or
parent ba5487c061
commit 9e266d07a4
34 changed files with 192 additions and 151 deletions


@ -32,7 +32,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaDecisionTreeClassificationExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaDecisionTreeClassificationExample").getOrCreate();
.builder()
.appName("JavaDecisionTreeClassificationExample")
.getOrCreate();
// $example on$
// Load the data stored in LIBSVM format as a DataFrame.
@ -52,10 +54,10 @@ public class JavaDecisionTreeClassificationExample {
VectorIndexerModel featureIndexer = new VectorIndexer()
.setInputCol("features")
.setOutputCol("indexedFeatures")
.setMaxCategories(4) // features with > 4 distinct values are treated as continuous
.setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
.fit(data);
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@ -71,11 +73,11 @@ public class JavaDecisionTreeClassificationExample {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels());
// Chain indexers and tree in a Pipeline
// Chain indexers and tree in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[]{labelIndexer, featureIndexer, dt, labelConverter});
// Train model. This also runs the indexers.
// Train model. This also runs the indexers.
PipelineModel model = pipeline.fit(trainingData);
// Make predictions.
@ -84,7 +86,7 @@ public class JavaDecisionTreeClassificationExample {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5);
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")


@ -33,7 +33,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaDecisionTreeRegressionExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaDecisionTreeRegressionExample").getOrCreate();
.builder()
.appName("JavaDecisionTreeRegressionExample")
.getOrCreate();
// $example on$
// Load the data stored in LIBSVM format as a DataFrame.
Dataset<Row> data = spark.read().format("libsvm")
@ -47,7 +49,7 @@ public class JavaDecisionTreeRegressionExample {
.setMaxCategories(4)
.fit(data);
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@ -56,11 +58,11 @@ public class JavaDecisionTreeRegressionExample {
DecisionTreeRegressor dt = new DecisionTreeRegressor()
.setFeaturesCol("indexedFeatures");
// Chain indexer and tree in a Pipeline
// Chain indexer and tree in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[]{featureIndexer, dt});
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);
// Make predictions.
@ -69,7 +71,7 @@ public class JavaDecisionTreeRegressionExample {
// Select example rows to display.
predictions.select("label", "features").show(5);
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
RegressionEvaluator evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")


@ -62,7 +62,7 @@ public class JavaDeveloperApiExample {
new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)));
Dataset<Row> training = spark.createDataFrame(localTraining, LabeledPoint.class);
// Create a LogisticRegression instance. This instance is an Estimator.
// Create a LogisticRegression instance. This instance is an Estimator.
MyJavaLogisticRegression lr = new MyJavaLogisticRegression();
// Print out the parameters, documentation, and any default values.
System.out.println("MyJavaLogisticRegression parameters:\n" + lr.explainParams() + "\n");
@ -70,7 +70,7 @@ public class JavaDeveloperApiExample {
// We may set parameters using setter methods.
lr.setMaxIter(10);
// Learn a LogisticRegression model. This uses the parameters stored in lr.
// Learn a LogisticRegression model. This uses the parameters stored in lr.
MyJavaLogisticRegressionModel model = lr.fit(training);
// Prepare test data.
@ -214,7 +214,7 @@ class MyJavaLogisticRegressionModel
}
/**
* Number of classes the label can take. 2 indicates binary classification.
* Number of classes the label can take. 2 indicates binary classification.
*/
public int numClasses() { return 2; }


@ -38,7 +38,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaEstimatorTransformerParamExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaEstimatorTransformerParamExample").getOrCreate();
.builder()
.appName("JavaEstimatorTransformerParamExample")
.getOrCreate();
// $example on$
// Prepare training data.


@ -75,11 +75,11 @@ public class JavaGradientBoostedTreeClassifierExample {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels());
// Chain indexers and GBT in a Pipeline
// Chain indexers and GBT in a Pipeline.
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter});
// Train model. This also runs the indexers.
// Train model. This also runs the indexers.
PipelineModel model = pipeline.fit(trainingData);
// Make predictions.
@ -88,7 +88,7 @@ public class JavaGradientBoostedTreeClassifierExample {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5);
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")

View file

@ -34,7 +34,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaGradientBoostedTreeRegressorExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaGradientBoostedTreeRegressorExample").getOrCreate();
.builder()
.appName("JavaGradientBoostedTreeRegressorExample")
.getOrCreate();
// $example on$
// Load and parse the data file, converting it to a DataFrame.
@ -48,7 +50,7 @@ public class JavaGradientBoostedTreeRegressorExample {
.setMaxCategories(4)
.fit(data);
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
Dataset<Row> trainingData = splits[0];
Dataset<Row> testData = splits[1];
@ -59,10 +61,10 @@ public class JavaGradientBoostedTreeRegressorExample {
.setFeaturesCol("indexedFeatures")
.setMaxIter(10);
// Chain indexer and GBT in a Pipeline
// Chain indexer and GBT in a Pipeline.
Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] {featureIndexer, gbt});
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);
// Make predictions.
@ -71,7 +73,7 @@ public class JavaGradientBoostedTreeRegressorExample {
// Select example rows to display.
predictions.select("prediction", "label", "features").show(5);
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
RegressionEvaluator evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")


@ -30,10 +30,12 @@ import org.apache.spark.sql.SparkSession;
public class JavaLinearRegressionWithElasticNetExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaLinearRegressionWithElasticNetExample").getOrCreate();
.builder()
.appName("JavaLinearRegressionWithElasticNetExample")
.getOrCreate();
// $example on$
// Load training data
// Load training data.
Dataset<Row> training = spark.read().format("libsvm")
.load("data/mllib/sample_linear_regression_data.txt");
@ -42,14 +44,14 @@ public class JavaLinearRegressionWithElasticNetExample {
.setRegParam(0.3)
.setElasticNetParam(0.8);
// Fit the model
// Fit the model.
LinearRegressionModel lrModel = lr.fit(training);
// Print the coefficients and intercept for linear regression
// Print the coefficients and intercept for linear regression.
System.out.println("Coefficients: "
+ lrModel.coefficients() + " Intercept: " + lrModel.intercept());
// Summarize the model over the training set and print out some metrics
// Summarize the model over the training set and print out some metrics.
LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
System.out.println("numIterations: " + trainingSummary.totalIterations());
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));


@ -31,7 +31,9 @@ import org.apache.spark.sql.functions;
public class JavaLogisticRegressionSummaryExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaLogisticRegressionSummaryExample").getOrCreate();
.builder()
.appName("JavaLogisticRegressionSummaryExample")
.getOrCreate();
// Load training data
Dataset<Row> training = spark.read().format("libsvm")


@ -28,7 +28,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaLogisticRegressionWithElasticNetExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaLogisticRegressionWithElasticNetExample").getOrCreate();
.builder()
.appName("JavaLogisticRegressionWithElasticNetExample")
.getOrCreate();
// $example on$
// Load training data


@ -43,7 +43,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaModelSelectionViaCrossValidationExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaModelSelectionViaCrossValidationExample").getOrCreate();
.builder()
.appName("JavaModelSelectionViaCrossValidationExample")
.getOrCreate();
// $example on$
// Prepare training documents, which are labeled.


@ -43,7 +43,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaModelSelectionViaTrainValidationSplitExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaModelSelectionViaTrainValidationSplitExample").getOrCreate();
.builder()
.appName("JavaModelSelectionViaTrainValidationSplitExample")
.getOrCreate();
// $example on$
Dataset<Row> data = spark.read().format("libsvm")


@ -33,7 +33,9 @@ public class JavaMultilayerPerceptronClassifierExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaMultilayerPerceptronClassifierExample").getOrCreate();
.builder()
.appName("JavaMultilayerPerceptronClassifierExample")
.getOrCreate();
// $example on$
// Load training data


@ -35,7 +35,9 @@ import org.apache.spark.sql.types.StructType;
public class JavaQuantileDiscretizerExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaQuantileDiscretizerExample").getOrCreate();
.builder()
.appName("JavaQuantileDiscretizerExample")
.getOrCreate();
// $example on$
List<Row> data = Arrays.asList(


@ -33,7 +33,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaRandomForestClassifierExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaRandomForestClassifierExample").getOrCreate();
.builder()
.appName("JavaRandomForestClassifierExample")
.getOrCreate();
// $example on$
// Load and parse the data file, converting it to a DataFrame.


@ -34,7 +34,9 @@ import org.apache.spark.sql.SparkSession;
public class JavaRandomForestRegressorExample {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaRandomForestRegressorExample").getOrCreate();
.builder()
.appName("JavaRandomForestRegressorExample")
.getOrCreate();
// $example on$
// Load and parse the data file, converting it to a DataFrame.
@ -62,7 +64,7 @@ public class JavaRandomForestRegressorExample {
Pipeline pipeline = new Pipeline()
.setStages(new PipelineStage[] {featureIndexer, rf});
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
PipelineModel model = pipeline.fit(trainingData);
// Make predictions.


@ -46,7 +46,7 @@ public class JavaSimpleParamsExample {
.getOrCreate();
// Prepare training data.
// We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
// We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
// into DataFrames, where it uses the bean metadata to infer the schema.
List<LabeledPoint> localTraining = Lists.newArrayList(
new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
@ -56,7 +56,7 @@ public class JavaSimpleParamsExample {
Dataset<Row> training =
spark.createDataFrame(localTraining, LabeledPoint.class);
// Create a LogisticRegression instance. This instance is an Estimator.
// Create a LogisticRegression instance. This instance is an Estimator.
LogisticRegression lr = new LogisticRegression();
// Print out the parameters, documentation, and any default values.
System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");
@ -65,7 +65,7 @@ public class JavaSimpleParamsExample {
lr.setMaxIter(10)
.setRegParam(0.01);
// Learn a LogisticRegression model. This uses the parameters stored in lr.
// Learn a LogisticRegression model. This uses the parameters stored in lr.
LogisticRegressionModel model1 = lr.fit(training);
// Since model1 is a Model (i.e., a Transformer produced by an Estimator),
// we can view the parameters it used during fit().
@ -82,7 +82,7 @@ public class JavaSimpleParamsExample {
// One can also combine ParamMaps.
ParamMap paramMap2 = new ParamMap();
paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name
paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name.
ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);
// Now learn a new model using the paramMapCombined parameters.


@ -43,7 +43,9 @@ public class JavaSimpleTextClassificationPipeline {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder().appName("JavaSimpleTextClassificationPipeline").getOrCreate();
.builder()
.appName("JavaSimpleTextClassificationPipeline")
.getOrCreate();
// Prepare training documents, which are labeled.
List<LabeledDocument> localTraining = Lists.newArrayList(


@ -47,10 +47,10 @@ object DecisionTreeClassificationExample {
val featureIndexer = new VectorIndexer()
.setInputCol("features")
.setOutputCol("indexedFeatures")
.setMaxCategories(4) // features with > 4 distinct values are treated as continuous
.setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a DecisionTree model.
@ -64,11 +64,11 @@ object DecisionTreeClassificationExample {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels)
// Chain indexers and tree in a Pipeline
// Chain indexers and tree in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, dt, labelConverter))
// Train model. This also runs the indexers.
// Train model. This also runs the indexers.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -77,7 +77,7 @@ object DecisionTreeClassificationExample {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")


@ -23,7 +23,6 @@ import scala.language.reflectiveCalls
import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.{Pipeline, PipelineStage, Transformer}
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, DecisionTreeClassifier}
@ -40,7 +39,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
* {{{
* ./bin/run-example ml.DecisionTreeExample [options]
* }}}
* Note that Decision Trees can take a large amount of memory. If the run-example command above
* Note that Decision Trees can take a large amount of memory. If the run-example command above
* fails, try running via spark-submit and specifying the amount of memory as at least 1g.
* For local mode, run
* {{{
@ -87,7 +86,7 @@ object DecisionTreeExample {
.text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
.action((x, c) => c.copy(minInfoGain = x))
opt[Double]("fracTest")
.text(s"fraction of data to hold out for testing. If given option testInput, " +
.text(s"fraction of data to hold out for testing. If given option testInput, " +
s"this option is ignored. default: ${defaultParams.fracTest}")
.action((x, c) => c.copy(fracTest = x))
opt[Boolean]("cacheNodeIds")
@ -106,7 +105,7 @@ object DecisionTreeExample {
s"default: ${defaultParams.checkpointInterval}")
.action((x, c) => c.copy(checkpointInterval = x))
opt[String]("testInput")
.text(s"input path to test dataset. If given, option fracTest is ignored." +
.text(s"input path to test dataset. If given, option fracTest is ignored." +
s" default: ${defaultParams.testInput}")
.action((x, c) => c.copy(testInput = x))
opt[String]("dataFormat")
@ -157,11 +156,10 @@ object DecisionTreeExample {
* @param dataFormat "libsvm" or "dense"
* @param testInput Path to test dataset.
* @param algo Classification or Regression
* @param fracTest Fraction of input data to hold out for testing. Ignored if testInput given.
* @param fracTest Fraction of input data to hold out for testing. Ignored if testInput given.
* @return (training dataset, test dataset)
*/
private[ml] def loadDatasets(
sc: SparkContext,
input: String,
dataFormat: String,
testInput: String,
@ -200,18 +198,21 @@ object DecisionTreeExample {
}
def run(params: Params) {
val conf = new SparkConf().setAppName(s"DecisionTreeExample with $params")
val sc = new SparkContext(conf)
params.checkpointDir.foreach(sc.setCheckpointDir)
val spark = SparkSession
.builder
.appName(s"DecisionTreeExample with $params")
.getOrCreate()
params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
val algo = params.algo.toLowerCase
println(s"DecisionTreeExample with parameters:\n$params")
// Load training and test data and cache it.
val (training: DataFrame, test: DataFrame) =
loadDatasets(sc, params.input, params.dataFormat, params.testInput, algo, params.fracTest)
loadDatasets(params.input, params.dataFormat, params.testInput, algo, params.fracTest)
// Set up Pipeline
// Set up Pipeline.
val stages = new mutable.ArrayBuffer[PipelineStage]()
// (1) For classification, re-index classes.
val labelColName = if (algo == "classification") "indexedLabel" else "label"
@ -228,7 +229,7 @@ object DecisionTreeExample {
.setOutputCol("indexedFeatures")
.setMaxCategories(10)
stages += featuresIndexer
// (3) Learn Decision Tree
// (3) Learn Decision Tree.
val dt = algo match {
case "classification" =>
new DecisionTreeClassifier()
@ -255,13 +256,13 @@ object DecisionTreeExample {
stages += dt
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
// Fit the Pipeline.
val startTime = System.nanoTime()
val pipelineModel = pipeline.fit(training)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
println(s"Training time: $elapsedTime seconds")
// Get the trained Decision Tree from the fitted PipelineModel
// Get the trained Decision Tree from the fitted PipelineModel.
algo match {
case "classification" =>
val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeClassificationModel]
@ -280,7 +281,7 @@ object DecisionTreeExample {
case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
// Evaluate model on training, test data
// Evaluate model on training, test data.
algo match {
case "classification" =>
println("Training data results:")
@ -296,11 +297,11 @@ object DecisionTreeExample {
throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
sc.stop()
spark.stop()
}
/**
* Evaluate the given ClassificationModel on data. Print the results.
* Evaluate the given ClassificationModel on data. Print the results.
* @param model Must fit ClassificationModel abstraction
* @param data DataFrame with "prediction" and labelColName columns
* @param labelColName Name of the labelCol parameter for the model
@ -314,7 +315,7 @@ object DecisionTreeExample {
val fullPredictions = model.transform(data).cache()
val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
val labels = fullPredictions.select(labelColName).rdd.map(_.getDouble(0))
// Print number of classes for reference
// Print number of classes for reference.
val numClasses = MetadataUtils.getNumClasses(fullPredictions.schema(labelColName)) match {
case Some(n) => n
case None => throw new RuntimeException(
@ -325,7 +326,7 @@ object DecisionTreeExample {
}
/**
* Evaluate the given RegressionModel on data. Print the results.
* Evaluate the given RegressionModel on data. Print the results.
* @param model Must fit RegressionModel abstraction
* @param data DataFrame with "prediction" and labelColName columns
* @param labelColName Name of the labelCol parameter for the model


@ -46,7 +46,7 @@ object DecisionTreeRegressionExample {
.setMaxCategories(4)
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a DecisionTree model.
@ -54,11 +54,11 @@ object DecisionTreeRegressionExample {
.setLabelCol("label")
.setFeaturesCol("indexedFeatures")
// Chain indexer and tree in a Pipeline
// Chain indexer and tree in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(featureIndexer, dt))
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -67,7 +67,7 @@ object DecisionTreeRegressionExample {
// Select example rows to display.
predictions.select("prediction", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")


@ -50,7 +50,7 @@ object DeveloperApiExample {
LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))
// Create a LogisticRegression instance. This instance is an Estimator.
// Create a LogisticRegression instance. This instance is an Estimator.
val lr = new MyLogisticRegression()
// Print out the parameters, documentation, and any default values.
println("MyLogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -58,7 +58,7 @@ object DeveloperApiExample {
// We may set parameters using setter methods.
lr.setMaxIter(10)
// Learn a LogisticRegression model. This uses the parameters stored in lr.
// Learn a LogisticRegression model. This uses the parameters stored in lr.
val model = lr.fit(training.toDF())
// Prepare test data.
@ -84,7 +84,7 @@ object DeveloperApiExample {
/**
* Example of defining a parameter trait for a user-defined type of [[Classifier]].
*
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
*/
private trait MyLogisticRegressionParams extends ClassifierParams {
@ -96,7 +96,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams {
* - def getMyParamName
* - def setMyParamName
* Here, we have a trait to be mixed in with the Estimator and Model (MyLogisticRegression
* and MyLogisticRegressionModel). We place the setter (setMaxIter) method in the Estimator
* and MyLogisticRegressionModel). We place the setter (setMaxIter) method in the Estimator
* class since the maxIter parameter is only used during training (not in the Model).
*/
val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
@ -106,7 +106,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams {
/**
* Example of defining a type of [[Classifier]].
*
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
*/
private class MyLogisticRegression(override val uid: String)
extends Classifier[Vector, MyLogisticRegression, MyLogisticRegressionModel]
@ -138,7 +138,7 @@ private class MyLogisticRegression(override val uid: String)
/**
* Example of defining a type of [[ClassificationModel]].
*
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
* NOTE: This is private since it is an example. In practice, you may not want it to be private.
*/
private class MyLogisticRegressionModel(
override val uid: String,
@ -169,7 +169,7 @@ private class MyLogisticRegressionModel(
Vectors.dense(-margin, margin)
}
/** Number of classes the label can take. 2 indicates binary classification. */
/** Number of classes the label can take. 2 indicates binary classification. */
override val numClasses: Int = 2
/** Number of features the model was trained on. */


@ -43,7 +43,7 @@ object EstimatorTransformerParamExample {
(1.0, Vectors.dense(0.0, 1.2, -0.5))
)).toDF("label", "features")
// Create a LogisticRegression instance. This instance is an Estimator.
// Create a LogisticRegression instance. This instance is an Estimator.
val lr = new LogisticRegression()
// Print out the parameters, documentation, and any default values.
println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -52,7 +52,7 @@ object EstimatorTransformerParamExample {
lr.setMaxIter(10)
.setRegParam(0.01)
// Learn a LogisticRegression model. This uses the parameters stored in lr.
// Learn a LogisticRegression model. This uses the parameters stored in lr.
val model1 = lr.fit(training)
// Since model1 is a Model (i.e., a Transformer produced by an Estimator),
// we can view the parameters it used during fit().
@ -63,11 +63,11 @@ object EstimatorTransformerParamExample {
// We may alternatively specify parameters using a ParamMap,
// which supports several methods for specifying parameters.
val paramMap = ParamMap(lr.maxIter -> 20)
.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
.put(lr.regParam -> 0.1, lr.threshold -> 0.55) // Specify multiple Params.
// One can also combine ParamMaps.
val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability") // Change output column name
val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability") // Change output column name.
val paramMapCombined = paramMap ++ paramMap2
// Now learn a new model using the paramMapCombined parameters.


@ -23,13 +23,12 @@ import scala.language.reflectiveCalls
import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
import org.apache.spark.ml.regression.{GBTRegressionModel, GBTRegressor}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
@ -37,7 +36,7 @@ import org.apache.spark.sql.DataFrame
* {{{
* ./bin/run-example ml.GBTExample [options]
* }}}
* Decision Trees and ensembles can take a large amount of memory. If the run-example command
* Decision Trees and ensembles can take a large amount of memory. If the run-example command
* above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
* For local mode, run
* {{{
@ -88,7 +87,7 @@ object GBTExample {
.text(s"number of trees in ensemble, default: ${defaultParams.maxIter}")
.action((x, c) => c.copy(maxIter = x))
opt[Double]("fracTest")
.text(s"fraction of data to hold out for testing. If given option testInput, " +
.text(s"fraction of data to hold out for testing. If given option testInput, " +
s"this option is ignored. default: ${defaultParams.fracTest}")
.action((x, c) => c.copy(fracTest = x))
opt[Boolean]("cacheNodeIds")
@ -109,7 +108,7 @@ object GBTExample {
s"default: ${defaultParams.checkpointInterval}")
.action((x, c) => c.copy(checkpointInterval = x))
opt[String]("testInput")
.text(s"input path to test dataset. If given, option fracTest is ignored." +
.text(s"input path to test dataset. If given, option fracTest is ignored." +
s" default: ${defaultParams.testInput}")
.action((x, c) => c.copy(testInput = x))
opt[String]("dataFormat")
@ -136,15 +135,18 @@ object GBTExample {
}
def run(params: Params) {
val conf = new SparkConf().setAppName(s"GBTExample with $params")
val sc = new SparkContext(conf)
params.checkpointDir.foreach(sc.setCheckpointDir)
val spark = SparkSession
.builder
.appName(s"GBTExample with $params")
.getOrCreate()
params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
val algo = params.algo.toLowerCase
println(s"GBTExample with parameters:\n$params")
// Load training and test data and cache it.
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
params.dataFormat, params.testInput, algo, params.fracTest)
// Set up Pipeline
@ -164,7 +166,7 @@ object GBTExample {
.setOutputCol("indexedFeatures")
.setMaxCategories(10)
stages += featuresIndexer
// (3) Learn GBT
// (3) Learn GBT.
val dt = algo match {
case "classification" =>
new GBTClassifier()
@ -193,13 +195,13 @@ object GBTExample {
stages += dt
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
// Fit the Pipeline.
val startTime = System.nanoTime()
val pipelineModel = pipeline.fit(training)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
println(s"Training time: $elapsedTime seconds")
// Get the trained GBT from the fitted PipelineModel
// Get the trained GBT from the fitted PipelineModel.
algo match {
case "classification" =>
val rfModel = pipelineModel.stages.last.asInstanceOf[GBTClassificationModel]
@ -218,7 +220,7 @@ object GBTExample {
case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
// Evaluate model on training, test data
// Evaluate model on training, test data.
algo match {
case "classification" =>
println("Training data results:")
@ -234,7 +236,7 @@ object GBTExample {
throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
sc.stop()
spark.stop()
}
}
// scalastyle:on println


@ -51,7 +51,7 @@ object GradientBoostedTreeClassifierExample {
.setMaxCategories(4)
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a GBT model.
@ -66,11 +66,11 @@ object GradientBoostedTreeClassifierExample {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels)
// Chain indexers and GBT in a Pipeline
// Chain indexers and GBT in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, gbt, labelConverter))
// Train model. This also runs the indexers.
// Train model. This also runs the indexers.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -79,7 +79,7 @@ object GradientBoostedTreeClassifierExample {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")


@ -45,7 +45,7 @@ object GradientBoostedTreeRegressorExample {
.setMaxCategories(4)
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a GBT model.
@ -54,11 +54,11 @@ object GradientBoostedTreeRegressorExample {
.setFeaturesCol("indexedFeatures")
.setMaxIter(10)
// Chain indexer and GBT in a Pipeline
// Chain indexer and GBT in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(featureIndexer, gbt))
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -67,7 +67,7 @@ object GradientBoostedTreeRegressorExample {
// Select example rows to display.
predictions.select("prediction", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")


@ -22,10 +22,9 @@ import scala.language.reflectiveCalls
import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
* An example runner for linear regression with elastic-net (mixing L1/L2) regularization.
@ -74,11 +73,11 @@ object LinearRegressionExample {
s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
.action((x, c) => c.copy(tol = x))
opt[Double]("fracTest")
.text(s"fraction of data to hold out for testing. If given option testInput, " +
.text(s"fraction of data to hold out for testing. If given option testInput, " +
s"this option is ignored. default: ${defaultParams.fracTest}")
.action((x, c) => c.copy(fracTest = x))
opt[String]("testInput")
.text(s"input path to test dataset. If given, option fracTest is ignored." +
.text(s"input path to test dataset. If given, option fracTest is ignored." +
s" default: ${defaultParams.testInput}")
.action((x, c) => c.copy(testInput = x))
opt[String]("dataFormat")
@ -105,13 +104,15 @@ object LinearRegressionExample {
}
def run(params: Params) {
val conf = new SparkConf().setAppName(s"LinearRegressionExample with $params")
val sc = new SparkContext(conf)
val spark = SparkSession
.builder
.appName(s"LinearRegressionExample with $params")
.getOrCreate()
println(s"LinearRegressionExample with parameters:\n$params")
// Load training and test data and cache it.
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
params.dataFormat, params.testInput, "regression", params.fracTest)
val lir = new LinearRegression()
@ -136,7 +137,7 @@ object LinearRegressionExample {
println("Test data results:")
DecisionTreeExample.evaluateRegressionModel(lirModel, test, "label")
sc.stop()
spark.stop()
}
}
// scalastyle:on println


@ -23,12 +23,11 @@ import scala.language.reflectiveCalls
import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
* An example runner for logistic regression with elastic-net (mixing L1/L2) regularization.
@ -81,11 +80,11 @@ object LogisticRegressionExample {
s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
.action((x, c) => c.copy(tol = x))
opt[Double]("fracTest")
.text(s"fraction of data to hold out for testing. If given option testInput, " +
.text(s"fraction of data to hold out for testing. If given option testInput, " +
s"this option is ignored. default: ${defaultParams.fracTest}")
.action((x, c) => c.copy(fracTest = x))
opt[String]("testInput")
.text(s"input path to test dataset. If given, option fracTest is ignored." +
.text(s"input path to test dataset. If given, option fracTest is ignored." +
s" default: ${defaultParams.testInput}")
.action((x, c) => c.copy(testInput = x))
opt[String]("dataFormat")
@ -112,16 +111,18 @@ object LogisticRegressionExample {
}
def run(params: Params) {
val conf = new SparkConf().setAppName(s"LogisticRegressionExample with $params")
val sc = new SparkContext(conf)
val spark = SparkSession
.builder
.appName(s"LogisticRegressionExample with $params")
.getOrCreate()
println(s"LogisticRegressionExample with parameters:\n$params")
// Load training and test data and cache it.
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
params.dataFormat, params.testInput, "classification", params.fracTest)
// Set up Pipeline
// Set up Pipeline.
val stages = new mutable.ArrayBuffer[PipelineStage]()
val labelIndexer = new StringIndexer()
@ -141,7 +142,7 @@ object LogisticRegressionExample {
stages += lor
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
// Fit the Pipeline.
val startTime = System.nanoTime()
val pipelineModel = pipeline.fit(training)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
@ -156,7 +157,7 @@ object LogisticRegressionExample {
println("Test data results:")
DecisionTreeExample.evaluateClassificationModel(pipelineModel, test, "indexedLabel")
sc.stop()
spark.stop()
}
}
// scalastyle:on println


@ -27,7 +27,9 @@ object LogisticRegressionWithElasticNetExample {
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder.appName("LogisticRegressionWithElasticNetExample").getOrCreate()
.builder
.appName("LogisticRegressionWithElasticNetExample")
.getOrCreate()
// $example on$
// Load training data


@ -42,7 +42,9 @@ object ModelSelectionViaCrossValidationExample {
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder.appName("ModelSelectionViaCrossValidationExample").getOrCreate()
.builder
.appName("ModelSelectionViaCrossValidationExample")
.getOrCreate()
// $example on$
// Prepare training data from a list of (id, text, label) tuples.


@ -36,7 +36,9 @@ object ModelSelectionViaTrainValidationSplitExample {
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder.appName("ModelSelectionViaTrainValidationSplitExample").getOrCreate()
.builder
.appName("ModelSelectionViaTrainValidationSplitExample")
.getOrCreate()
// $example on$
// Prepare training and test data.


@ -51,7 +51,7 @@ object RandomForestClassifierExample {
.setMaxCategories(4)
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a RandomForest model.
@ -66,11 +66,11 @@ object RandomForestClassifierExample {
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels)
// Chain indexers and forest in a Pipeline
// Chain indexers and forest in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, rf, labelConverter))
// Train model. This also runs the indexers.
// Train model. This also runs the indexers.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -79,7 +79,7 @@ object RandomForestClassifierExample {
// Select example rows to display.
predictions.select("predictedLabel", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("indexedLabel")
.setPredictionCol("prediction")


@ -23,13 +23,12 @@ import scala.language.reflectiveCalls
import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
@ -37,7 +36,7 @@ import org.apache.spark.sql.DataFrame
* {{{
* ./bin/run-example ml.RandomForestExample [options]
* }}}
* Decision Trees and ensembles can take a large amount of memory. If the run-example command
* Decision Trees and ensembles can take a large amount of memory. If the run-example command
* above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
* For local mode, run
* {{{
@ -94,7 +93,7 @@ object RandomForestExample {
s" default: ${defaultParams.numTrees}")
.action((x, c) => c.copy(featureSubsetStrategy = x))
opt[Double]("fracTest")
.text(s"fraction of data to hold out for testing. If given option testInput, " +
.text(s"fraction of data to hold out for testing. If given option testInput, " +
s"this option is ignored. default: ${defaultParams.fracTest}")
.action((x, c) => c.copy(fracTest = x))
opt[Boolean]("cacheNodeIds")
@ -115,7 +114,7 @@ object RandomForestExample {
s"default: ${defaultParams.checkpointInterval}")
.action((x, c) => c.copy(checkpointInterval = x))
opt[String]("testInput")
.text(s"input path to test dataset. If given, option fracTest is ignored." +
.text(s"input path to test dataset. If given, option fracTest is ignored." +
s" default: ${defaultParams.testInput}")
.action((x, c) => c.copy(testInput = x))
opt[String]("dataFormat")
@ -142,18 +141,21 @@ object RandomForestExample {
}
def run(params: Params) {
val conf = new SparkConf().setAppName(s"RandomForestExample with $params")
val sc = new SparkContext(conf)
params.checkpointDir.foreach(sc.setCheckpointDir)
val spark = SparkSession
.builder
.appName(s"RandomForestExample with $params")
.getOrCreate()
params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
val algo = params.algo.toLowerCase
println(s"RandomForestExample with parameters:\n$params")
// Load training and test data and cache it.
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
params.dataFormat, params.testInput, algo, params.fracTest)
// Set up Pipeline
// Set up Pipeline.
val stages = new mutable.ArrayBuffer[PipelineStage]()
// (1) For classification, re-index classes.
val labelColName = if (algo == "classification") "indexedLabel" else "label"
@ -170,7 +172,7 @@ object RandomForestExample {
.setOutputCol("indexedFeatures")
.setMaxCategories(10)
stages += featuresIndexer
// (3) Learn Random Forest
// (3) Learn Random Forest.
val dt = algo match {
case "classification" =>
new RandomForestClassifier()
@ -201,13 +203,13 @@ object RandomForestExample {
stages += dt
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
// Fit the Pipeline.
val startTime = System.nanoTime()
val pipelineModel = pipeline.fit(training)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
println(s"Training time: $elapsedTime seconds")
// Get the trained Random Forest from the fitted PipelineModel
// Get the trained Random Forest from the fitted PipelineModel.
algo match {
case "classification" =>
val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestClassificationModel]
@ -226,7 +228,7 @@ object RandomForestExample {
case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
// Evaluate model on training, test data
// Evaluate model on training, test data.
algo match {
case "classification" =>
println("Training data results:")
@ -242,7 +244,7 @@ object RandomForestExample {
throw new IllegalArgumentException("Algo ${params.algo} not supported.")
}
sc.stop()
spark.stop()
}
}
// scalastyle:on println


@ -45,7 +45,7 @@ object RandomForestRegressorExample {
.setMaxCategories(4)
.fit(data)
// Split the data into training and test sets (30% held out for testing)
// Split the data into training and test sets (30% held out for testing).
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
// Train a RandomForest model.
@ -53,11 +53,11 @@ object RandomForestRegressorExample {
.setLabelCol("label")
.setFeaturesCol("indexedFeatures")
// Chain indexer and forest in a Pipeline
// Chain indexer and forest in a Pipeline.
val pipeline = new Pipeline()
.setStages(Array(featureIndexer, rf))
// Train model. This also runs the indexer.
// Train model. This also runs the indexer.
val model = pipeline.fit(trainingData)
// Make predictions.
@ -66,7 +66,7 @@ object RandomForestRegressorExample {
// Select example rows to display.
predictions.select("prediction", "label", "features").show(5)
// Select (prediction, true label) and compute test error
// Select (prediction, true label) and compute test error.
val evaluator = new RegressionEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")


@ -41,7 +41,7 @@ object SimpleParamsExample {
import spark.implicits._
// Prepare training data.
// We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes
// We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes
// into DataFrames, where it uses the case class metadata to infer the schema.
val training = spark.createDataFrame(Seq(
LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
@ -49,7 +49,7 @@ object SimpleParamsExample {
LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))
// Create a LogisticRegression instance. This instance is an Estimator.
// Create a LogisticRegression instance. This instance is an Estimator.
val lr = new LogisticRegression()
// Print out the parameters, documentation, and any default values.
println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -58,7 +58,7 @@ object SimpleParamsExample {
lr.setMaxIter(10)
.setRegParam(0.01)
// Learn a LogisticRegression model. This uses the parameters stored in lr.
// Learn a LogisticRegression model. This uses the parameters stored in lr.
val model1 = lr.fit(training)
// Since model1 is a Model (i.e., a Transformer produced by an Estimator),
// we can view the parameters it used during fit().
@ -69,7 +69,7 @@ object SimpleParamsExample {
// We may alternatively specify parameters using a ParamMap,
// which supports several methods for specifying parameters.
val paramMap = ParamMap(lr.maxIter -> 20)
paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
paramMap.put(lr.regParam -> 0.1, lr.thresholds -> Array(0.45, 0.55)) // Specify multiple Params.
// One can also combine ParamMaps.