[SPARK-15031][SPARK-15134][EXAMPLE][DOC] Use SparkSession and update indent in examples

## What changes were proposed in this pull request? 1, Use `SparkSession` according to [SPARK-15031](https://issues.apache.org/jira/browse/SPARK-15031) 2, Update indent for `SparkContext` according to [SPARK-15134](https://issues.apache.org/jira/browse/SPARK-15134) 3, BTW, remove some duplicate space and add missing '.' ## How was this patch tested? manual tests Author: Zheng RuiFeng <ruifengz@foxmail.com> Closes #13050 from zhengruifeng/use_sparksession.
2016-05-11 22:45:30 -07:00 · 2016-05-11 22:45:30 -07:00 · 9e266d07a4
parent ba5487c061
commit 9e266d07a4
34 changed files with 192 additions and 151 deletions
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java
@ -32,7 +32,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaDecisionTreeClassificationExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaDecisionTreeClassificationExample").getOrCreate();
+      .builder()
+      .appName("JavaDecisionTreeClassificationExample")
+      .getOrCreate();

    // $example on$
    // Load the data stored in LIBSVM format as a DataFrame.
@ -52,10 +54,10 @@ public class JavaDecisionTreeClassificationExample {
    VectorIndexerModel featureIndexer = new VectorIndexer()
      .setInputCol("features")
      .setOutputCol("indexedFeatures")
-      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous
+      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
      .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
@ -71,11 +73,11 @@ public class JavaDecisionTreeClassificationExample {
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels());

-    // Chain indexers and tree in a Pipeline
+    // Chain indexers and tree in a Pipeline.
    Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[]{labelIndexer, featureIndexer, dt, labelConverter});

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
@ -84,7 +86,7 @@ public class JavaDecisionTreeClassificationExample {
    // Select example rows to display.
    predictions.select("predictedLabel", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel")
      .setPredictionCol("prediction")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java
@ -33,7 +33,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaDecisionTreeRegressionExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaDecisionTreeRegressionExample").getOrCreate();
+      .builder()
+      .appName("JavaDecisionTreeRegressionExample")
+      .getOrCreate();
    // $example on$
    // Load the data stored in LIBSVM format as a DataFrame.
    Dataset<Row> data = spark.read().format("libsvm")
@ -47,7 +49,7 @@ public class JavaDecisionTreeRegressionExample {
      .setMaxCategories(4)
      .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
@ -56,11 +58,11 @@ public class JavaDecisionTreeRegressionExample {
    DecisionTreeRegressor dt = new DecisionTreeRegressor()
      .setFeaturesCol("indexedFeatures");

-    // Chain indexer and tree in a Pipeline
+    // Chain indexer and tree in a Pipeline.
    Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[]{featureIndexer, dt});

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
@ -69,7 +71,7 @@ public class JavaDecisionTreeRegressionExample {
    // Select example rows to display.
    predictions.select("label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    RegressionEvaluator evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
@ -62,7 +62,7 @@ public class JavaDeveloperApiExample {
        new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)));
    Dataset<Row> training = spark.createDataFrame(localTraining, LabeledPoint.class);

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
    MyJavaLogisticRegression lr = new MyJavaLogisticRegression();
    // Print out the parameters, documentation, and any default values.
    System.out.println("MyJavaLogisticRegression parameters:\n" + lr.explainParams() + "\n");
@ -70,7 +70,7 @@ public class JavaDeveloperApiExample {
    // We may set parameters using setter methods.
    lr.setMaxIter(10);

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
    MyJavaLogisticRegressionModel model = lr.fit(training);

    // Prepare test data.
@ -214,7 +214,7 @@ class MyJavaLogisticRegressionModel
  }

  /**
-   * Number of classes the label can take.  2 indicates binary classification.
+   * Number of classes the label can take. 2 indicates binary classification.
   */
  public int numClasses() { return 2; }

--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
@ -38,7 +38,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaEstimatorTransformerParamExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaEstimatorTransformerParamExample").getOrCreate();
+      .builder()
+      .appName("JavaEstimatorTransformerParamExample")
+      .getOrCreate();

    // $example on$
    // Prepare training data.
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java
@ -75,11 +75,11 @@ public class JavaGradientBoostedTreeClassifierExample {
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels());

-    // Chain indexers and GBT in a Pipeline
+    // Chain indexers and GBT in a Pipeline.
    Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter});

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
@ -88,7 +88,7 @@ public class JavaGradientBoostedTreeClassifierExample {
    // Select example rows to display.
    predictions.select("predictedLabel", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel")
      .setPredictionCol("prediction")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java
@ -34,7 +34,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaGradientBoostedTreeRegressorExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaGradientBoostedTreeRegressorExample").getOrCreate();
+      .builder()
+      .appName("JavaGradientBoostedTreeRegressorExample")
+      .getOrCreate();

    // $example on$
    // Load and parse the data file, converting it to a DataFrame.
@ -48,7 +50,7 @@ public class JavaGradientBoostedTreeRegressorExample {
      .setMaxCategories(4)
      .fit(data);

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
@ -59,10 +61,10 @@ public class JavaGradientBoostedTreeRegressorExample {
      .setFeaturesCol("indexedFeatures")
      .setMaxIter(10);

-    // Chain indexer and GBT in a Pipeline
+    // Chain indexer and GBT in a Pipeline.
    Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] {featureIndexer, gbt});

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
@ -71,7 +73,7 @@ public class JavaGradientBoostedTreeRegressorExample {
    // Select example rows to display.
    predictions.select("prediction", "label", "features").show(5);

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    RegressionEvaluator evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
@ -30,10 +30,12 @@ import org.apache.spark.sql.SparkSession;
 public class JavaLinearRegressionWithElasticNetExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaLinearRegressionWithElasticNetExample").getOrCreate();
+      .builder()
+      .appName("JavaLinearRegressionWithElasticNetExample")
+      .getOrCreate();

    // $example on$
-    // Load training data
+    // Load training data.
    Dataset<Row> training = spark.read().format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt");

@ -42,14 +44,14 @@ public class JavaLinearRegressionWithElasticNetExample {
      .setRegParam(0.3)
      .setElasticNetParam(0.8);

-    // Fit the model
+    // Fit the model.
    LinearRegressionModel lrModel = lr.fit(training);

-    // Print the coefficients and intercept for linear regression
+    // Print the coefficients and intercept for linear regression.
    System.out.println("Coefficients: "
      + lrModel.coefficients() + " Intercept: " + lrModel.intercept());

-    // Summarize the model over the training set and print out some metrics
+    // Summarize the model over the training set and print out some metrics.
    LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
    System.out.println("numIterations: " + trainingSummary.totalIterations());
    System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
@ -31,7 +31,9 @@ import org.apache.spark.sql.functions;
 public class JavaLogisticRegressionSummaryExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaLogisticRegressionSummaryExample").getOrCreate();
+      .builder()
+      .appName("JavaLogisticRegressionSummaryExample")
+      .getOrCreate();

    // Load training data
    Dataset<Row> training = spark.read().format("libsvm")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
@ -28,7 +28,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaLogisticRegressionWithElasticNetExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaLogisticRegressionWithElasticNetExample").getOrCreate();
+      .builder()
+      .appName("JavaLogisticRegressionWithElasticNetExample")
+      .getOrCreate();

    // $example on$
    // Load training data
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
@ -43,7 +43,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaModelSelectionViaCrossValidationExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaModelSelectionViaCrossValidationExample").getOrCreate();
+      .builder()
+      .appName("JavaModelSelectionViaCrossValidationExample")
+      .getOrCreate();

    // $example on$
    // Prepare training documents, which are labeled.
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
@ -43,7 +43,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaModelSelectionViaTrainValidationSplitExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaModelSelectionViaTrainValidationSplitExample").getOrCreate();
+      .builder()
+      .appName("JavaModelSelectionViaTrainValidationSplitExample")
+      .getOrCreate();

    // $example on$
    Dataset<Row> data = spark.read().format("libsvm")
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
@ -33,7 +33,9 @@ public class JavaMultilayerPerceptronClassifierExample {

  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaMultilayerPerceptronClassifierExample").getOrCreate();
+      .builder()
+      .appName("JavaMultilayerPerceptronClassifierExample")
+      .getOrCreate();

    // $example on$
    // Load training data
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java
@ -35,7 +35,9 @@ import org.apache.spark.sql.types.StructType;
 public class JavaQuantileDiscretizerExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaQuantileDiscretizerExample").getOrCreate();
+      .builder()
+      .appName("JavaQuantileDiscretizerExample")
+      .getOrCreate();

    // $example on$
    List<Row> data = Arrays.asList(
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java
@ -33,7 +33,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaRandomForestClassifierExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaRandomForestClassifierExample").getOrCreate();
+      .builder()
+      .appName("JavaRandomForestClassifierExample")
+      .getOrCreate();

    // $example on$
    // Load and parse the data file, converting it to a DataFrame.
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestRegressorExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestRegressorExample.java
@ -34,7 +34,9 @@ import org.apache.spark.sql.SparkSession;
 public class JavaRandomForestRegressorExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaRandomForestRegressorExample").getOrCreate();
+      .builder()
+      .appName("JavaRandomForestRegressorExample")
+      .getOrCreate();

    // $example on$
    // Load and parse the data file, converting it to a DataFrame.
@ -62,7 +64,7 @@ public class JavaRandomForestRegressorExample {
    Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[] {featureIndexer, rf});

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java
@ -46,7 +46,7 @@ public class JavaSimpleParamsExample {
      .getOrCreate();

    // Prepare training data.
-    // We use LabeledPoint, which is a JavaBean.  Spark SQL can convert RDDs of JavaBeans
+    // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
    // into DataFrames, where it uses the bean metadata to infer the schema.
    List<LabeledPoint> localTraining = Lists.newArrayList(
      new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
@ -56,7 +56,7 @@ public class JavaSimpleParamsExample {
    Dataset<Row> training =
      spark.createDataFrame(localTraining, LabeledPoint.class);

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
    LogisticRegression lr = new LogisticRegression();
    // Print out the parameters, documentation, and any default values.
    System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");
@ -65,7 +65,7 @@ public class JavaSimpleParamsExample {
    lr.setMaxIter(10)
      .setRegParam(0.01);

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
    LogisticRegressionModel model1 = lr.fit(training);
    // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
    // we can view the parameters it used during fit().
@ -82,7 +82,7 @@ public class JavaSimpleParamsExample {

    // One can also combine ParamMaps.
    ParamMap paramMap2 = new ParamMap();
-    paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name
+    paramMap2.put(lr.probabilityCol().w("myProbability")); // Change output column name.
    ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);

    // Now learn a new model using the paramMapCombined parameters.
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java
@ -43,7 +43,9 @@ public class JavaSimpleTextClassificationPipeline {

  public static void main(String[] args) {
    SparkSession spark = SparkSession
-      .builder().appName("JavaSimpleTextClassificationPipeline").getOrCreate();
+      .builder()
+      .appName("JavaSimpleTextClassificationPipeline")
+      .getOrCreate();

    // Prepare training documents, which are labeled.
    List<LabeledDocument> localTraining = Lists.newArrayList(
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala
@ -47,10 +47,10 @@ object DecisionTreeClassificationExample {
    val featureIndexer = new VectorIndexer()
      .setInputCol("features")
      .setOutputCol("indexedFeatures")
-      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous
+      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a DecisionTree model.
@ -64,11 +64,11 @@ object DecisionTreeClassificationExample {
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels)

-    // Chain indexers and tree in a Pipeline
+    // Chain indexers and tree in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(labelIndexer, featureIndexer, dt, labelConverter))

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -77,7 +77,7 @@ object DecisionTreeClassificationExample {
    // Select example rows to display.
    predictions.select("predictedLabel", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
@ -23,7 +23,6 @@ import scala.language.reflectiveCalls

 import scopt.OptionParser

-import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.examples.mllib.AbstractParams
 import org.apache.spark.ml.{Pipeline, PipelineStage, Transformer}
 import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, DecisionTreeClassifier}
@ -40,7 +39,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
 * {{{
 * ./bin/run-example ml.DecisionTreeExample [options]
 * }}}
- * Note that Decision Trees can take a large amount of memory.  If the run-example command above
+ * Note that Decision Trees can take a large amount of memory. If the run-example command above
 * fails, try running via spark-submit and specifying the amount of memory as at least 1g.
 * For local mode, run
 * {{{
@ -87,7 +86,7 @@ object DecisionTreeExample {
        .text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
        .action((x, c) => c.copy(minInfoGain = x))
      opt[Double]("fracTest")
-        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
          s"this option is ignored. default: ${defaultParams.fracTest}")
        .action((x, c) => c.copy(fracTest = x))
      opt[Boolean]("cacheNodeIds")
@ -106,7 +105,7 @@ object DecisionTreeExample {
         s"default: ${defaultParams.checkpointInterval}")
        .action((x, c) => c.copy(checkpointInterval = x))
      opt[String]("testInput")
-        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
          s" default: ${defaultParams.testInput}")
        .action((x, c) => c.copy(testInput = x))
      opt[String]("dataFormat")
@ -157,11 +156,10 @@ object DecisionTreeExample {
   * @param dataFormat  "libsvm" or "dense"
   * @param testInput  Path to test dataset.
   * @param algo  Classification or Regression
-   * @param fracTest  Fraction of input data to hold out for testing.  Ignored if testInput given.
+   * @param fracTest  Fraction of input data to hold out for testing. Ignored if testInput given.
   * @return  (training dataset, test dataset)
   */
  private[ml] def loadDatasets(
-      sc: SparkContext,
      input: String,
      dataFormat: String,
      testInput: String,
@ -200,18 +198,21 @@ object DecisionTreeExample {
  }

  def run(params: Params) {
-    val conf = new SparkConf().setAppName(s"DecisionTreeExample with $params")
-    val sc = new SparkContext(conf)
-    params.checkpointDir.foreach(sc.setCheckpointDir)
+    val spark = SparkSession
+      .builder
+      .appName(s"DecisionTreeExample with $params")
+      .getOrCreate()
+
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
    val algo = params.algo.toLowerCase

    println(s"DecisionTreeExample with parameters:\n$params")

    // Load training and test data and cache it.
    val (training: DataFrame, test: DataFrame) =
-      loadDatasets(sc, params.input, params.dataFormat, params.testInput, algo, params.fracTest)
+      loadDatasets(params.input, params.dataFormat, params.testInput, algo, params.fracTest)

-    // Set up Pipeline
+    // Set up Pipeline.
    val stages = new mutable.ArrayBuffer[PipelineStage]()
    // (1) For classification, re-index classes.
    val labelColName = if (algo == "classification") "indexedLabel" else "label"
@ -228,7 +229,7 @@ object DecisionTreeExample {
      .setOutputCol("indexedFeatures")
      .setMaxCategories(10)
    stages += featuresIndexer
-    // (3) Learn Decision Tree
+    // (3) Learn Decision Tree.
    val dt = algo match {
      case "classification" =>
        new DecisionTreeClassifier()
@ -255,13 +256,13 @@ object DecisionTreeExample {
    stages += dt
    val pipeline = new Pipeline().setStages(stages.toArray)

-    // Fit the Pipeline
+    // Fit the Pipeline.
    val startTime = System.nanoTime()
    val pipelineModel = pipeline.fit(training)
    val elapsedTime = (System.nanoTime() - startTime) / 1e9
    println(s"Training time: $elapsedTime seconds")

-    // Get the trained Decision Tree from the fitted PipelineModel
+    // Get the trained Decision Tree from the fitted PipelineModel.
    algo match {
      case "classification" =>
        val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeClassificationModel]
@ -280,7 +281,7 @@ object DecisionTreeExample {
      case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    // Evaluate model on training, test data
+    // Evaluate model on training, test data.
    algo match {
      case "classification" =>
        println("Training data results:")
@ -296,11 +297,11 @@ object DecisionTreeExample {
        throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    sc.stop()
+    spark.stop()
  }

  /**
-   * Evaluate the given ClassificationModel on data.  Print the results.
+   * Evaluate the given ClassificationModel on data. Print the results.
   * @param model  Must fit ClassificationModel abstraction
   * @param data  DataFrame with "prediction" and labelColName columns
   * @param labelColName  Name of the labelCol parameter for the model
@ -314,7 +315,7 @@ object DecisionTreeExample {
    val fullPredictions = model.transform(data).cache()
    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
    val labels = fullPredictions.select(labelColName).rdd.map(_.getDouble(0))
-    // Print number of classes for reference
+    // Print number of classes for reference.
    val numClasses = MetadataUtils.getNumClasses(fullPredictions.schema(labelColName)) match {
      case Some(n) => n
      case None => throw new RuntimeException(
@ -325,7 +326,7 @@ object DecisionTreeExample {
  }

  /**
-   * Evaluate the given RegressionModel on data.  Print the results.
+   * Evaluate the given RegressionModel on data. Print the results.
   * @param model  Must fit RegressionModel abstraction
   * @param data  DataFrame with "prediction" and labelColName columns
   * @param labelColName  Name of the labelCol parameter for the model
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala
@ -46,7 +46,7 @@ object DecisionTreeRegressionExample {
      .setMaxCategories(4)
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a DecisionTree model.
@ -54,11 +54,11 @@ object DecisionTreeRegressionExample {
      .setLabelCol("label")
      .setFeaturesCol("indexedFeatures")

-    // Chain indexer and tree in a Pipeline
+    // Chain indexer and tree in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(featureIndexer, dt))

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -67,7 +67,7 @@ object DecisionTreeRegressionExample {
    // Select example rows to display.
    predictions.select("prediction", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@ -50,7 +50,7 @@ object DeveloperApiExample {
      LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
      LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
    val lr = new MyLogisticRegression()
    // Print out the parameters, documentation, and any default values.
    println("MyLogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -58,7 +58,7 @@ object DeveloperApiExample {
    // We may set parameters using setter methods.
    lr.setMaxIter(10)

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
    val model = lr.fit(training.toDF())

    // Prepare test data.
@ -84,7 +84,7 @@ object DeveloperApiExample {
 /**
 * Example of defining a parameter trait for a user-defined type of [[Classifier]].
 *
- * NOTE: This is private since it is an example.  In practice, you may not want it to be private.
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
 */
 private trait MyLogisticRegressionParams extends ClassifierParams {

@ -96,7 +96,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams {
   *   - def getMyParamName
   *   - def setMyParamName
   * Here, we have a trait to be mixed in with the Estimator and Model (MyLogisticRegression
-   * and MyLogisticRegressionModel).  We place the setter (setMaxIter) method in the Estimator
+   * and MyLogisticRegressionModel). We place the setter (setMaxIter) method in the Estimator
   * class since the maxIter parameter is only used during training (not in the Model).
   */
  val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
@ -106,7 +106,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams {
 /**
 * Example of defining a type of [[Classifier]].
 *
- * NOTE: This is private since it is an example.  In practice, you may not want it to be private.
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
 */
 private class MyLogisticRegression(override val uid: String)
  extends Classifier[Vector, MyLogisticRegression, MyLogisticRegressionModel]
@ -138,7 +138,7 @@ private class MyLogisticRegression(override val uid: String)
 /**
 * Example of defining a type of [[ClassificationModel]].
 *
- * NOTE: This is private since it is an example.  In practice, you may not want it to be private.
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
 */
 private class MyLogisticRegressionModel(
    override val uid: String,
@ -169,7 +169,7 @@ private class MyLogisticRegressionModel(
    Vectors.dense(-margin, margin)
  }

-  /** Number of classes the label can take.  2 indicates binary classification. */
+  /** Number of classes the label can take. 2 indicates binary classification. */
  override val numClasses: Int = 2

  /** Number of features the model was trained on. */
--- a/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala
@ -43,7 +43,7 @@ object EstimatorTransformerParamExample {
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    )).toDF("label", "features")

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
    val lr = new LogisticRegression()
    // Print out the parameters, documentation, and any default values.
    println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -52,7 +52,7 @@ object EstimatorTransformerParamExample {
    lr.setMaxIter(10)
      .setRegParam(0.01)

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
    val model1 = lr.fit(training)
    // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
    // we can view the parameters it used during fit().
@ -63,11 +63,11 @@ object EstimatorTransformerParamExample {
    // We may alternatively specify parameters using a ParamMap,
    // which supports several methods for specifying parameters.
    val paramMap = ParamMap(lr.maxIter -> 20)
-      .put(lr.maxIter, 30)  // Specify 1 Param.  This overwrites the original maxIter.
+      .put(lr.maxIter, 30)  // Specify 1 Param. This overwrites the original maxIter.
      .put(lr.regParam -> 0.1, lr.threshold -> 0.55)  // Specify multiple Params.

    // One can also combine ParamMaps.
-    val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")  // Change output column name
+    val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")  // Change output column name.
    val paramMapCombined = paramMap ++ paramMap2

    // Now learn a new model using the paramMapCombined parameters.
--- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala
@ -23,13 +23,12 @@ import scala.language.reflectiveCalls

 import scopt.OptionParser

-import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.examples.mllib.AbstractParams
 import org.apache.spark.ml.{Pipeline, PipelineStage}
 import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
 import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
 import org.apache.spark.ml.regression.{GBTRegressionModel, GBTRegressor}
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, SparkSession}


 /**
@ -37,7 +36,7 @@ import org.apache.spark.sql.DataFrame
 * {{{
 * ./bin/run-example ml.GBTExample [options]
 * }}}
- * Decision Trees and ensembles can take a large amount of memory.  If the run-example command
+ * Decision Trees and ensembles can take a large amount of memory. If the run-example command
 * above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
 * For local mode, run
 * {{{
@ -88,7 +87,7 @@ object GBTExample {
        .text(s"number of trees in ensemble, default: ${defaultParams.maxIter}")
        .action((x, c) => c.copy(maxIter = x))
      opt[Double]("fracTest")
-        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
        s"this option is ignored. default: ${defaultParams.fracTest}")
        .action((x, c) => c.copy(fracTest = x))
      opt[Boolean]("cacheNodeIds")
@ -109,7 +108,7 @@ object GBTExample {
        s"default: ${defaultParams.checkpointInterval}")
        .action((x, c) => c.copy(checkpointInterval = x))
      opt[String]("testInput")
-        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
        s" default: ${defaultParams.testInput}")
        .action((x, c) => c.copy(testInput = x))
      opt[String]("dataFormat")
@ -136,15 +135,18 @@ object GBTExample {
  }

  def run(params: Params) {
-    val conf = new SparkConf().setAppName(s"GBTExample with $params")
-    val sc = new SparkContext(conf)
-    params.checkpointDir.foreach(sc.setCheckpointDir)
+    val spark = SparkSession
+      .builder
+      .appName(s"GBTExample with $params")
+      .getOrCreate()
+
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
    val algo = params.algo.toLowerCase

    println(s"GBTExample with parameters:\n$params")

    // Load training and test data and cache it.
-    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
      params.dataFormat, params.testInput, algo, params.fracTest)

    // Set up Pipeline
@ -164,7 +166,7 @@ object GBTExample {
      .setOutputCol("indexedFeatures")
      .setMaxCategories(10)
    stages += featuresIndexer
-    // (3) Learn GBT
+    // (3) Learn GBT.
    val dt = algo match {
      case "classification" =>
        new GBTClassifier()
@ -193,13 +195,13 @@ object GBTExample {
    stages += dt
    val pipeline = new Pipeline().setStages(stages.toArray)

-    // Fit the Pipeline
+    // Fit the Pipeline.
    val startTime = System.nanoTime()
    val pipelineModel = pipeline.fit(training)
    val elapsedTime = (System.nanoTime() - startTime) / 1e9
    println(s"Training time: $elapsedTime seconds")

-    // Get the trained GBT from the fitted PipelineModel
+    // Get the trained GBT from the fitted PipelineModel.
    algo match {
      case "classification" =>
        val rfModel = pipelineModel.stages.last.asInstanceOf[GBTClassificationModel]
@ -218,7 +220,7 @@ object GBTExample {
      case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    // Evaluate model on training, test data
+    // Evaluate model on training, test data.
    algo match {
      case "classification" =>
        println("Training data results:")
@ -234,7 +236,7 @@ object GBTExample {
        throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    sc.stop()
+    spark.stop()
  }
 }
 // scalastyle:on println
--- a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala
@ -51,7 +51,7 @@ object GradientBoostedTreeClassifierExample {
      .setMaxCategories(4)
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a GBT model.
@ -66,11 +66,11 @@ object GradientBoostedTreeClassifierExample {
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels)

-    // Chain indexers and GBT in a Pipeline
+    // Chain indexers and GBT in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(labelIndexer, featureIndexer, gbt, labelConverter))

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -79,7 +79,7 @@ object GradientBoostedTreeClassifierExample {
    // Select example rows to display.
    predictions.select("predictedLabel", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala
@ -45,7 +45,7 @@ object GradientBoostedTreeRegressorExample {
      .setMaxCategories(4)
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a GBT model.
@ -54,11 +54,11 @@ object GradientBoostedTreeRegressorExample {
      .setFeaturesCol("indexedFeatures")
      .setMaxIter(10)

-    // Chain indexer and GBT in a Pipeline
+    // Chain indexer and GBT in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(featureIndexer, gbt))

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -67,7 +67,7 @@ object GradientBoostedTreeRegressorExample {
    // Select example rows to display.
    predictions.select("prediction", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala
@ -22,10 +22,9 @@ import scala.language.reflectiveCalls

 import scopt.OptionParser

-import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.examples.mllib.AbstractParams
 import org.apache.spark.ml.regression.LinearRegression
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, SparkSession}

 /**
 * An example runner for linear regression with elastic-net (mixing L1/L2) regularization.
@ -74,11 +73,11 @@ object LinearRegressionExample {
        s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
        .action((x, c) => c.copy(tol = x))
      opt[Double]("fracTest")
-        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
        s"this option is ignored. default: ${defaultParams.fracTest}")
        .action((x, c) => c.copy(fracTest = x))
      opt[String]("testInput")
-        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
        s" default: ${defaultParams.testInput}")
        .action((x, c) => c.copy(testInput = x))
      opt[String]("dataFormat")
@ -105,13 +104,15 @@ object LinearRegressionExample {
  }

  def run(params: Params) {
-    val conf = new SparkConf().setAppName(s"LinearRegressionExample with $params")
-    val sc = new SparkContext(conf)
+    val spark = SparkSession
+      .builder
+      .appName(s"LinearRegressionExample with $params")
+      .getOrCreate()

    println(s"LinearRegressionExample with parameters:\n$params")

    // Load training and test data and cache it.
-    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
      params.dataFormat, params.testInput, "regression", params.fracTest)

    val lir = new LinearRegression()
@ -136,7 +137,7 @@ object LinearRegressionExample {
    println("Test data results:")
    DecisionTreeExample.evaluateRegressionModel(lirModel, test, "label")

-    sc.stop()
+    spark.stop()
  }
 }
 // scalastyle:on println
--- a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala
@ -23,12 +23,11 @@ import scala.language.reflectiveCalls

 import scopt.OptionParser

-import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.examples.mllib.AbstractParams
 import org.apache.spark.ml.{Pipeline, PipelineStage}
 import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
 import org.apache.spark.ml.feature.StringIndexer
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, SparkSession}

 /**
 * An example runner for logistic regression with elastic-net (mixing L1/L2) regularization.
@ -81,11 +80,11 @@ object LogisticRegressionExample {
        s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
        .action((x, c) => c.copy(tol = x))
      opt[Double]("fracTest")
-        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
        s"this option is ignored. default: ${defaultParams.fracTest}")
        .action((x, c) => c.copy(fracTest = x))
      opt[String]("testInput")
-        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
        s" default: ${defaultParams.testInput}")
        .action((x, c) => c.copy(testInput = x))
      opt[String]("dataFormat")
@ -112,16 +111,18 @@ object LogisticRegressionExample {
  }

  def run(params: Params) {
-    val conf = new SparkConf().setAppName(s"LogisticRegressionExample with $params")
-    val sc = new SparkContext(conf)
+    val spark = SparkSession
+      .builder
+      .appName(s"LogisticRegressionExample with $params")
+      .getOrCreate()

    println(s"LogisticRegressionExample with parameters:\n$params")

    // Load training and test data and cache it.
-    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
      params.dataFormat, params.testInput, "classification", params.fracTest)

-    // Set up Pipeline
+    // Set up Pipeline.
    val stages = new mutable.ArrayBuffer[PipelineStage]()

    val labelIndexer = new StringIndexer()
@ -141,7 +142,7 @@ object LogisticRegressionExample {
    stages += lor
    val pipeline = new Pipeline().setStages(stages.toArray)

-    // Fit the Pipeline
+    // Fit the Pipeline.
    val startTime = System.nanoTime()
    val pipelineModel = pipeline.fit(training)
    val elapsedTime = (System.nanoTime() - startTime) / 1e9
@ -156,7 +157,7 @@ object LogisticRegressionExample {
    println("Test data results:")
    DecisionTreeExample.evaluateClassificationModel(pipelineModel, test, "indexedLabel")

-    sc.stop()
+    spark.stop()
  }
 }
 // scalastyle:on println
--- a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
@ -27,7 +27,9 @@ object LogisticRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
-      .builder.appName("LogisticRegressionWithElasticNetExample").getOrCreate()
+      .builder
+      .appName("LogisticRegressionWithElasticNetExample")
+      .getOrCreate()

    // $example on$
    // Load training data
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala
@ -42,7 +42,9 @@ object ModelSelectionViaCrossValidationExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
-      .builder.appName("ModelSelectionViaCrossValidationExample").getOrCreate()
+      .builder
+      .appName("ModelSelectionViaCrossValidationExample")
+      .getOrCreate()

    // $example on$
    // Prepare training data from a list of (id, text, label) tuples.
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
@ -36,7 +36,9 @@ object ModelSelectionViaTrainValidationSplitExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
-      .builder.appName("ModelSelectionViaTrainValidationSplitExample").getOrCreate()
+      .builder
+      .appName("ModelSelectionViaTrainValidationSplitExample")
+      .getOrCreate()

    // $example on$
    // Prepare training and test data.
--- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala
@ -51,7 +51,7 @@ object RandomForestClassifierExample {
      .setMaxCategories(4)
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a RandomForest model.
@ -66,11 +66,11 @@ object RandomForestClassifierExample {
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels)

-    // Chain indexers and forest in a Pipeline
+    // Chain indexers and forest in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(labelIndexer, featureIndexer, rf, labelConverter))

-    // Train model.  This also runs the indexers.
+    // Train model. This also runs the indexers.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -79,7 +79,7 @@ object RandomForestClassifierExample {
    // Select example rows to display.
    predictions.select("predictedLabel", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("indexedLabel")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala
@ -23,13 +23,12 @@ import scala.language.reflectiveCalls

 import scopt.OptionParser

-import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.examples.mllib.AbstractParams
 import org.apache.spark.ml.{Pipeline, PipelineStage}
 import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
 import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
 import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, SparkSession}


 /**
@ -37,7 +36,7 @@ import org.apache.spark.sql.DataFrame
 * {{{
 * ./bin/run-example ml.RandomForestExample [options]
 * }}}
- * Decision Trees and ensembles can take a large amount of memory.  If the run-example command
+ * Decision Trees and ensembles can take a large amount of memory. If the run-example command
 * above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
 * For local mode, run
 * {{{
@ -94,7 +93,7 @@ object RandomForestExample {
        s" default: ${defaultParams.numTrees}")
        .action((x, c) => c.copy(featureSubsetStrategy = x))
      opt[Double]("fracTest")
-        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
        s"this option is ignored. default: ${defaultParams.fracTest}")
        .action((x, c) => c.copy(fracTest = x))
      opt[Boolean]("cacheNodeIds")
@ -115,7 +114,7 @@ object RandomForestExample {
        s"default: ${defaultParams.checkpointInterval}")
        .action((x, c) => c.copy(checkpointInterval = x))
      opt[String]("testInput")
-        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
        s" default: ${defaultParams.testInput}")
        .action((x, c) => c.copy(testInput = x))
      opt[String]("dataFormat")
@ -142,18 +141,21 @@ object RandomForestExample {
  }

  def run(params: Params) {
-    val conf = new SparkConf().setAppName(s"RandomForestExample with $params")
-    val sc = new SparkContext(conf)
-    params.checkpointDir.foreach(sc.setCheckpointDir)
+    val spark = SparkSession
+      .builder
+      .appName(s"RandomForestExample with $params")
+      .getOrCreate()
+
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
    val algo = params.algo.toLowerCase

    println(s"RandomForestExample with parameters:\n$params")

    // Load training and test data and cache it.
-    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(sc, params.input,
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
      params.dataFormat, params.testInput, algo, params.fracTest)

-    // Set up Pipeline
+    // Set up Pipeline.
    val stages = new mutable.ArrayBuffer[PipelineStage]()
    // (1) For classification, re-index classes.
    val labelColName = if (algo == "classification") "indexedLabel" else "label"
@ -170,7 +172,7 @@ object RandomForestExample {
      .setOutputCol("indexedFeatures")
      .setMaxCategories(10)
    stages += featuresIndexer
-    // (3) Learn Random Forest
+    // (3) Learn Random Forest.
    val dt = algo match {
      case "classification" =>
        new RandomForestClassifier()
@ -201,13 +203,13 @@ object RandomForestExample {
    stages += dt
    val pipeline = new Pipeline().setStages(stages.toArray)

-    // Fit the Pipeline
+    // Fit the Pipeline.
    val startTime = System.nanoTime()
    val pipelineModel = pipeline.fit(training)
    val elapsedTime = (System.nanoTime() - startTime) / 1e9
    println(s"Training time: $elapsedTime seconds")

-    // Get the trained Random Forest from the fitted PipelineModel
+    // Get the trained Random Forest from the fitted PipelineModel.
    algo match {
      case "classification" =>
        val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestClassificationModel]
@ -226,7 +228,7 @@ object RandomForestExample {
      case _ => throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    // Evaluate model on training, test data
+    // Evaluate model on training, test data.
    algo match {
      case "classification" =>
        println("Training data results:")
@ -242,7 +244,7 @@ object RandomForestExample {
        throw new IllegalArgumentException("Algo ${params.algo} not supported.")
    }

-    sc.stop()
+    spark.stop()
  }
 }
 // scalastyle:on println
--- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala
@ -45,7 +45,7 @@ object RandomForestRegressorExample {
      .setMaxCategories(4)
      .fit(data)

-    // Split the data into training and test sets (30% held out for testing)
+    // Split the data into training and test sets (30% held out for testing).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    // Train a RandomForest model.
@ -53,11 +53,11 @@ object RandomForestRegressorExample {
      .setLabelCol("label")
      .setFeaturesCol("indexedFeatures")

-    // Chain indexer and forest in a Pipeline
+    // Chain indexer and forest in a Pipeline.
    val pipeline = new Pipeline()
      .setStages(Array(featureIndexer, rf))

-    // Train model.  This also runs the indexer.
+    // Train model. This also runs the indexer.
    val model = pipeline.fit(trainingData)

    // Make predictions.
@ -66,7 +66,7 @@ object RandomForestRegressorExample {
    // Select example rows to display.
    predictions.select("prediction", "label", "features").show(5)

-    // Select (prediction, true label) and compute test error
+    // Select (prediction, true label) and compute test error.
    val evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
--- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
@ -41,7 +41,7 @@ object SimpleParamsExample {
    import spark.implicits._

    // Prepare training data.
-    // We use LabeledPoint, which is a case class.  Spark SQL can convert RDDs of case classes
+    // We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes
    // into DataFrames, where it uses the case class metadata to infer the schema.
    val training = spark.createDataFrame(Seq(
      LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
@ -49,7 +49,7 @@ object SimpleParamsExample {
      LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
      LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))

-    // Create a LogisticRegression instance.  This instance is an Estimator.
+    // Create a LogisticRegression instance. This instance is an Estimator.
    val lr = new LogisticRegression()
    // Print out the parameters, documentation, and any default values.
    println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
@ -58,7 +58,7 @@ object SimpleParamsExample {
    lr.setMaxIter(10)
      .setRegParam(0.01)

-    // Learn a LogisticRegression model.  This uses the parameters stored in lr.
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
    val model1 = lr.fit(training)
    // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
    // we can view the parameters it used during fit().
@ -69,7 +69,7 @@ object SimpleParamsExample {
    // We may alternatively specify parameters using a ParamMap,
    // which supports several methods for specifying parameters.
    val paramMap = ParamMap(lr.maxIter -> 20)
-    paramMap.put(lr.maxIter, 30) // Specify 1 Param.  This overwrites the original maxIter.
+    paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
    paramMap.put(lr.regParam -> 0.1, lr.thresholds -> Array(0.45, 0.55)) // Specify multiple Params.

    // One can also combine ParamMaps.