diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 2d0212f36f..e021093753 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -237,7 +237,8 @@ class DecisionTreeClassificationModel private[ml] ( @Since("1.4.0") override def toString: String = { - s"DecisionTreeClassificationModel (uid=$uid) of depth $depth with $numNodes nodes" + s"DecisionTreeClassificationModel: uid=$uid, depth=$depth, numNodes=$numNodes, " + + s"numClasses=$numClasses, numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 5bc45f2b02..e1f5338f34 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -340,7 +340,8 @@ class GBTClassificationModel private[ml]( @Since("1.4.0") override def toString: String = { - s"GBTClassificationModel (uid=$uid) with $numTrees trees" + s"GBTClassificationModel: uid=$uid, numTrees=$numTrees, numClasses=$numClasses, " + + s"numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 0dc1c24570..45114f6ee8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -326,6 +326,10 @@ class LinearSVCModel private[classification] ( @Since("2.2.0") override def write: MLWriter = new LinearSVCModel.LinearSVCWriter(this) + @Since("3.0.0") + override def toString: String = { + s"LinearSVCModel: uid=$uid, numClasses=$numClasses, numFeatures=$numFeatures" + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1d1d139ba0..1cb5915239 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1181,8 +1181,7 @@ class LogisticRegressionModel private[spark] ( override def write: MLWriter = new LogisticRegressionModel.LogisticRegressionModelWriter(this) override def toString: String = { - s"LogisticRegressionModel: " + - s"uid = ${super.toString}, numClasses = $numClasses, numFeatures = $numFeatures" + s"LogisticRegressionModel: uid=$uid, numClasses=$numClasses, numFeatures=$numFeatures" } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 41db6f3f44..8c5d768044 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -323,6 +323,12 @@ class MultilayerPerceptronClassificationModel private[ml] ( override protected def predictRaw(features: Vector): Vector = mlpModel.predictRaw(features) override def numClasses: Int = layers.last + + @Since("3.0.0") + override def toString: 
String = { + s"MultilayerPerceptronClassificationModel: uid=$uid, numLayers=${layers.length}, " + + s"numClasses=$numClasses, numFeatures=$numFeatures" + } } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index 205f565aa2..bcca40d159 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -359,7 +359,8 @@ class NaiveBayesModel private[ml] ( @Since("1.5.0") override def toString: String = { - s"NaiveBayesModel (uid=$uid) with ${pi.size} classes" + s"NaiveBayesModel: uid=$uid, modelType=${$(modelType)}, numClasses=$numClasses, " + + s"numFeatures=$numFeatures" } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 675315e3bb..51a624795c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -257,6 +257,12 @@ final class OneVsRestModel private[ml] ( @Since("2.0.0") override def write: MLWriter = new OneVsRestModel.OneVsRestModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"OneVsRestModel: uid=$uid, classifier=${$(classifier)}, numClasses=$numClasses, " + + s"numFeatures=$numFeatures" + } } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 245cda35d8..bc28d783ed 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -260,7 +260,8 @@ class RandomForestClassificationModel private[ml] ( @Since("1.4.0") override def toString: String = { - s"RandomForestClassificationModel (uid=$uid) with $getNumTrees trees" + s"RandomForestClassificationModel: uid=$uid, numTrees=$getNumTrees, numClasses=$numClasses, " + + s"numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 4ad0cb55b0..5f2316fa7c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -91,6 +91,9 @@ class BisectingKMeansModel private[ml] ( extends Model[BisectingKMeansModel] with BisectingKMeansParams with MLWritable with HasTrainingSummary[BisectingKMeansSummary] { + @Since("3.0.0") + lazy val numFeatures: Int = parentModel.clusterCenters.head.size + @Since("2.0.0") override def copy(extra: ParamMap): BisectingKMeansModel = { val copied = copyValues(new BisectingKMeansModel(uid, parentModel), extra) @@ -145,6 +148,12 @@ class BisectingKMeansModel private[ml] ( @Since("2.0.0") override def write: MLWriter = new BisectingKMeansModel.BisectingKMeansModelWriter(this) + @Since("3.0.0") + override def toString: String = { + s"BisectingKMeansModel: uid=$uid, k=${parentModel.k}, distanceMeasure=${$(distanceMeasure)}, " + + s"numFeatures=$numFeatures" + } + /** * Gets summary of model on training set. An exception is * thrown if `hasSummary` is false. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index b4d9a9f882..916f326ab5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -89,6 +89,9 @@ class GaussianMixtureModel private[ml] ( extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable with HasTrainingSummary[GaussianMixtureSummary] { + @Since("3.0.0") + lazy val numFeatures: Int = gaussians.head.mean.size + /** @group setParam */ @Since("2.1.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) @@ -186,6 +189,11 @@ class GaussianMixtureModel private[ml] ( @Since("2.0.0") override def write: MLWriter = new GaussianMixtureModel.GaussianMixtureModelWriter(this) + @Since("3.0.0") + override def toString: String = { + s"GaussianMixtureModel: uid=$uid, k=${weights.length}, numFeatures=$numFeatures" + } + /** * Gets summary of model on training set. An exception is * thrown if `hasSummary` is false. diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 5cc0f38c67..caeded400f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -108,6 +108,9 @@ class KMeansModel private[ml] ( extends Model[KMeansModel] with KMeansParams with GeneralMLWritable with HasTrainingSummary[KMeansSummary] { + @Since("3.0.0") + lazy val numFeatures: Int = parentModel.clusterCenters.head.size + @Since("1.5.0") override def copy(extra: ParamMap): KMeansModel = { val copied = copyValues(new KMeansModel(uid, parentModel), extra) @@ -153,6 +156,12 @@ class KMeansModel private[ml] ( @Since("1.6.0") override def write: GeneralMLWriter = new GeneralMLWriter(this) + @Since("3.0.0") + override def toString: String = { + s"KMeansModel: uid=$uid, k=${parentModel.k}, distanceMeasure=${$(distanceMeasure)}, " + + s"numFeatures=$numFeatures" + } + /** * Gets summary of model on training set. An exception is * thrown if `hasSummary` is false. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 91201e7bd0..9b0005b374 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -620,6 +620,11 @@ class LocalLDAModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new LocalLDAModel.LocalLDAModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LocalLDAModel: uid=$uid, k=${$(k)}, numFeatures=$vocabSize" + } } @@ -783,6 +788,11 @@ class DistributedLDAModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new DistributedLDAModel.DistributedWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"DistributedLDAModel: uid=$uid, k=${$(k)}, numFeatures=$vocabSize" + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index 09e8e7b232..55b910e98d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -130,6 +130,12 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("1.4.1") override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"BinaryClassificationEvaluator: uid=$uid, metricName=${$(metricName)}, " + + s"numBins=${$(numBins)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala index 868bd2a763..157bed2a6b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala @@ -120,6 +120,12 @@ class ClusteringEvaluator @Since("2.3.0") (@Since("2.3.0") override val uid: Str throw new IllegalArgumentException(s"No support for metric $mn, distance $dm") } } + + @Since("3.0.0") + override def toString: String = { + s"ClusteringEvaluator: uid=$uid, metricName=${$(metricName)}, " + + s"distanceMeasure=${$(distanceMeasure)}" + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index bac3e23774..ab14227f06 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -184,6 +184,12 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid @Since("1.5.0") override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"MulticlassClassificationEvaluator: uid=$uid, metricName=${$(metricName)}, " + + s"metricLabel=${$(metricLabel)}, beta=${$(beta)}, eps=${$(eps)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala index f12c6700be..5216c40819 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala @@ -121,6 +121,12 @@ class MultilabelClassificationEvaluator (override val uid: String) } override def copy(extra: ParamMap): MultilabelClassificationEvaluator = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"MultilabelClassificationEvaluator: uid=$uid, metricName=${$(metricName)}, " + + s"metricLabel=${$(metricLabel)}" + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala index 64ab3c3f7f..ca3a8ebc16 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RankingEvaluator.scala @@ -105,6 +105,11 @@ class RankingEvaluator (override val uid: String) override def isLargerBetter: Boolean = true override def copy(extra: ParamMap): RankingEvaluator = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"RankingEvaluator: uid=$uid, metricName=${$(metricName)}, k=${$(k)}" + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index b0cafefe42..9f32d40d16 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -124,6 +124,12 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("1.5.0") override def copy(extra: ParamMap): RegressionEvaluator = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"RegressionEvaluator: uid=$uid, metricName=${$(metricName)}, " + + s"throughOrigin=${$(throughOrigin)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index ec4d45b653..07a4f91443 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -204,6 +204,13 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) @Since("1.4.1") override def copy(extra: ParamMap): Binarizer = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"Binarizer: uid=$uid" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(outputCols).map(c => s", numOutputCols=${c.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index c074830ec9..4e266fbc1e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -106,6 +106,11 @@ class BucketedRandomProjectionLSHModel private[ml]( override def write: MLWriter = { new BucketedRandomProjectionLSHModel.BucketedRandomProjectionLSHModelWriter(this) } + + @Since("3.0.0") + override def toString: String = { + s"BucketedRandomProjectionLSHModel: uid=$uid, numHashTables=${$(numHashTables)}" + } } /** diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 8533ed5ce5..9aeddae78e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -215,6 +215,13 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String override def copy(extra: ParamMap): Bucketizer = { defaultCopy[Bucketizer](extra).setParent(parent) } + + @Since("3.0.0") + override def toString: String = { + s"Bucketizer: uid=$uid" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(outputCols).map(c => s", numOutputCols=${c.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index 2a3656c495..9103e4feac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -316,6 +316,11 @@ final class ChiSqSelectorModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new ChiSqSelectorModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"ChiSqSelectorModel: uid=$uid, numSelectedFeatures=${selectedFeatures.length}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index e78e6d4f76..c58d44d492 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -307,7 +307,7 @@ class CountVectorizerModel( } val dictBr = broadcastDict.get val minTf = $(minTF) - val vectorizer = udf { (document: Seq[String]) => + val vectorizer = udf { document: Seq[String] => val termCounts = new OpenHashMap[Int, Double] var tokenCount = 0L document.foreach { term => @@ -344,6 +344,11 @@ class CountVectorizerModel( @Since("1.6.0") override def write: MLWriter = new CountVectorizerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"CountVectorizerModel: uid=$uid, vocabularySize=${vocabulary.length}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 84d6a536cc..e2167f0128 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -74,6 +74,11 @@ class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) } override protected def outputDataType: DataType = new VectorUDT + + @Since("3.0.0") + override def toString: String = { + s"DCT: uid=$uid, inverse=${$(inverse)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 2f32923543..227c13d60f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -81,6 +81,12 @@ class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: Stri } override protected def outputDataType: DataType = new VectorUDT() + + @Since("3.0.0") + override def toString: String = { + s"ElementwiseProduct: uid=$uid" + + 
get(scalingVec).map(v => s", vectorSize=${v.size}").getOrElse("") + } } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index 61b4d5d54a..39862554c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -22,7 +22,7 @@ import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.Vectors -import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators, StringArrayParam} +import org.apache.spark.ml.param.{ParamMap, StringArrayParam} import org.apache.spark.ml.param.shared.{HasInputCols, HasNumFeatures, HasOutputCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} import org.apache.spark.mllib.feature.{HashingTF => OldHashingTF} @@ -199,6 +199,13 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme val attrGroup = new AttributeGroup($(outputCol), $(numFeatures)) SchemaUtils.appendColumn(schema, attrGroup.toStructField()) } + + @Since("3.0.0") + override def toString: String = { + s"FeatureHasher: uid=$uid, numFeatures=${$(numFeatures)}" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(categoricalCols).map(c => s", numCategoricalCols=${c.length}").getOrElse("") + } } @Since("2.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index fe9f4f2123..80bf85936a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -127,6 +127,11 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) @Since("1.4.1") override def copy(extra: ParamMap): HashingTF = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"HashingTF: uid=$uid, binary=${$(binary)}, numFeatures=${$(numFeatures)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 4338421bf8..5f4103abcf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -175,9 +175,13 @@ class IDFModel private[ml] ( @Since("3.0.0") def numDocs: Long = idfModel.numDocs - @Since("1.6.0") override def write: MLWriter = new IDFModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"IDFModel: uid=$uid, numDocs=$numDocs" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index fbccfb1041..64f1722f5f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -274,6 +274,13 @@ class ImputerModel private[ml] ( @Since("2.2.0") override def write: MLWriter = new ImputerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"ImputerModel: uid=$uid, strategy=${$(strategy)}, missingValue=${$(missingValue)}" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(outputCols).map(c => s", numOutputCols=${c.length}").getOrElse("") + } } diff 
--git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index 611f1b691b..9a4f1d97c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -218,6 +218,11 @@ class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) ext @Since("1.6.0") override def copy(extra: ParamMap): Interaction = defaultCopy(extra) + @Since("3.0.0") + override def toString: String = { + s"Interaction: uid=$uid" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 88d09d760b..6bab70e502 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -140,6 +140,11 @@ class MaxAbsScalerModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new MaxAbsScalerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"MaxAbsScalerModel: uid=$uid, numFeatures=${maxAbs.size}" + } } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index da0eaad667..de7fe91c41 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -96,6 +96,11 @@ class MinHashLSHModel private[ml]( @Since("2.1.0") override def write: MLWriter = new MinHashLSHModel.MinHashLSHModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"MinHashLSHModel: uid=$uid, numHashTables=${$(numHashTables)}" + } } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index b0d7b637ca..e381a0435e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -226,6 +226,12 @@ class MinMaxScalerModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new MinMaxScalerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"MinMaxScalerModel: uid=$uid, numFeatures=${originalMin.size}, min=${$(min)}, " + + s"max=${$(max)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index e0772d5af2..fd6fde0744 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -70,6 +70,11 @@ class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) } override protected def outputDataType: DataType = new ArrayType(StringType, false) + + @Since("3.0.0") + override def toString: String = { + s"NGram: uid=$uid, n=${$(n)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 5db7f5da49..d129c2b2c2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -65,6 +65,11 @@ class Normalizer @Since("1.4.0") 
(@Since("1.4.0") override val uid: String) } override protected def outputDataType: DataType = new VectorUDT() + + @Since("3.0.0") + override def toString: String = { + s"Normalizer: uid=$uid, p=${$(p)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 459994c352..fd58043cda 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -376,6 +376,13 @@ class OneHotEncoderModel private[ml] ( @Since("3.0.0") override def write: MLWriter = new OneHotEncoderModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"OneHotEncoderModel: uid=$uid, dropLast=${$(dropLast)}, handleInvalid=${$(handleInvalid)}" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(outputCols).map(c => s", numOutputCols=${c.length}").getOrElse("") + } } @Since("3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index aa5a171d4f..69dcacbb0c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -179,6 +179,11 @@ class PCAModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new PCAModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"PCAModel: uid=$uid, k=${$(k)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 5734b06ee9..592ca001a2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -77,6 +77,11 @@ class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: Str @Since("1.4.1") override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"PolynomialExpansion: uid=$uid, degree=${$(degree)}" + } } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 9e95762f12..7ccfafa4ac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -320,7 +320,10 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) override def copy(extra: ParamMap): RFormula = defaultCopy(extra) @Since("2.0.0") - override def toString: String = s"RFormula(${get(formula).getOrElse("")}) (uid=$uid)" + override def toString: String = { + s"RFormula: uid=$uid" + + get(formula).map(f => s", formula = $f").getOrElse("") + } } @Since("2.0.0") @@ -376,7 +379,9 @@ class RFormulaModel private[feature]( } @Since("2.0.0") - override def toString: String = s"RFormulaModel($resolvedFormula) (uid=$uid)" + override def toString: String = { + s"RFormulaModel: uid=$uid, resolvedFormula=$resolvedFormula" + } private def transformLabel(dataset: Dataset[_]): DataFrame = { val labelName = resolvedFormula.label diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala index 1d609ef319..1b9b808293 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala @@ -251,6 +251,12 @@ class RobustScalerModel private[ml] ( } override def write: MLWriter = new RobustScalerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"RobustScalerModel: uid=$uid, numFeatures=${median.size}, " + + s"withCentering=${$(withCentering)}, withScaling=${$(withScaling)}" + } } @Since("3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index 0fb1d8c5dc..9b99b8177a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -90,6 +90,11 @@ class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) @Since("1.6.0") override def copy(extra: ParamMap): SQLTransformer = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"SQLTransformer: uid=$uid, statement=${$(statement)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 834c21e16d..41e319c1fb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -184,6 +184,12 @@ class StandardScalerModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new StandardScalerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"StandardScalerModel: uid=$uid, numFeatures=${mean.size}, withMean=${$(withMean)}, " + + s"withStd=${$(withStd)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index f95e03ae6c..57fe7c9c0a 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -156,6 +156,12 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String @Since("1.5.0") override def copy(extra: ParamMap): StopWordsRemover = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"StopWordsRemover: uid=$uid, numStopWords=${$(stopWords).length}, locale=${$(locale)}, " + + s"caseSensitive=${$(caseSensitive)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 2ce5acf1fe..9f9f097a26 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -412,7 +412,7 @@ class StringIndexerModel ( override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) - var (inputColNames, outputColNames) = getInOutCols() + val (inputColNames, outputColNames) = getInOutCols() val outputColumns = new Array[Column](outputColNames.length) // Skips invalid rows if `handleInvalid` is set to `StringIndexer.SKIP_INVALID`. 
@@ -473,6 +473,14 @@ class StringIndexerModel ( @Since("1.6.0") override def write: StringIndexModelWriter = new StringIndexModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"StringIndexerModel: uid=$uid, handleInvalid=${$(handleInvalid)}" + + get(stringOrderType).map(t => s", stringOrderType=$t").getOrElse("") + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + + get(outputCols).map(c => s", numOutputCols=${c.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index e6e9bdfd29..5ec5b77325 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -175,6 +175,12 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) @Since("1.4.1") override def copy(extra: ParamMap): VectorAssembler = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"VectorAssembler: uid=$uid, handleInvalid=${$(handleInvalid)}" + + get(inputCols).map(c => s", numInputCols=${c.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 6a3305aad8..18a82e5fe9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -428,7 +428,7 @@ class VectorIndexerModel private[ml] ( override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) val newField = prepOutputField(dataset.schema) - val transformUDF = udf { (vector: Vector) => transformFunc(vector) } + val transformUDF = udf { vector: Vector => transformFunc(vector) } val newCol = transformUDF(dataset($(inputCol))) val ds = dataset.withColumn($(outputCol), newCol, newField.metadata) if (getHandleInvalid == VectorIndexer.SKIP_INVALID) { @@ -506,6 +506,11 @@ class VectorIndexerModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new VectorIndexerModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"VectorIndexerModel: uid=$uid, numFeatures=$numFeatures, handleInvalid=${$(handleInvalid)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala index 5d787f263a..0f778e09a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala @@ -176,6 +176,11 @@ class VectorSizeHint @Since("2.3.0") (@Since("2.3.0") override val uid: String) @Since("2.3.0") override def copy(extra: ParamMap): this.type = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"VectorSizeHint: uid=$uid, size=${$(size)}, handleInvalid=${$(handleInvalid)}" + } } @Since("2.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index e3e462d07e..b84b8af4e8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -159,6 +159,12 @@ final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override 
val uid: Stri @Since("1.5.0") override def copy(extra: ParamMap): VectorSlicer = defaultCopy(extra) + + @Since("3.0.0") + override def toString: String = { + s"VectorSlicer: uid=$uid" + + get(indices).map(i => s", numSelectedFeatures=${i.length}").getOrElse("") + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 6ae90b8050..81dde0315c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -324,6 +324,12 @@ class Word2VecModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new Word2VecModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"Word2VecModel: uid=$uid, numWords=${wordVectors.wordIndex.size}, " + + s"vectorSize=${$(vectorSize)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index e1c9b927a2..a9592dbfca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -310,6 +310,11 @@ class FPGrowthModel private[ml] ( @Since("2.2.0") override def write: MLWriter = new FPGrowthModel.FPGrowthModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"FPGrowthModel: uid=$uid, numTrainingRecords=$numTrainingRecords" + } } @Since("2.2.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 5049ef9245..e72d7cab00 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -338,6 +338,11 @@ class ALSModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new ALSModel.ALSModelWriter(this) + @Since("3.0.0") + override def toString: String = { + s"ALSModel: uid=$uid, rank=$rank" + } + /** * Returns top `numItems` items recommended for each user, for all users. 
* @param numItems max number of recommendations for each user diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 1ce5b5b02b..fe89aed9a9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -311,6 +311,9 @@ class AFTSurvivalRegressionModel private[ml] ( @Since("1.6.0") val scale: Double) extends Model[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams with MLWritable { + @Since("3.0.0") + lazy val numFeatures: Int = coefficients.size + /** @group setParam */ @Since("1.6.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) @@ -386,6 +389,11 @@ class AFTSurvivalRegressionModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new AFTSurvivalRegressionModel.AFTSurvivalRegressionModelWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"AFTSurvivalRegressionModel: uid=$uid, numFeatures=$numFeatures" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 05851d5116..4a97997a1d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -243,7 +243,8 @@ class DecisionTreeRegressionModel private[ml] ( @Since("1.4.0") override def toString: String = { - s"DecisionTreeRegressionModel (uid=$uid) of depth $depth with $numNodes nodes" + s"DecisionTreeRegressionModel: uid=$uid, depth=$depth, numNodes=$numNodes, " + + s"numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 9c38647642..700f7a2075 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -302,7 +302,7 @@ class GBTRegressionModel private[ml]( @Since("1.4.0") override def toString: String = { - s"GBTRegressionModel (uid=$uid) with $numTrees trees" + s"GBTRegressionModel: uid=$uid, numTrees=$numTrees, numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index c504a54629..53b29102f0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1106,6 +1106,12 @@ class GeneralizedLinearRegressionModel private[ml] ( new GeneralizedLinearRegressionModel.GeneralizedLinearRegressionModelWriter(this) override val numFeatures: Int = coefficients.size + + @Since("3.0.0") + override def toString: String = { + s"GeneralizedLinearRegressionModel: uid=$uid, family=${$(family)}, link=${$(link)}, " + + s"numFeatures=$numFeatures" + } } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 8b9233dcdc..47f9e4bfb8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -259,6 +259,14 @@ class IsotonicRegressionModel private[ml] ( @Since("1.6.0") override def write: MLWriter = new IsotonicRegressionModelWriter(this) + + @Since("3.0.0") + val numFeatures: Int = 1 + + @Since("3.0.0") + override def toString: String = { + s"IsotonicRegressionModel: uid=$uid, numFeatures=$numFeatures" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 25c6f4d980..dad1080cea 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -702,6 +702,11 @@ class LinearRegressionModel private[ml] ( */ @Since("1.6.0") override def write: GeneralMLWriter = new GeneralMLWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LinearRegressionModel: uid=$uid, numFeatures=$numFeatures" + } } /** A writer for LinearRegression that handles the "internal" (or default) format */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 8f78fc1da1..c3afab57a4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -235,7 +235,7 @@ class RandomForestRegressionModel private[ml] ( @Since("1.4.0") override def toString: String = { - s"RandomForestRegressionModel (uid=$uid) with $getNumTrees trees" + s"RandomForestRegressionModel: uid=$uid, numTrees=$getNumTrees, numFeatures=$numFeatures" } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index e60a14f976..fff18bcbec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -323,6 +323,11 @@ class CrossValidatorModel private[ml] ( override def write: CrossValidatorModel.CrossValidatorModelWriter = { new CrossValidatorModel.CrossValidatorModelWriter(this) } + + @Since("3.0.0") + override def toString: String = { + s"CrossValidatorModel: uid=$uid, bestModel=$bestModel, numFolds=${$(numFolds)}" + } } @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index 8b251197af..ecf9b846ee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -140,7 +140,7 @@ class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: St val collectSubModelsParam = $(collectSubModels) - var subModels: Option[Array[Model[_]]] = if (collectSubModelsParam) { + val subModels: Option[Array[Model[_]]] = if (collectSubModelsParam) { Some(Array.fill[Model[_]](epm.length)(null)) } else None @@ -314,6 +314,11 @@ class TrainValidationSplitModel private[ml] ( override def write: TrainValidationSplitModel.TrainValidationSplitModelWriter = { new TrainValidationSplitModel.TrainValidationSplitModelWriter(this) } + + @Since("3.0.0") + override def toString: String = { + s"TrainValidationSplitModel: uid=$uid, bestModel=$bestModel, 
trainRatio=${$(trainRatio)}" + } } @Since("2.0.0") diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index d2b8751360..07116606df 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -2767,7 +2767,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { test("toString") { val model = new LogisticRegressionModel("logReg", Vectors.dense(0.1, 0.2, 0.3), 0.0) - val expected = "LogisticRegressionModel: uid = logReg, numClasses = 2, numFeatures = 3" + val expected = "LogisticRegressionModel: uid=logReg, numClasses=2, numFeatures=3" assert(model.toString === expected) } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index c5cdf35729..f9465bffc9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -594,7 +594,7 @@ class LogisticRegression(JavaProbabilisticClassifier, _LogisticRegressionParams, >>> blorModel.intercept == model2.intercept True >>> model2 - LogisticRegressionModel: uid = ..., numClasses = 2, numFeatures = 2 + LogisticRegressionModel: uid=..., numClasses=2, numFeatures=2 .. versionadded:: 1.3.0 """ @@ -1146,7 +1146,7 @@ class DecisionTreeClassifier(JavaProbabilisticClassifier, _DecisionTreeClassifie >>> model.numClasses 2 >>> print(model.toDebugString) - DecisionTreeClassificationModel (uid=...) of depth 1 with 3 nodes... + DecisionTreeClassificationModel...depth=1, numNodes=3... >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> model.predict(test0.head().features) 0.0 @@ -1183,7 +1183,7 @@ class DecisionTreeClassifier(JavaProbabilisticClassifier, _DecisionTreeClassifie >>> dt3 = DecisionTreeClassifier(maxDepth=2, weightCol="weight", labelCol="indexed") >>> model3 = dt3.fit(td3) >>> print(model3.toDebugString) - DecisionTreeClassificationModel (uid=...) of depth 1 with 3 nodes... + DecisionTreeClassificationModel...depth=1, numNodes=3... .. versionadded:: 1.4.0 """ @@ -1394,7 +1394,7 @@ class RandomForestClassifier(JavaProbabilisticClassifier, _RandomForestClassifie >>> model.transform(test1).head().prediction 1.0 >>> model.trees - [DecisionTreeClassificationModel (uid=...) of depth..., DecisionTreeClassificationModel...] + [DecisionTreeClassificationModel...depth=..., DecisionTreeClassificationModel...] >>> rfc_path = temp_path + "/rfc" >>> rf.save(rfc_path) >>> rf2 = RandomForestClassifier.load(rfc_path) @@ -1651,7 +1651,7 @@ class GBTClassifier(JavaProbabilisticClassifier, _GBTClassifierParams, >>> model.totalNumNodes 15 >>> print(model.toDebugString) - GBTClassificationModel (uid=...)...with 5 trees... + GBTClassificationModel...numTrees=5... >>> gbtc_path = temp_path + "gbtc" >>> gbt.save(gbtc_path) >>> gbt2 = GBTClassifier.load(gbtc_path) @@ -1665,7 +1665,7 @@ class GBTClassifier(JavaProbabilisticClassifier, _GBTClassifierParams, >>> model.treeWeights == model2.treeWeights True >>> model.trees - [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...] + [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...] >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0),)], ... 
["indexed", "features"]) >>> model.evaluateEachIteration(validation) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index e96e13b564..84e39a035d 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -800,7 +800,7 @@ class DecisionTreeRegressor(JavaPredictor, _DecisionTreeRegressorParams, JavaMLW >>> dt3 = DecisionTreeRegressor(maxDepth=2, weightCol="weight", varianceCol="variance") >>> model3 = dt3.fit(df3) >>> print(model3.toDebugString) - DecisionTreeRegressionModel (uid=...) of depth 1 with 3 nodes... + DecisionTreeRegressionModel...depth=1, numNodes=3... .. versionadded:: 1.4.0 """ @@ -1018,7 +1018,7 @@ class RandomForestRegressor(JavaPredictor, _RandomForestRegressorParams, JavaMLW >>> model.numFeatures 1 >>> model.trees - [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...] + [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...] >>> model.getNumTrees 2 >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) @@ -1265,7 +1265,7 @@ class GBTRegressor(JavaPredictor, _GBTRegressorParams, JavaMLWritable, JavaMLRea >>> model.treeWeights == model2.treeWeights True >>> model.trees - [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...] + [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...] >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0))], ... ["label", "features"]) >>> model.evaluateEachIteration(validation, "squared")