[SPARK-8744] [ML] Add a public constructor to StringIndexerModel

It would be helpful to allow users to pass a pre-computed array of labels to construct a StringIndexerModel directly, rather than always going through StringIndexer to create the model.

Author: Holden Karau <holden@pigscanfly.ca>

Closes #7267 from holdenk/SPARK-8744-StringIndexerModel-should-have-public-constructor.
This commit is contained in:
Holden Karau 2015-08-14 11:22:10 -07:00 committed by Joseph K. Bradley
parent 7ecf0c4699
commit a7317ccdc2
2 changed files with 5 additions and 1 deletion

View file

@ -102,10 +102,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
* This is a temporary fix for the case when target labels do not exist during prediction.
*/
@Experimental
class StringIndexerModel private[ml] (
class StringIndexerModel (
override val uid: String,
labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {
def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels)
private val labelToIndex: OpenHashMap[String, Double] = {
val n = labels.length
val map = new OpenHashMap[String, Double](n)

View file

@ -30,7 +30,9 @@ class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext {
// Runs ParamsSuite.checkParams on a fresh StringIndexer and on StringIndexerModel
// instances created through both constructors (with and without an explicit uid).
test("params") {
ParamsSuite.checkParams(new StringIndexer)
// Model built with an explicit uid.
val model = new StringIndexerModel("indexer", Array("a", "b"))
// Model built from the labels only, using the constructor that takes no uid.
val modelWithoutUid = new StringIndexerModel(Array("a", "b"))
ParamsSuite.checkParams(model)
ParamsSuite.checkParams(modelWithoutUid)
}
test("StringIndexer") {