[MINOR] [MLLIB] [ML] [DOC] Minor doc fixes for StringIndexer and MetadataUtils

Changes:
* Make the Scala doc for StringIndexerInverse (i.e. IndexToString) clearer. Also remove the Scala doc from transformSchema so that the doc is inherited.
* MetadataUtils.scala: "Helper utilities for tree-based algorithms" -> "Helper utilities for algorithms using ML metadata" (not just trees anymore)

CC: holdenk mengxr

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #8679 from jkbradley/doc-fixes-1.5.
This commit is contained in:
Joseph K. Bradley 2015-09-11 08:55:35 -07:00 committed by Xiangrui Meng
parent 960d2d0ac6
commit 2e3a280754
3 changed files with 20 additions and 29 deletions

View file

@ -181,10 +181,10 @@ class StringIndexerModel (
/**
* :: Experimental ::
* A [[Transformer]] that maps a column of string indices back to a new column of corresponding
* string values using either the ML attributes of the input column, or if provided using the labels
* supplied by the user.
* All original columns are kept during transformation.
* A [[Transformer]] that maps a column of indices back to a new column of corresponding
* string values.
* The index-string mapping is either from the ML attributes of the input column,
* or from user-supplied labels (which take precedence over ML attributes).
*
* @see [[StringIndexer]] for converting strings into indices
*/
@ -202,32 +202,23 @@ class IndexToString private[ml] (
/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)
/**
* Optional labels to be provided by the user, if not supplied column
* metadata is read for labels. The default value is an empty array,
* but the empty array is ignored and column metadata used instead.
* @group setParam
*/
/** @group setParam */
def setLabels(value: Array[String]): this.type = set(labels, value)
/**
* Param for array of labels.
* Optional labels to be provided by the user.
* Default: Empty array, in which case column metadata is used for labels.
* Optional param for array of labels specifying index-string mapping.
*
* Default: Empty array, in which case [[inputCol]] metadata is used for labels.
* @group param
*/
final val labels: StringArrayParam = new StringArrayParam(this, "labels",
"array of labels, if not provided metadata from inputCol is used instead.")
"Optional array of labels specifying index-string mapping." +
" If not provided or if empty, then metadata from inputCol is used instead.")
setDefault(labels, Array.empty[String])
/**
* Optional labels to be provided by the user, if not supplied column
* metadata is read for labels.
* @group getParam
*/
/** @group getParam */
final def getLabels: Array[String] = $(labels)
/** Transform the schema for the inverse transformation */
override def transformSchema(schema: StructType): StructType = {
val inputColName = $(inputCol)
val inputDataType = schema(inputColName).dataType

View file

@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructField
/**
* Helper utilities for tree-based algorithms
* Helper utilities for algorithms using ML metadata
*/
private[spark] object MetadataUtils {

View file

@ -985,17 +985,17 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
A :py:class:`Transformer` that maps a column of string indices back to a new column of
corresponding string values using either the ML attributes of the input column, or if
provided using the labels supplied by the user.
All original columns are kept during transformation.
A :py:class:`Transformer` that maps a column of indices back to a new column of
corresponding string values.
The index-string mapping is either from the ML attributes of the input column,
or from user-supplied labels (which take precedence over ML attributes).
See L{StringIndexer} for converting strings into indices.
"""
# a placeholder to make the labels show up in generated doc
labels = Param(Params._dummy(), "labels",
"Optional array of labels to be provided by the user, if not supplied or " +
"empty, column metadata is read for labels")
"Optional array of labels specifying index-string mapping." +
" If not provided or if empty, then metadata from inputCol is used instead.")
@keyword_only
def __init__(self, inputCol=None, outputCol=None, labels=None):
@ -1006,8 +1006,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
self.uid)
self.labels = Param(self, "labels",
"Optional array of labels to be provided by the user, if not " +
"supplied or empty, column metadata is read for labels")
"Optional array of labels specifying index-string mapping. If not" +
" provided or if empty, then metadata from inputCol is used instead.")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)