[SPARK-13770][DOCUMENTATION][ML] Document the ML feature Interaction
I created Scala and Java examples and added documentation. Author: chie8842 <hayashidac@nttdata.co.jp> Closes #15658 from hayashidac/SPARK-13770.
This commit is contained in:
parent
c291bd2745
commit
ee2e741ac1
|
@ -729,6 +729,58 @@ for more details on the API.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
## Interaction
|
||||||
|
|
||||||
|
`Interaction` is a `Transformer` which takes vector or double-valued columns, and generates a single vector column that contains the product of all combinations of one value from each input column.
|
||||||
|
|
||||||
|
For example, if you have 2 vector type columns each of which has 3 dimensions as input columns, then you'll get a 9-dimensional vector as the output column.
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
Assume that we have the following DataFrame with the columns "id1", "vec1", and "vec2":
|
||||||
|
|
||||||
|
~~~~
|
||||||
|
id1|vec1 |vec2
|
||||||
|
---|--------------|--------------
|
||||||
|
1 |[1.0,2.0,3.0] |[8.0,4.0,5.0]
|
||||||
|
2 |[4.0,3.0,8.0] |[7.0,9.0,8.0]
|
||||||
|
3 |[6.0,1.0,9.0] |[2.0,3.0,6.0]
|
||||||
|
4 |[10.0,8.0,6.0]|[9.0,4.0,5.0]
|
||||||
|
5 |[9.0,2.0,7.0] |[10.0,7.0,3.0]
|
||||||
|
6 |[1.0,1.0,4.0] |[2.0,8.0,4.0]
|
||||||
|
~~~~
|
||||||
|
|
||||||
|
Applying `Interaction` to those input columns produces
|
||||||
|
the output column `interactedCol`, which contains:
|
||||||
|
|
||||||
|
~~~~
|
||||||
|
id1|vec1 |vec2 |interactedCol
|
||||||
|
---|--------------|--------------|------------------------------------------------------
|
||||||
|
1 |[1.0,2.0,3.0] |[8.0,4.0,5.0] |[8.0,4.0,5.0,16.0,8.0,10.0,24.0,12.0,15.0]
|
||||||
|
2 |[4.0,3.0,8.0] |[7.0,9.0,8.0] |[56.0,72.0,64.0,42.0,54.0,48.0,112.0,144.0,128.0]
|
||||||
|
3 |[6.0,1.0,9.0] |[2.0,3.0,6.0] |[36.0,54.0,108.0,6.0,9.0,18.0,54.0,81.0,162.0]
|
||||||
|
4 |[10.0,8.0,6.0]|[9.0,4.0,5.0] |[360.0,160.0,200.0,288.0,128.0,160.0,216.0,96.0,120.0]
|
||||||
|
5 |[9.0,2.0,7.0] |[10.0,7.0,3.0]|[450.0,315.0,135.0,100.0,70.0,30.0,350.0,245.0,105.0]
|
||||||
|
6 |[1.0,1.0,4.0] |[2.0,8.0,4.0] |[12.0,48.0,24.0,12.0,48.0,24.0,48.0,192.0,96.0]
|
||||||
|
~~~~
|
||||||
|
|
||||||
|
<div class="codetabs">
|
||||||
|
<div data-lang="scala" markdown="1">
|
||||||
|
|
||||||
|
Refer to the [Interaction Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Interaction)
|
||||||
|
for more details on the API.
|
||||||
|
|
||||||
|
{% include_example scala/org/apache/spark/examples/ml/InteractionExample.scala %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div data-lang="java" markdown="1">
|
||||||
|
|
||||||
|
Refer to the [Interaction Java docs](api/java/org/apache/spark/ml/feature/Interaction.html)
|
||||||
|
for more details on the API.
|
||||||
|
|
||||||
|
{% include_example java/org/apache/spark/examples/ml/JavaInteractionExample.java %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
## Normalizer
|
## Normalizer
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.examples.ml;
|
||||||
|
|
||||||
|
import org.apache.spark.ml.feature.Interaction;
|
||||||
|
import org.apache.spark.ml.feature.VectorAssembler;
|
||||||
|
import org.apache.spark.ml.linalg.Vectors;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.apache.spark.sql.types.DataTypes;
|
||||||
|
import org.apache.spark.sql.types.Metadata;
|
||||||
|
import org.apache.spark.sql.types.StructField;
|
||||||
|
import org.apache.spark.sql.types.StructType;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
// $example on$
|
||||||
|
// $example off$
|
||||||
|
|
||||||
|
public class JavaInteractionExample {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
SparkSession spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName("JavaInteractionExample")
|
||||||
|
.getOrCreate();
|
||||||
|
|
||||||
|
// $example on$
|
||||||
|
List<Row> data = Arrays.asList(
|
||||||
|
RowFactory.create(1, 1, 2, 3, 8, 4, 5),
|
||||||
|
RowFactory.create(2, 4, 3, 8, 7, 9, 8),
|
||||||
|
RowFactory.create(3, 6, 1, 9, 2, 3, 6),
|
||||||
|
RowFactory.create(4, 10, 8, 6, 9, 4, 5),
|
||||||
|
RowFactory.create(5, 9, 2, 7, 10, 7, 3),
|
||||||
|
RowFactory.create(6, 1, 1, 4, 2, 8, 4)
|
||||||
|
);
|
||||||
|
|
||||||
|
StructType schema = new StructType(new StructField[]{
|
||||||
|
new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id4", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id5", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id6", DataTypes.IntegerType, false, Metadata.empty()),
|
||||||
|
new StructField("id7", DataTypes.IntegerType, false, Metadata.empty())
|
||||||
|
});
|
||||||
|
|
||||||
|
Dataset<Row> df = spark.createDataFrame(data, schema);
|
||||||
|
|
||||||
|
VectorAssembler assembler1 = new VectorAssembler()
|
||||||
|
.setInputCols(new String[]{"id2", "id3", "id4"})
|
||||||
|
.setOutputCol("vec1");
|
||||||
|
|
||||||
|
Dataset<Row> assembled1 = assembler1.transform(df);
|
||||||
|
|
||||||
|
VectorAssembler assembler2 = new VectorAssembler()
|
||||||
|
.setInputCols(new String[]{"id5", "id6", "id7"})
|
||||||
|
.setOutputCol("vec2");
|
||||||
|
|
||||||
|
Dataset<Row> assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2");
|
||||||
|
|
||||||
|
Interaction interaction = new Interaction()
|
||||||
|
.setInputCols(new String[]{"id1","vec1","vec2"})
|
||||||
|
.setOutputCol("interactedCol");
|
||||||
|
|
||||||
|
Dataset<Row> interacted = interaction.transform(assembled2);
|
||||||
|
|
||||||
|
interacted.show(false);
|
||||||
|
// $example off$
|
||||||
|
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// scalastyle:off println
|
||||||
|
package org.apache.spark.examples.ml
|
||||||
|
|
||||||
|
// $example on$
|
||||||
|
import org.apache.spark.ml.feature.Interaction
|
||||||
|
import org.apache.spark.ml.feature.VectorAssembler
|
||||||
|
// $example off$
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
|
||||||
|
/**
 * Example demonstrating the `Interaction` feature transformer.
 *
 * Two `VectorAssembler` stages build the vector columns "vec1" and "vec2" from integer
 * columns, and `Interaction` then combines "id1", "vec1" and "vec2" into the single
 * output column "interactedCol".
 */
object InteractionExample {
  /**
   * Runs the example and prints the transformed DataFrame without truncation.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("InteractionExample")
      .getOrCreate()

    // $example on$
    // Each tuple is (id1, id2, id3, id4, id5, id6, id7).
    val df = spark.createDataFrame(Seq(
      (1, 1, 2, 3, 8, 4, 5),
      (2, 4, 3, 8, 7, 9, 8),
      (3, 6, 1, 9, 2, 3, 6),
      (4, 10, 8, 6, 9, 4, 5),
      (5, 9, 2, 7, 10, 7, 3),
      (6, 1, 1, 4, 2, 8, 4)
    )).toDF("id1", "id2", "id3", "id4", "id5", "id6", "id7")

    val assembler1 = new VectorAssembler()
      .setInputCols(Array("id2", "id3", "id4"))
      .setOutputCol("vec1")

    val assembled1 = assembler1.transform(df)

    val assembler2 = new VectorAssembler()
      .setInputCols(Array("id5", "id6", "id7"))
      .setOutputCol("vec2")

    // Keep only the columns that are passed to Interaction.
    val assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2")

    val interaction = new Interaction()
      .setInputCols(Array("id1", "vec1", "vec2"))
      .setOutputCol("interactedCol")

    val interacted = interaction.transform(assembled2)

    interacted.show(truncate = false)
    // $example off$

    spark.stop()
  }
}
|
||||||
|
// scalastyle:on println
|
Loading…
Reference in a new issue