Data are read from a file where each line has the format `label,feature`,
e.g. `4710.28,500.00`. The data are split into training and test sets.
A model is created from the training set, and the mean squared error is calculated between
the predicted labels and the real labels in the test set.
{% highlight scala %}
import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegressionModel}
val data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt")

// Each input line is "label,feature"; attach the default weight 1.0 to get
// (label, feature, weight) tuples.
val parsedData = data.map { line =>
  val fields = line.split(',').map(_.toDouble)
  (fields(0), fields(1), 1.0)
}

// 60% of the data goes to training and 40% to testing; the seed is fixed at 11.
val Array(training, test) = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)

// Fit the isotonic regression model on the training set.
// setIsotonic(true) is already the default and is spelled out only for demonstration.
val model = new IsotonicRegression()
  .setIsotonic(true)
  .run(training)

// Pair every predicted label with the real label of the same test point.
val predictionAndLabel = test.map { case (label, feature, _) =>
  (model.predict(feature), label)
}

// Mean squared error between predicted and real labels.
val squaredErrors = predictionAndLabel.map { case (predicted, actual) =>
  math.pow(predicted - actual, 2)
}
val meanSquaredError = squaredErrors.mean()
println(s"Mean Squared Error = $meanSquaredError")

// Save the model and load it back.
model.save(sc, "myModelPath")
val sameModel = IsotonicRegressionModel.load(sc, "myModelPath")
{% endhighlight %}
Data are read from a file where each line has the format `label,feature`,
e.g. `4710.28,500.00`. The data are split into training and test sets.
A model is created from the training set, and the mean squared error is calculated between
the predicted labels and the real labels in the test set.
{% highlight java %}
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaDoubleRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.mllib.regression.IsotonicRegression;
import org.apache.spark.mllib.regression.IsotonicRegressionModel;
import scala.Tuple2;
import scala.Tuple3;

JavaRDD<String> data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt");

// Create label, feature, weight tuples from input data with weight set to default value 1.0.
JavaRDD<Tuple3<Double, Double, Double>> parsedData = data.map(
  new Function<String, Tuple3<Double, Double, Double>>() {
    @Override public Tuple3<Double, Double, Double> call(String line) {
      String[] parts = line.split(",");
      return new Tuple3<Double, Double, Double>(
        Double.valueOf(parts[0]), Double.valueOf(parts[1]), 1.0);
    }
  }
);

// Split data into training (60%) and test (40%) sets.
JavaRDD<Tuple3<Double, Double, Double>>[] splits =
  parsedData.randomSplit(new double[] {0.6, 0.4}, 11L);
JavaRDD<Tuple3<Double, Double, Double>> training = splits[0];
JavaRDD<Tuple3<Double, Double, Double>> test = splits[1];

// Create isotonic regression model from training data.
// Isotonic parameter defaults to true so it is only shown for demonstration.
// The model is declared final because the anonymous PairFunction below captures it.
final IsotonicRegressionModel model = new IsotonicRegression().setIsotonic(true).run(training);

// Create tuples of predicted and real labels.
JavaPairRDD<Double, Double> predictionAndLabel = test.mapToPair(
  new PairFunction<Tuple3<Double, Double, Double>, Double, Double>() {
    @Override public Tuple2<Double, Double> call(Tuple3<Double, Double, Double> point) {
      Double predictedLabel = model.predict(point._2());
      return new Tuple2<Double, Double>(predictedLabel, point._1());
    }
  }
);

// Calculate mean squared error between predicted and real labels.
Double meanSquaredError = new JavaDoubleRDD(predictionAndLabel.map(
  new Function<Tuple2<Double, Double>, Object>() {
    @Override public Object call(Tuple2<Double, Double> pl) {
      return Math.pow(pl._1() - pl._2(), 2);
    }
  }
).rdd()).mean();
System.out.println("Mean Squared Error = " + meanSquaredError);

// Save and load model
model.save(sc.sc(), "myModelPath");
IsotonicRegressionModel sameModel = IsotonicRegressionModel.load(sc.sc(), "myModelPath");
{% endhighlight %}
Data are read from a file where each line has the format `label,feature`,
e.g. `4710.28,500.00`. The data are split into training and test sets.
A model is created from the training set, and the mean squared error is calculated between
the predicted labels and the real labels in the test set.
{% highlight python %}
import math
from pyspark.mllib.regression import IsotonicRegression, IsotonicRegressionModel
data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt")

# Create (label, feature, weight) tuples from input data with weight set to default value 1.0.
parsedData = data.map(lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,))

# Split data into training (60%) and test (40%) sets.
training, test = parsedData.randomSplit([0.6, 0.4], 11)

# Create isotonic regression model from training data.
# Isotonic parameter defaults to true so it is only shown for demonstration
model = IsotonicRegression.train(training, isotonic=True)

# Create tuples of predicted and real labels (p[0] is the label, p[1] the feature).
predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))

# Calculate mean squared error between predicted and real labels.
meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
print("Mean Squared Error = " + str(meanSquaredError))

# Save and load model
model.save(sc, "myModelPath")
sameModel = IsotonicRegressionModel.load(sc, "myModelPath")
{% endhighlight %}