[SPARK-8661][ML] for LinearRegressionSuite.scala, changed javadoc-style comments to regular multiline comments, to make copy-pasting R code more simple

For mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala, changed Javadoc-style comments to regular multiline comments to make copy-pasting the R code simpler.

Author: Rosstin <asterazul@gmail.com>

Closes #7098 from Rosstin/SPARK-8661 and squashes the following commits:

5a05dee [Rosstin] SPARK-8661 for LinearRegressionSuite.scala, changed javadoc-style comments to regular multiline comments to make it easier to copy-paste the R code.
bb9a4b1 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8660
242aedd [Rosstin] SPARK-8660, changed comment style from JavaDoc style to normal multiline comment in order to make copypaste into R easier, in file classification/LogisticRegressionSuite.scala
2cd2985 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8639
21ac1e5 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8639
6c18058 [Rosstin] fixed minor typos in docs/README.md and docs/api.md
This commit is contained in:
Rosstin 2015-06-29 16:09:29 -07:00 committed by Reynold Xin
parent ed359de595
commit 4e880cf596

View file

@ -28,26 +28,26 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
@transient var dataset: DataFrame = _
@transient var datasetWithoutIntercept: DataFrame = _
/**
* In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
* is the same as the one trained by R's glmnet package. The following instruction
* describes how to reproduce the data in R.
*
* import org.apache.spark.mllib.util.LinearDataGenerator
* val data =
* sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, Array(4.7, 7.2),
* Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)
* data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)).coalesce(1)
* .saveAsTextFile("path")
/*
In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
is the same as the one trained by R's glmnet package. The following instructions
describe how to reproduce the data in R.
import org.apache.spark.mllib.util.LinearDataGenerator
val data =
sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, Array(4.7, 7.2),
Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)
data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)).coalesce(1)
.saveAsTextFile("path")
*/
override def beforeAll(): Unit = {
super.beforeAll()
dataset = sqlContext.createDataFrame(
sc.parallelize(LinearDataGenerator.generateLinearInput(
6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2))
/**
* datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
* training model without intercept
/*
datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
training a model without an intercept
*/
datasetWithoutIntercept = sqlContext.createDataFrame(
sc.parallelize(LinearDataGenerator.generateLinearInput(
@ -59,20 +59,20 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = new LinearRegression
val model = trainer.fit(dataset)
/**
* Using the following R code to load the data and train the model using glmnet package.
*
* library("glmnet")
* data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
* features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
* label <- as.numeric(data$V1)
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) 6.300528
* as.numeric.data.V2. 4.701024
* as.numeric.data.V3. 7.198257
/*
Use the following R code to load the data and train the model using the glmnet package.
library("glmnet")
data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
label <- as.numeric(data$V1)
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.300528
as.numeric.data.V2. 4.701024
as.numeric.data.V3. 7.198257
NOTE(review): this output does not match interceptR/weightsR below; re-run the R code and confirm.
*/
val interceptR = 6.298698
val weightsR = Array(4.700706, 7.199082)
@ -94,29 +94,29 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model = trainer.fit(dataset)
val modelWithoutIntercept = trainer.fit(datasetWithoutIntercept)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
* intercept = FALSE))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) .
* as.numeric.data.V2. 6.995908
* as.numeric.data.V3. 5.275131
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
intercept = FALSE))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 6.995908
as.numeric.data.V3. 5.275131
*/
val weightsR = Array(6.995908, 5.275131)
assert(model.intercept ~== 0 relTol 1E-3)
assert(model.weights(0) ~== weightsR(0) relTol 1E-3)
assert(model.weights(1) ~== weightsR(1) relTol 1E-3)
/**
* Then again with the data with no intercept:
* > weightsWithoutIntercept
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) .
* as.numeric.data3.V2. 4.70011
* as.numeric.data3.V3. 7.19943
/*
Then again with the data with no intercept:
> weightsWithoutIntercept
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data3.V2. 4.70011
as.numeric.data3.V3. 7.19943
*/
val weightsWithoutInterceptR = Array(4.70011, 7.19943)
@ -129,14 +129,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) 6.24300
* as.numeric.data.V2. 4.024821
* as.numeric.data.V3. 6.679841
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.24300
as.numeric.data.V2. 4.024821
as.numeric.data.V3. 6.679841
*/
val interceptR = 6.24300
val weightsR = Array(4.024821, 6.679841)
@ -158,15 +158,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
* intercept=FALSE))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) .
* as.numeric.data.V2. 6.299752
* as.numeric.data.V3. 4.772913
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
intercept=FALSE))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 6.299752
as.numeric.data.V3. 4.772913
*/
val interceptR = 0.0
val weightsR = Array(6.299752, 4.772913)
@ -187,14 +187,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) 6.328062
* as.numeric.data.V2. 3.222034
* as.numeric.data.V3. 4.926260
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.328062
as.numeric.data.V2. 3.222034
as.numeric.data.V3. 4.926260
NOTE(review): this output does not match interceptR/weightsR below; re-run the R code and confirm.
*/
val interceptR = 5.269376
val weightsR = Array(3.736216, 5.712356)
@ -216,15 +216,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
* intercept = FALSE))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) .
* as.numeric.data.V2. 5.522875
* as.numeric.data.V3. 4.214502
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 5.522875
as.numeric.data.V3. 4.214502
*/
val interceptR = 0.0
val weightsR = Array(5.522875, 4.214502)
@ -245,14 +245,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) 6.324108
* as.numeric.data.V2. 3.168435
* as.numeric.data.V3. 5.200403
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.324108
as.numeric.data.V2. 3.168435
as.numeric.data.V3. 5.200403
NOTE(review): this output does not match interceptR/weightsR below; re-run the R code and confirm.
*/
val interceptR = 5.696056
val weightsR = Array(3.670489, 6.001122)
@ -274,15 +274,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
/**
* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
* intercept=FALSE))
* > weights
* 3 x 1 sparse Matrix of class "dgCMatrix"
* s0
* (Intercept) .
* as.numeric.dataM.V2. 5.673348
* as.numeric.dataM.V3. 4.322251
/*
weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
intercept=FALSE))
> weights
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.dataM.V2. 5.673348
as.numeric.dataM.V3. 4.322251
*/
val interceptR = 0.0
val weightsR = Array(5.673348, 4.322251)