[SPARK-20258][DOC][SPARKR] Fix SparkR logistic regression example in programming guide (did not converge)
## What changes were proposed in this pull request?

The SparkR logistic regression example in the programming guide did not converge (for IRWLS). All estimates are essentially zero:

```
training2 <- read.df("data/mllib/sample_binary_classification_data.txt", source = "libsvm")
df_list2 <- randomSplit(training2, c(7,3), 2)
binomialDF <- df_list2[[1]]
binomialTestDF <- df_list2[[2]]
binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")

17/04/07 11:42:03 WARN WeightedLeastSquares: Cholesky solver failed due to singular covariance matrix. Retrying with Quasi-Newton solver.

> summary(binomialGLM)

Coefficients:
             Estimate
(Intercept)  9.0255e+00
features_0   0.0000e+00
features_1   0.0000e+00
features_2   0.0000e+00
features_3   0.0000e+00
features_4   0.0000e+00
features_5   0.0000e+00
features_6   0.0000e+00
features_7   0.0000e+00
```

Author: actuaryzhang <actuaryzhang10@gmail.com>

Closes #17571 from actuaryzhang/programGuide2.
This commit is contained in:
parent
8feb799af0
commit
1ad73f0a21
|
@@ -27,7 +27,7 @@ sparkR.session(appName = "SparkR-ML-glm-example")
|
||||||
# $example on$
|
# $example on$
|
||||||
training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
|
training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
|
||||||
# Fit a generalized linear model of family "gaussian" with spark.glm
|
# Fit a generalized linear model of family "gaussian" with spark.glm
|
||||||
df_list <- randomSplit(training, c(7,3), 2)
|
df_list <- randomSplit(training, c(7, 3), 2)
|
||||||
gaussianDF <- df_list[[1]]
|
gaussianDF <- df_list[[1]]
|
||||||
gaussianTestDF <- df_list[[2]]
|
gaussianTestDF <- df_list[[2]]
|
||||||
gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
|
gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
|
||||||
|
@@ -44,8 +44,9 @@ gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian")
|
||||||
summary(gaussianGLM2)
|
summary(gaussianGLM2)
|
||||||
|
|
||||||
# Fit a generalized linear model of family "binomial" with spark.glm
|
# Fit a generalized linear model of family "binomial" with spark.glm
|
||||||
training2 <- read.df("data/mllib/sample_binary_classification_data.txt", source = "libsvm")
|
training2 <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
|
||||||
df_list2 <- randomSplit(training2, c(7,3), 2)
|
training2 <- transform(training2, label = cast(training2$label > 1, "integer"))
|
||||||
|
df_list2 <- randomSplit(training2, c(7, 3), 2)
|
||||||
binomialDF <- df_list2[[1]]
|
binomialDF <- df_list2[[1]]
|
||||||
binomialTestDF <- df_list2[[2]]
|
binomialTestDF <- df_list2[[2]]
|
||||||
binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")
|
binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")
|
||||||
|
|
Loading…
Reference in a new issue