[SPARK-15134][EXAMPLE] Indent SparkSession builder patterns and update binary_classification_metrics_example.py
## What changes were proposed in this pull request? This issue addresses the comments in SPARK-15031 and also fixes Java linter errors. - Use multiline format in SparkSession builder patterns. - Update `binary_classification_metrics_example.py` to use `SparkSession`. - Fix Java linter errors (introduced in SPARK-13745, SPARK-15031, and others so far). ## How was this patch tested? Passed the Jenkins tests and ran `dev/lint-java` manually. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12911 from dongjoon-hyun/SPARK-15134.
This commit is contained in:
parent
bb9991dec5
commit
2c170dd3d7
|
@ -87,8 +87,11 @@ public class ExternalShuffleBlockHandler extends RpcHandler {
|
|||
blocks.add(blockManager.getBlockData(msg.appId, msg.execId, blockId));
|
||||
}
|
||||
long streamId = streamManager.registerStream(client.getClientId(), blocks.iterator());
|
||||
logger.trace("Registered streamId {} with {} buffers for client {} from host {}", streamId,
|
||||
msg.blockIds.length, client.getClientId(), NettyUtils.getRemoteAddress(client.getChannel()));
|
||||
logger.trace("Registered streamId {} with {} buffers for client {} from host {}",
|
||||
streamId,
|
||||
msg.blockIds.length,
|
||||
client.getClientId(),
|
||||
NettyUtils.getRemoteAddress(client.getChannel()));
|
||||
callback.onSuccess(new StreamHandle(streamId, msg.blockIds.length).toByteBuffer());
|
||||
|
||||
} else if (msgObj instanceof RegisterExecutor) {
|
||||
|
|
|
@ -33,7 +33,10 @@ import org.apache.spark.sql.types.*;
|
|||
|
||||
public class JavaAFTSurvivalRegressionExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaAFTSurvivalRegressionExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaAFTSurvivalRegressionExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -81,7 +81,10 @@ public class JavaALSExample {
|
|||
// $example off$
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaALSExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaALSExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
JavaRDD<Rating> ratingsRDD = spark
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.spark.examples.ml;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
|
@ -26,7 +24,6 @@ import org.apache.spark.sql.SparkSession;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.ml.feature.Binarizer;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.RowFactory;
|
||||
|
@ -38,7 +35,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaBinarizerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaBinarizerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaBinarizerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -42,7 +42,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
public class JavaBisectingKMeansExample {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaBisectingKMeansExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaBisectingKMeansExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -35,7 +35,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaBucketizerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaBucketizerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaBucketizerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
|
||||
|
|
|
@ -17,9 +17,6 @@
|
|||
|
||||
package org.apache.spark.examples.ml;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
|
@ -40,7 +37,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaChiSqSelectorExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaChiSqSelectorExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaChiSqSelectorExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -32,7 +32,10 @@ import org.apache.spark.sql.types.*;
|
|||
|
||||
public class JavaCountVectorizerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaCountVectorizerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaCountVectorizerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Input data: Each row is a bag of words from a sentence or document.
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.spark.examples.ml;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
|
@ -26,7 +24,6 @@ import org.apache.spark.sql.SparkSession;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.ml.feature.DCT;
|
||||
import org.apache.spark.mllib.linalg.VectorUDT;
|
||||
import org.apache.spark.mllib.linalg.Vectors;
|
||||
|
@ -39,7 +36,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaDCTExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaDCTExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaDCTExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -49,7 +49,10 @@ import org.apache.spark.sql.SparkSession;
|
|||
public class JavaDeveloperApiExample {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaDeveloperApiExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaDeveloperApiExample")
|
||||
.getOrCreate();
|
||||
|
||||
// Prepare training data.
|
||||
List<LabeledPoint> localTraining = Lists.newArrayList(
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.spark.examples.ml;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
|
@ -27,7 +25,6 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.ml.feature.ElementwiseProduct;
|
||||
import org.apache.spark.mllib.linalg.Vector;
|
||||
import org.apache.spark.mllib.linalg.VectorUDT;
|
||||
|
@ -42,7 +39,9 @@ import org.apache.spark.sql.types.StructType;
|
|||
public class JavaElementwiseProductExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession
|
||||
.builder().appName("JavaElementwiseProductExample").getOrCreate();
|
||||
.builder()
|
||||
.appName("JavaElementwiseProductExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Create some vector data; also works for sparse vectors
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.spark.examples.ml;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
// $example on$
|
||||
import org.apache.spark.ml.Pipeline;
|
||||
import org.apache.spark.ml.PipelineModel;
|
||||
|
@ -35,11 +33,15 @@ import org.apache.spark.sql.SparkSession;
|
|||
public class JavaGradientBoostedTreeClassifierExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession
|
||||
.builder().appName("JavaGradientBoostedTreeClassifierExample").getOrCreate();
|
||||
.builder()
|
||||
.appName("JavaGradientBoostedTreeClassifierExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Load and parse the data file, converting it to a DataFrame.
|
||||
Dataset<Row> data = spark.read().format("libsvm")
|
||||
Dataset<Row> data = spark
|
||||
.read()
|
||||
.format("libsvm")
|
||||
.load("data/mllib/sample_libsvm_data.txt");
|
||||
|
||||
// Index labels, adding metadata to the label column.
|
||||
|
|
|
@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaIndexToStringExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaIndexToStringExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaIndexToStringExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -70,7 +70,10 @@ public class JavaKMeansExample {
|
|||
int k = Integer.parseInt(args[1]);
|
||||
|
||||
// Parses the arguments
|
||||
SparkSession spark = SparkSession.builder().appName("JavaKMeansExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaKMeansExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Loads data
|
||||
|
|
|
@ -65,7 +65,10 @@ public class JavaLDAExample {
|
|||
String inputFile = "data/mllib/sample_lda_data.txt";
|
||||
|
||||
// Parses the arguments
|
||||
SparkSession spark = SparkSession.builder().appName("JavaLDAExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaLDAExample")
|
||||
.getOrCreate();
|
||||
|
||||
// Loads data
|
||||
JavaRDD<Row> points = spark.read().text(inputFile).javaRDD().map(new ParseVector());
|
||||
|
|
|
@ -28,13 +28,19 @@ import org.apache.spark.sql.SparkSession;
|
|||
public class JavaMaxAbsScalerExample {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaMaxAbsScalerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaMaxAbsScalerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
|
||||
Dataset<Row> dataFrame = spark
|
||||
.read()
|
||||
.format("libsvm")
|
||||
.load("data/mllib/sample_libsvm_data.txt");
|
||||
MaxAbsScaler scaler = new MaxAbsScaler()
|
||||
.setInputCol("features")
|
||||
.setOutputCol("scaledFeatures");
|
||||
.setInputCol("features")
|
||||
.setOutputCol("scaledFeatures");
|
||||
|
||||
// Compute summary statistics and generate MaxAbsScalerModel
|
||||
MaxAbsScalerModel scalerModel = scaler.fit(dataFrame);
|
||||
|
|
|
@ -28,10 +28,16 @@ import org.apache.spark.sql.Row;
|
|||
|
||||
public class JavaMinMaxScalerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaMinMaxScalerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaMinMaxScalerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
|
||||
Dataset<Row> dataFrame = spark
|
||||
.read()
|
||||
.format("libsvm")
|
||||
.load("data/mllib/sample_libsvm_data.txt");
|
||||
MinMaxScaler scaler = new MinMaxScaler()
|
||||
.setInputCol("features")
|
||||
.setOutputCol("scaledFeatures");
|
||||
|
|
|
@ -35,7 +35,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaNGramExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaNGramExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -32,7 +32,10 @@ import org.apache.spark.sql.SparkSession;
|
|||
public class JavaNaiveBayesExample {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaNaiveBayesExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaNaiveBayesExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Load training data
|
||||
|
|
|
@ -27,7 +27,10 @@ import org.apache.spark.sql.Row;
|
|||
|
||||
public class JavaNormalizerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaNormalizerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaNormalizerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Dataset<Row> dataFrame =
|
||||
|
|
|
@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaOneHotEncoderExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaOneHotEncoderExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaOneHotEncoderExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -58,7 +58,10 @@ public class JavaOneVsRestExample {
|
|||
public static void main(String[] args) {
|
||||
// parse the arguments
|
||||
Params params = parse(args);
|
||||
SparkSession spark = SparkSession.builder().appName("JavaOneVsRestExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaOneVsRestExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// configure the base classifier
|
||||
|
|
|
@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaPCAExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaPCAExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaPCAExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.spark.sql.SparkSession;
|
|||
*/
|
||||
public class JavaPipelineExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaPipelineExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaPipelineExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Prepare training documents, which are labeled.
|
||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaPolynomialExpansionExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaPolynomialExpansionExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaPolynomialExpansionExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
PolynomialExpansion polyExpansion = new PolynomialExpansion()
|
||||
|
|
|
@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*;
|
|||
|
||||
public class JavaRFormulaExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaRFormulaExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaRFormulaExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
StructType schema = createStructType(new StructField[]{
|
||||
|
|
|
@ -31,7 +31,10 @@ import org.apache.spark.sql.types.*;
|
|||
|
||||
public class JavaSQLTransformerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaSQLTransformerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaSQLTransformerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -40,7 +40,10 @@ import org.apache.spark.sql.SparkSession;
|
|||
public class JavaSimpleParamsExample {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaSimpleParamsExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaSimpleParamsExample")
|
||||
.getOrCreate();
|
||||
|
||||
// Prepare training data.
|
||||
// We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
|
||||
|
|
|
@ -28,7 +28,10 @@ import org.apache.spark.sql.Row;
|
|||
|
||||
public class JavaStandardScalerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaStandardScalerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaStandardScalerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Dataset<Row> dataFrame =
|
||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
public class JavaStopWordsRemoverExample {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaStopWordsRemoverExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
StopWordsRemover remover = new StopWordsRemover()
|
||||
|
|
|
@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*;
|
|||
|
||||
public class JavaStringIndexerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaStringIndexerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaStringIndexerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -38,7 +38,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaTfIdfExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaTfIdfExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaTfIdfExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType;
|
|||
|
||||
public class JavaTokenizerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaTokenizerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
List<Row> data = Arrays.asList(
|
||||
|
|
|
@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*;
|
|||
|
||||
public class JavaVectorAssemblerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaVectorAssemblerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaVectorAssemblerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
StructType schema = createStructType(new StructField[]{
|
||||
|
|
|
@ -30,7 +30,10 @@ import org.apache.spark.sql.Row;
|
|||
|
||||
public class JavaVectorIndexerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaVectorIndexerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaVectorIndexerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
|
||||
|
|
|
@ -37,7 +37,10 @@ import org.apache.spark.sql.types.*;
|
|||
|
||||
public class JavaVectorSlicerExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaVectorSlicerExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaVectorSlicerExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
Attribute[] attrs = new Attribute[]{
|
||||
|
|
|
@ -32,7 +32,10 @@ import org.apache.spark.sql.types.*;
|
|||
|
||||
public class JavaWord2VecExample {
|
||||
public static void main(String[] args) {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaWord2VecExample").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaWord2VecExample")
|
||||
.getOrCreate();
|
||||
|
||||
// $example on$
|
||||
// Input data: Each row is a bag of words from a sentence or document.
|
||||
|
|
|
@ -51,7 +51,10 @@ public class JavaSparkSQL {
|
|||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
SparkSession spark = SparkSession.builder().appName("JavaSparkSQL").getOrCreate();
|
||||
SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("JavaSparkSQL")
|
||||
.getOrCreate();
|
||||
|
||||
System.out.println("=== Data source: RDD ===");
|
||||
// Load a text file and convert each line to a Java Bean.
|
||||
|
@ -147,7 +150,8 @@ public class JavaSparkSQL {
|
|||
// a RDD[String] storing one JSON object per string.
|
||||
List<String> jsonData = Arrays.asList(
|
||||
"{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
|
||||
JavaRDD<String> anotherPeopleRDD = spark.createDataFrame(jsonData, String.class).toJSON().javaRDD();
|
||||
JavaRDD<String> anotherPeopleRDD = spark
|
||||
.createDataFrame(jsonData, String.class).toJSON().javaRDD();
|
||||
Dataset<Row> peopleFromJsonRDD = spark.read().json(anotherPeopleRDD);
|
||||
|
||||
// Take a look at the schema of this new DataFrame.
|
||||
|
|
|
@ -115,7 +115,10 @@ class JavaSparkSessionSingleton {
|
|||
private static transient SparkSession instance = null;
|
||||
public static SparkSession getInstance(SparkConf sparkConf) {
|
||||
if (instance == null) {
|
||||
instance = SparkSession.builder().config(sparkConf).getOrCreate();
|
||||
instance = SparkSession
|
||||
.builder()
|
||||
.config(sparkConf)
|
||||
.getOrCreate();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
|
|
@ -30,7 +30,10 @@ from pyspark.sql import Row
|
|||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("ALSExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("ALSExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
lines = spark.read.text("data/mllib/als/sample_movielens_ratings.txt").rdd
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import Binarizer
|
|||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("BinarizerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("BinarizerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
continuousDataFrame = spark.createDataFrame([
|
||||
|
|
|
@ -30,7 +30,10 @@ A simple example demonstrating a bisecting k-means clustering.
|
|||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("PythonBisectingKMeansExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PythonBisectingKMeansExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
data = spark.read.text("data/mllib/kmeans_data.txt").rdd
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import Bucketizer
|
|||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("BucketizerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("BucketizerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors
|
|||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("ChiSqSelectorExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("ChiSqSelectorExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import CountVectorizer
|
|||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("CountVectorizerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("CountVectorizerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Input data: Each row is a bag of words with a ID.
|
||||
|
|
|
@ -35,7 +35,10 @@ Run with:
|
|||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("CrossValidatorExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("CrossValidatorExample")\
|
||||
.getOrCreate()
|
||||
# $example on$
|
||||
# Prepare training documents, which are labeled.
|
||||
training = spark.createDataFrame([
|
||||
|
|
|
@ -33,7 +33,10 @@ if __name__ == "__main__":
|
|||
if len(sys.argv) > 2:
|
||||
print("Usage: dataframe_example.py <libsvm file>", file=sys.stderr)
|
||||
exit(-1)
|
||||
spark = SparkSession.builder.appName("DataFrameExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("DataFrameExample")\
|
||||
.getOrCreate()
|
||||
if len(sys.argv) == 2:
|
||||
input = sys.argv[1]
|
||||
else:
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("DCTExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("DCTExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame([
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("decision_tree_classification_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("decision_tree_classification_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load the data stored in LIBSVM format as a DataFrame.
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("decision_tree_classification_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("decision_tree_classification_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load the data stored in LIBSVM format as a DataFrame.
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("ElementwiseProductExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("ElementwiseProductExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
|
||||
|
|
|
@ -26,7 +26,10 @@ from pyspark.ml.classification import LogisticRegression
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("EstimatorTransformerParamExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("EstimatorTransformerParamExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Prepare training data from a list of (label, features) tuples.
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("gradient_boosted_tree_classifier_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("gradient_boosted_tree_classifier_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load and parse the data file, converting it to a DataFrame.
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("gradient_boosted_tree_regressor_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("gradient_boosted_tree_regressor_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load and parse the data file, converting it to a DataFrame.
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import IndexToString, StringIndexer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("IndexToStringExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("IndexToStringExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame(
|
||||
|
|
|
@ -49,7 +49,10 @@ if __name__ == "__main__":
|
|||
path = sys.argv[1]
|
||||
k = sys.argv[2]
|
||||
|
||||
spark = SparkSession.builder.appName("PythonKMeansExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PythonKMeansExample")\
|
||||
.getOrCreate()
|
||||
|
||||
lines = spark.read.text(path).rdd
|
||||
data = lines.map(parseVector)
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.regression import LinearRegression
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("LinearRegressionWithElasticNet").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("LinearRegressionWithElasticNet")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load training data
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.classification import LogisticRegression
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("LogisticRegressionWithElasticNet").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("LogisticRegressionWithElasticNet")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load training data
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import MaxAbsScaler
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("MaxAbsScalerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("MaxAbsScalerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import MinMaxScaler
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("MinMaxScalerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("MinMaxScalerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import NGram
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("NGramExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("NGramExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
wordDataFrame = spark.createDataFrame([
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("naive_bayes_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("naive_bayes_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load training data
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import Normalizer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("NormalizerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("NormalizerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import OneHotEncoder, StringIndexer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("OneHotEncoderExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("OneHotEncoderExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame([
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("PCAExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PCAExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
|
||||
|
|
|
@ -27,7 +27,10 @@ from pyspark.ml.feature import HashingTF, Tokenizer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("PipelineExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PipelineExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Prepare training documents from a list of (id, text, label) tuples.
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("PolynomialExpansionExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PolynomialExpansionExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark\
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("random_forest_classifier_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("random_forest_classifier_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load and parse the data file, converting it to a DataFrame.
|
||||
|
|
|
@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("random_forest_regressor_example").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("random_forest_regressor_example")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Load and parse the data file, converting it to a DataFrame.
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import RFormula
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("RFormulaExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("RFormulaExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataset = spark.createDataFrame(
|
||||
|
|
|
@ -33,7 +33,10 @@ pipeline in Python. Run with:
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("SimpleTextClassificationPipeline").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("SimpleTextClassificationPipeline")\
|
||||
.getOrCreate()
|
||||
|
||||
# Prepare training documents, which are labeled.
|
||||
training = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import SQLTransformer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("SQLTransformerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("SQLTransformerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import StandardScaler
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("StandardScalerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("StandardScalerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import StopWordsRemover
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("StopWordsRemoverExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("StopWordsRemoverExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
sentenceData = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import StringIndexer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("StringIndexerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("StringIndexerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame(
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import HashingTF, IDF, Tokenizer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("TfIdfExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("TfIdfExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
sentenceData = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import Tokenizer, RegexTokenizer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("TokenizerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("TokenizerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
sentenceDataFrame = spark.createDataFrame([
|
||||
|
|
|
@ -31,7 +31,10 @@ Run with:
|
|||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("TrainValidationSplit").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("TrainValidationSplit")\
|
||||
.getOrCreate()
|
||||
# $example on$
|
||||
# Prepare training and test data.
|
||||
data = spark.read.format("libsvm")\
|
||||
|
|
|
@ -24,7 +24,10 @@ from pyspark.ml.feature import VectorAssembler
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("VectorAssemblerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("VectorAssemblerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
dataset = spark.createDataFrame(
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import VectorIndexer
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("VectorIndexerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("VectorIndexerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -25,7 +25,10 @@ from pyspark.sql.types import Row
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("VectorSlicerExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("VectorSlicerExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
df = spark.createDataFrame([
|
||||
|
|
|
@ -23,7 +23,10 @@ from pyspark.ml.feature import Word2Vec
|
|||
from pyspark.sql import SparkSession
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("Word2VecExample").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("Word2VecExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Input data: Each row is a bag of words from a sentence or document.
|
||||
|
|
|
@ -18,20 +18,25 @@
|
|||
Binary Classification Metrics Example.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from pyspark import SparkContext
|
||||
from pyspark.sql import SparkSession
|
||||
# $example on$
|
||||
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
|
||||
from pyspark.mllib.evaluation import BinaryClassificationMetrics
|
||||
from pyspark.mllib.util import MLUtils
|
||||
from pyspark.mllib.regression import LabeledPoint
|
||||
# $example off$
|
||||
|
||||
if __name__ == "__main__":
|
||||
sc = SparkContext(appName="BinaryClassificationMetricsExample")
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("BinaryClassificationMetricsExample")\
|
||||
.getOrCreate()
|
||||
|
||||
# $example on$
|
||||
# Several of the methods available in scala are currently missing from pyspark
|
||||
# Load training data in LIBSVM format
|
||||
data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt")
|
||||
data = spark\
|
||||
.read.format("libsvm").load("data/mllib/sample_binary_classification_data.txt")\
|
||||
.rdd.map(lambda row: LabeledPoint(row[0], row[1]))
|
||||
|
||||
# Split data into training (60%) and test (40%)
|
||||
training, test = data.randomSplit([0.6, 0.4], seed=11L)
|
||||
|
@ -53,4 +58,4 @@ if __name__ == "__main__":
|
|||
print("Area under ROC = %s" % metrics.areaUnderROC)
|
||||
# $example off$
|
||||
|
||||
sc.stop()
|
||||
spark.stop()
|
||||
|
|
|
@ -25,7 +25,10 @@ from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerT
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
spark = SparkSession.builder.appName("PythonSQL").getOrCreate()
|
||||
spark = SparkSession\
|
||||
.builder\
|
||||
.appName("PythonSQL")\
|
||||
.getOrCreate()
|
||||
|
||||
# A list of Rows. Infer schema from the first row, create a DataFrame and print the schema
|
||||
rows = [Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)]
|
||||
|
|
|
@ -38,8 +38,10 @@ from pyspark.sql import Row, SparkSession
|
|||
|
||||
def getSparkSessionInstance(sparkConf):
|
||||
if ('sparkSessionSingletonInstance' not in globals()):
|
||||
globals()['sparkSessionSingletonInstance'] =\
|
||||
SparkSession.builder.config(conf=sparkConf).getOrCreate()
|
||||
globals()['sparkSessionSingletonInstance'] = SparkSession\
|
||||
.builder\
|
||||
.config(conf=sparkConf)\
|
||||
.getOrCreate()
|
||||
return globals()['sparkSessionSingletonInstance']
|
||||
|
||||
|
||||
|
|
|
@ -30,7 +30,10 @@ import org.apache.spark.sql.SparkSession
|
|||
object AFTSurvivalRegressionExample {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("AFTSurvivalRegressionExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("AFTSurvivalRegressionExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
val training = spark.createDataFrame(Seq(
|
||||
|
|
|
@ -42,7 +42,10 @@ object ALSExample {
|
|||
// $example off$
|
||||
|
||||
def main(args: Array[String]) {
|
||||
val spark = SparkSession.builder.appName("ALSExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("ALSExample")
|
||||
.getOrCreate()
|
||||
import spark.implicits._
|
||||
|
||||
// $example on$
|
||||
|
|
|
@ -25,7 +25,10 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
|
|||
|
||||
object BinarizerExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("BinarizerExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("BinarizerExample")
|
||||
.getOrCreate()
|
||||
// $example on$
|
||||
val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
|
||||
val dataFrame: DataFrame = spark.createDataFrame(data).toDF("label", "feature")
|
||||
|
|
|
@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object BucketizerExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("BucketizerExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("BucketizerExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
|
||||
|
|
|
@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object ChiSqSelectorExample {
|
||||
def main(args: Array[String]) {
|
||||
val spark = SparkSession.builder.appName("ChiSqSelectorExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("ChiSqSelectorExample")
|
||||
.getOrCreate()
|
||||
import spark.implicits._
|
||||
|
||||
// $example on$
|
||||
|
|
|
@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object CountVectorizerExample {
|
||||
def main(args: Array[String]) {
|
||||
val spark = SparkSession.builder.appName("CounterVectorizerExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("CounterVectorizerExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
val df = spark.createDataFrame(Seq(
|
||||
|
|
|
@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object DCTExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("DCTExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("DCTExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
val data = Seq(
|
||||
|
|
|
@ -61,7 +61,10 @@ object DataFrameExample {
|
|||
}
|
||||
|
||||
def run(params: Params) {
|
||||
val spark = SparkSession.builder.appName(s"DataFrameExample with $params").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName(s"DataFrameExample with $params")
|
||||
.getOrCreate()
|
||||
|
||||
// Load input data
|
||||
println(s"Loading LIBSVM file with UDT from ${params.input}.")
|
||||
|
|
|
@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object DecisionTreeClassificationExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("DecisionTreeClassificationExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("DecisionTreeClassificationExample")
|
||||
.getOrCreate()
|
||||
// $example on$
|
||||
// Load the data stored in LIBSVM format as a DataFrame.
|
||||
val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
|
||||
|
|
|
@ -167,7 +167,9 @@ object DecisionTreeExample {
|
|||
testInput: String,
|
||||
algo: String,
|
||||
fracTest: Double): (DataFrame, DataFrame) = {
|
||||
val spark = SparkSession.builder.getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.getOrCreate()
|
||||
|
||||
// Load training data
|
||||
val origExamples: DataFrame = loadData(spark, input, dataFormat)
|
||||
|
|
|
@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object DecisionTreeRegressionExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("DecisionTreeRegressionExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("DecisionTreeRegressionExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
// Load the data stored in LIBSVM format as a DataFrame.
|
||||
|
|
|
@ -37,7 +37,10 @@ import org.apache.spark.sql.{Dataset, Row, SparkSession}
|
|||
object DeveloperApiExample {
|
||||
|
||||
def main(args: Array[String]) {
|
||||
val spark = SparkSession.builder.appName("DeveloperApiExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("DeveloperApiExample")
|
||||
.getOrCreate()
|
||||
import spark.implicits._
|
||||
|
||||
// Prepare training data.
|
||||
|
|
|
@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object ElementwiseProductExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("ElementwiseProductExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("ElementwiseProductExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
// Create some vector data; also works for sparse vectors
|
||||
|
|
|
@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession
|
|||
object EstimatorTransformerParamExample {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("EstimatorTransformerParamExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("EstimatorTransformerParamExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
// Prepare training data from a list of (label, features) tuples.
|
||||
|
|
|
@ -28,7 +28,10 @@ import org.apache.spark.sql.SparkSession
|
|||
|
||||
object GradientBoostedTreeClassifierExample {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val spark = SparkSession.builder.appName("GradientBoostedTreeClassifierExample").getOrCreate()
|
||||
val spark = SparkSession
|
||||
.builder
|
||||
.appName("GradientBoostedTreeClassifierExample")
|
||||
.getOrCreate()
|
||||
|
||||
// $example on$
|
||||
// Load and parse the data file, converting it to a DataFrame.
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue