[SPARK-16685] Remove audit-release scripts.

## What changes were proposed in this pull request?
This patch removes dev/audit-release. It was initially created to do basic release auditing, but it has been unused for more than a year.

## How was this patch tested?
N/A

Author: Reynold Xin <rxin@databricks.com>

Closes #14342 from rxin/SPARK-16685.
Reynold Xin 2016-07-25 20:03:54 +01:00 committed by Sean Owen
parent ad3708e783
commit dd784a8822
24 changed files with 0 additions and 1020 deletions

@@ -1,2 +0,0 @@
project/
spark_audit*

@@ -1,12 +0,0 @@
Test Application Builds
=======================
This directory includes test applications that are built when auditing releases. You can run them locally by setting the appropriate environment variables, for example:
```
$ cd sbt_app_core
$ SCALA_VERSION=2.11.7 \
SPARK_VERSION=1.0.0-SNAPSHOT \
SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
sbt run
```
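
The Maven-based applications are driven the same way, but take their coordinates as system properties rather than environment variables. A minimal sketch of a manual run, reusing the local staging repository from the sbt example above (the property names are the ones the audit script below passes):
```
$ cd maven_app_core
$ mvn --update-snapshots \
  -Dspark.release.repository=file:///home/patrick/.ivy2/local \
  -Dspark.version=1.0.0-SNAPSHOT \
  -Dscala.binary.version=2.11 \
  clean compile exec:java -Dexec.mainClass="SimpleApp"
```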

@@ -1,236 +0,0 @@
#!/usr/bin/python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Audits binary and maven artifacts for a Spark release.
# Requires GPG and Maven.
# usage:
# python audit_release.py
import os
import re
import shutil
import subprocess
import sys
import time
import urllib2
# Note: The following variables must be set before use!
RELEASE_URL = "http://people.apache.org/~andrewor14/spark-1.1.1-rc1/"
RELEASE_KEY = "XXXXXXXX" # Your 8-digit hex
RELEASE_REPOSITORY = "https://repository.apache.org/content/repositories/orgapachespark-1033"
RELEASE_VERSION = "1.1.1"
SCALA_VERSION = "2.11.7"
SCALA_BINARY_VERSION = "2.11"
# Do not set these
LOG_FILE_NAME = "spark_audit_%s" % time.strftime("%h_%m_%Y_%I_%M_%S")
LOG_FILE = open(LOG_FILE_NAME, 'w')
WORK_DIR = "/tmp/audit_%s" % int(time.time())
MAVEN_CMD = "mvn"
GPG_CMD = "gpg"
SBT_CMD = "sbt -Dsbt.log.noformat=true"
# Track failures to print them at the end
failures = []
# Log a message. Use sparingly because this flushes every write.
def log(msg):
    LOG_FILE.write(msg + "\n")
    LOG_FILE.flush()


def log_and_print(msg):
    print msg
    log(msg)


# Prompt the user to delete the scratch directory used
def clean_work_files():
    response = raw_input("OK to delete scratch directory '%s'? (y/N) " % WORK_DIR)
    if response == "y":
        shutil.rmtree(WORK_DIR)


# Run the given command and log its output to the log file
def run_cmd(cmd, exit_on_failure=True):
    log("Running command: %s" % cmd)
    ret = subprocess.call(cmd, shell=True, stdout=LOG_FILE, stderr=LOG_FILE)
    if ret != 0 and exit_on_failure:
        log_and_print("Command failed: %s" % cmd)
        clean_work_files()
        sys.exit(-1)
    return ret


def run_cmd_with_output(cmd):
    log_and_print("Running command: %s" % cmd)
    return subprocess.check_output(cmd, shell=True, stderr=LOG_FILE)


# Test if the given condition is successful
# If so, print the pass message; otherwise print the failure message
def test(cond, msg):
    return passed(msg) if cond else failed(msg)


def passed(msg):
    log_and_print("[PASSED] %s" % msg)


def failed(msg):
    failures.append(msg)
    log_and_print("[**FAILED**] %s" % msg)


def get_url(url):
    return urllib2.urlopen(url).read()


# If the path exists, prompt the user to delete it
# If the resource is not deleted, abort
def ensure_path_not_present(path):
    full_path = os.path.expanduser(path)
    if os.path.exists(full_path):
        print "Found %s locally." % full_path
        response = raw_input("This can interfere with testing published artifacts. OK to delete? (y/N) ")
        if response == "y":
            shutil.rmtree(full_path)
        else:
            print "Abort."
            sys.exit(-1)
log_and_print("|-------- Starting Spark audit tests for release %s --------|" % RELEASE_VERSION)
log_and_print("Log output can be found in %s" % LOG_FILE_NAME)
original_dir = os.getcwd()
# For each of these modules, we'll test an 'empty' application in sbt and
# maven that links against them. This will catch issues with messed up
# dependencies within those projects.
modules = [
"spark-core", "spark-mllib", "spark-streaming", "spark-repl",
"spark-graphx", "spark-streaming-flume", "spark-streaming-kafka-0-8",
"spark-catalyst", "spark-sql", "spark-hive", "spark-streaming-kinesis-asl"
]
modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules)
# Check for directories that might interfere with tests
local_ivy_spark = "~/.ivy2/local/org.apache.spark"
cache_ivy_spark = "~/.ivy2/cache/org.apache.spark"
local_maven_kafka = "~/.m2/repository/org/apache/kafka"
local_maven_spark = "~/.m2/repository/org/apache/spark"
map(ensure_path_not_present, [local_ivy_spark, cache_ivy_spark, local_maven_kafka, local_maven_spark])
# SBT build tests
log_and_print("==== Building SBT modules ====")
os.chdir("blank_sbt_build")
os.environ["SPARK_VERSION"] = RELEASE_VERSION
os.environ["SCALA_VERSION"] = SCALA_VERSION
os.environ["SPARK_RELEASE_REPOSITORY"] = RELEASE_REPOSITORY
os.environ["SPARK_AUDIT_MASTER"] = "local"
for module in modules:
log("==== Building module %s in SBT ====" % module)
os.environ["SPARK_MODULE"] = module
ret = run_cmd("%s clean update" % SBT_CMD, exit_on_failure=False)
test(ret == 0, "SBT build against '%s' module" % module)
os.chdir(original_dir)
# SBT application tests
log_and_print("==== Building SBT applications ====")
for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive", "sbt_app_kinesis"]:
log("==== Building application %s in SBT ====" % app)
os.chdir(app)
ret = run_cmd("%s clean run" % SBT_CMD, exit_on_failure=False)
test(ret == 0, "SBT application (%s)" % app)
os.chdir(original_dir)
# Maven build tests
os.chdir("blank_maven_build")
log_and_print("==== Building Maven modules ====")
for module in modules:
log("==== Building module %s in maven ====" % module)
cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" '
'-Dspark.module="%s" clean compile' %
(MAVEN_CMD, RELEASE_REPOSITORY, RELEASE_VERSION, module))
ret = run_cmd(cmd, exit_on_failure=False)
test(ret == 0, "maven build against '%s' module" % module)
os.chdir(original_dir)
# Maven application tests
log_and_print("==== Building Maven applications ====")
os.chdir("maven_app_core")
mvn_exec_cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" '
                '-Dscala.binary.version="%s" clean compile '
                'exec:java -Dexec.mainClass="SimpleApp"' %
                (MAVEN_CMD, RELEASE_REPOSITORY, RELEASE_VERSION, SCALA_BINARY_VERSION))
ret = run_cmd(mvn_exec_cmd, exit_on_failure=False)
test(ret == 0, "maven application (core)")
os.chdir(original_dir)
# Binary artifact tests
if os.path.exists(WORK_DIR):
print "Working directory '%s' already exists" % WORK_DIR
sys.exit(-1)
os.mkdir(WORK_DIR)
os.chdir(WORK_DIR)
index_page = get_url(RELEASE_URL)
artifact_regex = re.compile("<a href=\"(.*.tgz)\">")
artifacts = artifact_regex.findall(index_page)
# Verify artifact integrity
for artifact in artifacts:
log_and_print("==== Verifying download integrity for artifact: %s ====" % artifact)
artifact_url = "%s/%s" % (RELEASE_URL, artifact)
key_file = "%s.asc" % artifact
run_cmd("wget %s" % artifact_url)
run_cmd("wget %s/%s" % (RELEASE_URL, key_file))
run_cmd("wget %s%s" % (artifact_url, ".sha"))
# Verify signature
run_cmd("%s --keyserver pgp.mit.edu --recv-key %s" % (GPG_CMD, RELEASE_KEY))
run_cmd("%s %s" % (GPG_CMD, key_file))
passed("Artifact signature verified.")
# Verify md5
my_md5 = run_cmd_with_output("%s --print-md MD5 %s" % (GPG_CMD, artifact)).strip()
release_md5 = get_url("%s.md5" % artifact_url).strip()
test(my_md5 == release_md5, "Artifact MD5 verified.")
# Verify sha
my_sha = run_cmd_with_output("%s --print-md SHA512 %s" % (GPG_CMD, artifact)).strip()
release_sha = get_url("%s.sha" % artifact_url).strip()
test(my_sha == release_sha, "Artifact SHA verified.")
# Verify Apache required files
dir_name = artifact.replace(".tgz", "")
run_cmd("tar xvzf %s" % artifact)
base_files = os.listdir(dir_name)
test("CHANGES.txt" in base_files, "Tarball contains CHANGES.txt file")
test("NOTICE" in base_files, "Tarball contains NOTICE file")
test("LICENSE" in base_files, "Tarball contains LICENSE file")
os.chdir(WORK_DIR)
# Report result
log_and_print("\n")
if len(failures) == 0:
log_and_print("*** ALL TESTS PASSED ***")
else:
log_and_print("XXXXX SOME TESTS DID NOT PASS XXXXX")
for f in failures:
log_and_print(" %s" % f)
os.chdir(original_dir)
# Clean up
clean_work_files()
log_and_print("|-------- Spark release audit complete --------|")

@@ -1,43 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project>
  <groupId>spark.audit</groupId>
  <artifactId>spark-audit</artifactId>
  <modelVersion>4.0.0</modelVersion>
  <name>Spark Release Auditor</name>
  <packaging>jar</packaging>
  <version>1.0</version>

  <repositories>
    <repository>
      <id>Spray.cc repository</id>
      <url>http://repo.spray.cc</url>
    </repository>
    <repository>
      <id>Spark Staging Repo</id>
      <url>${spark.release.repository}</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>${spark.module}</artifactId>
      <version>${spark.version}</version>
    </dependency>
  </dependencies>
</project>
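
This POM deliberately compiles nothing of its own; the audit script exercises it once per module by overriding the three properties on the command line. A sketch of a single manual invocation, using the placeholder staging-repository and version values configured at the top of the script:
```
$ cd blank_maven_build
$ mvn --update-snapshots \
  -Dspark.release.repository="https://repository.apache.org/content/repositories/orgapachespark-1033" \
  -Dspark.version="1.1.1" \
  -Dspark.module="spark-core_2.11" \
  clean compile
```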

@@ -1,30 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Spark Release Auditor"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" % System.getenv.get("SPARK_MODULE") % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Eclipse Paho Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/",
"Maven Repository" at "http://repo1.maven.org/maven2/",
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,8 +0,0 @@
a
b
c
d
a
b
c
d

@@ -1,52 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project>
  <groupId>spark.audit</groupId>
  <artifactId>spark-audit</artifactId>
  <modelVersion>4.0.0</modelVersion>
  <name>Simple Project</name>
  <packaging>jar</packaging>
  <version>1.0</version>

  <repositories>
    <repository>
      <id>Spray.cc repository</id>
      <url>http://repo.spray.cc</url>
    </repository>
    <repository>
      <id>Spark Staging Repo</id>
      <url>${spark.release.repository}</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency> <!-- Spark dependency -->
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
  </dependencies>

  <!-- Makes sure we get a fairly recent compiler plugin. -->
  <build>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
      </plugin>
    </plugins>
  </build>
</project>

@@ -1,42 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
public class SimpleApp {
  public static void main(String[] args) {
    String logFile = "input.txt";
    JavaSparkContext sc = new JavaSparkContext("local", "Simple App");
    JavaRDD<String> logData = sc.textFile(logFile).cache();

    long numAs = logData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) { return s.contains("a"); }
    }).count();

    long numBs = logData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) { return s.contains("b"); }
    }).count();

    if (numAs != 2 || numBs != 2) {
      System.out.println("Failed to parse log files with Spark");
      System.exit(-1);
    }
    System.out.println("Test succeeded");
    sc.stop();
  }
}

@@ -1,28 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Simple Project"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,8 +0,0 @@
a
b
c
d
a
b
c
d

@@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.util.Try
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
object SimpleApp {
  def main(args: Array[String]) {
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Spark App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Spark App")
    }
    val logFile = "input.txt"
    val sc = new SparkContext(conf)
    val logData = sc.textFile(logFile, 2).cache()
    val numAs = logData.filter(line => line.contains("a")).count()
    val numBs = logData.filter(line => line.contains("b")).count()
    if (numAs != 2 || numBs != 2) {
      println("Failed to parse log files with Spark")
      System.exit(-1)
    }

    // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
    val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
    val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
    if (!foundConsole) {
      println("Console sink not loaded via spark-core")
      System.exit(-1)
    }
    if (foundGanglia) {
      println("Ganglia sink was loaded via spark-core")
      System.exit(-1)
    }

    // Remove kinesis from default build due to ASL license issue
    val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisUtils")).isSuccess
    if (foundKinesis) {
      println("Kinesis was loaded via spark-core")
      System.exit(-1)
    }
  }
}
// scalastyle:on println

@@ -1,30 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Ganglia Test"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-ganglia-lgpl" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,41 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.util.Try
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
object SimpleApp {
  def main(args: Array[String]) {
    // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
    val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
    val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
    if (!foundConsole) {
      println("Console sink not loaded via spark-core")
      System.exit(-1)
    }
    if (!foundGanglia) {
      println("Ganglia sink not loaded via spark-ganglia-lgpl")
      System.exit(-1)
    }
  }
}
// scalastyle:on println

@@ -1,28 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Simple Project"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-graphx" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,55 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
object GraphXApp {
  def main(args: Array[String]) {
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple GraphX App").setMaster(master)
      case None => new SparkConf().setAppName("Simple GraphX App")
    }
    val sc = new SparkContext(conf)
    SparkContext.jarOfClass(this.getClass).foreach(sc.addJar)

    val users: RDD[(VertexId, (String, String))] =
      sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
        (5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
        (4L, ("peter", "student"))))
    val relationships: RDD[Edge[String]] =
      sc.parallelize(Array(Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"),
        Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi"),
        Edge(4L, 0L, "student"), Edge(5L, 0L, "colleague")))
    val defaultUser = ("John Doe", "Missing")
    val graph = Graph(users, relationships, defaultUser)
    // Notice that there is a user 0 (for which we have no information) connected to users
    // 4 (peter) and 5 (franklin).
    val triplets = graph.triplets.map(e => (e.srcAttr._1, e.dstAttr._1)).collect
    if (!triplets.exists(_ == ("peter", "John Doe"))) {
      println("Failed to run GraphX")
      System.exit(-1)
    }
    println("Test succeeded")
  }
}
// scalastyle:on println

@@ -1,29 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Simple Project"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-hive" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Maven Repository" at "http://repo1.maven.org/maven2/",
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,9 +0,0 @@
0val_0
1val_1
2val_2
3val_3
4val_4
5val_5
6val_6
7val_7
9val_9

@@ -1,59 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.collection.mutable.{ListBuffer, Queue}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.rdd.RDD
case class Person(name: String, age: Int)
object SparkSqlExample {
  def main(args: Array[String]) {
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val sparkSession = SparkSession.builder
      .enableHiveSupport()
      .getOrCreate()

    import sparkSession._

    sql("DROP TABLE IF EXISTS src")
    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    sql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
    val results = sql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
    results.foreach(println)

    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    test(results.size == 5, "Unexpected number of selected elements: " + results)
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println

@@ -1,28 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Kinesis Test"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-streaming-kinesis-asl" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.util.Try
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
object SimpleApp {
  def main(args: Array[String]) {
    val foundKinesis = Try(Class.forName("org.apache.spark.streaming.kinesis.KinesisUtils")).isSuccess
    if (!foundKinesis) {
      println("Kinesis not loaded via kinesis-asl")
      System.exit(-1)
    }
  }
}
// scalastyle:on println

@@ -1,28 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Simple Project"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-sql" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,61 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.collection.mutable.{ListBuffer, Queue}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
case class Person(name: String, age: Int)
object SparkSqlExample {
  def main(args: Array[String]) {
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._
    import sqlContext._

    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
    people.createOrReplaceTempView("people")
    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
    teenagerNames.foreach(println)

    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    test(teenagerNames.size == 7, "Unexpected number of selected elements: " + teenagerNames)
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println

@@ -1,28 +0,0 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
name := "Simple Project"
version := "1.0"
scalaVersion := System.getenv.get("SCALA_VERSION")
libraryDependencies += "org.apache.spark" %% "spark-streaming" % System.getenv.get("SPARK_VERSION")
resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Spray Repository" at "http://repo.spray.cc/")

@@ -1,65 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package main.scala
import scala.collection.mutable.{ListBuffer, Queue}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._
object SparkStreamingExample {
  def main(args: Array[String]) {
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Streaming App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Streaming App")
    }
    val ssc = new StreamingContext(conf, Seconds(1))
    val seen = ListBuffer[RDD[Int]]()

    val rdd1 = ssc.sparkContext.makeRDD(1 to 100, 10)
    val rdd2 = ssc.sparkContext.makeRDD(1 to 1000, 10)
    val rdd3 = ssc.sparkContext.makeRDD(1 to 10000, 10)

    val queue = Queue(rdd1, rdd2, rdd3)
    val stream = ssc.queueStream(queue)
    stream.foreachRDD(rdd => seen += rdd)
    ssc.start()
    Thread.sleep(5000)

    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    val rddCounts = seen.map(rdd => rdd.count()).filter(_ > 0)
    test(rddCounts.length == 3, "Did not collect three RDD's from stream")
    test(rddCounts.toSet == Set(100, 1000, 10000), "Did not find expected streams")

    println("Test succeeded")
    ssc.stop()
  }
}
// scalastyle:on println