Merge branch 'master' of https://github.com/mesos/spark into bootstrap-update
commit 5dae283996
@@ -62,43 +62,31 @@
      <groupId>org.spark-project</groupId>
      <artifactId>spark-core</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>0.8.0-SNAPSHOT</version>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-bagel</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>0.8.0-SNAPSHOT</version>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <artifactId>spark-mllib</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>0.8.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <classifier>javadoc</classifier>
      <version>0.8.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <classifier>sources</classifier>
      <version>0.8.0-SNAPSHOT</version>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-repl</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>0.8.0-SNAPSHOT</version>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-streaming</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>0.8.0-SNAPSHOT</version>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</project>
@@ -49,7 +49,7 @@
        <include>org.spark-project:*:jar</include>
      </includes>
      <excludes>
        <exclude>org.spark-project:spark-dist:jar</exclude>
        <exclude>org.spark-project:spark-assembly:jar</exclude>
      </excludes>
    </dependencySet>
    <dependencySet>
@@ -25,17 +25,25 @@ import akka.dispatch.Await
import akka.pattern.ask
import akka.util.duration._

import net.liftweb.json.JsonAST.JValue

import spark.Utils
import spark.deploy.DeployWebUI
import spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
import spark.deploy.JsonProtocol
import spark.deploy.master.{ApplicationInfo, WorkerInfo}
import spark.ui.UIUtils


private[spark] class IndexPage(parent: MasterWebUI) {
  val master = parent.master
  implicit val timeout = parent.timeout

  def renderJson(request: HttpServletRequest): JValue = {
    val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
    val state = Await.result(stateFuture, 30 seconds)
    JsonProtocol.writeMasterState(state)
  }

  /** Index view listing applications and executors */
  def render(request: HttpServletRequest): Seq[Node] = {
    val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse]
@@ -61,6 +61,7 @@ class MasterWebUI(val master: ActorRef, requestedPort: Int) extends Logging {
    ("/static", createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR)),
    ("/app/json", (request: HttpServletRequest) => applicationPage.renderJson(request)),
    ("/app", (request: HttpServletRequest) => applicationPage.render(request)),
    ("/json", (request: HttpServletRequest) => indexPage.renderJson(request)),
    ("*", (request: HttpServletRequest) => indexPage.render(request))
  )
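With the "/json" and "/app/json" handlers registered, the master's state can be pulled as JSON over plain HTTP. A minimal sketch; the host and port (localhost:8080) are placeholders for wherever the master web UI is actually bound:

import scala.io.Source

object MasterStateFetcher {
  def main(args: Array[String]) {
    // "/json" is the route added above; host and port are assumptions about the deployment.
    val json = Source.fromURL("http://localhost:8080/json").mkString
    println(json)  // the raw output of JsonProtocol.writeMasterState
  }
}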
@@ -85,7 +85,7 @@ private[spark] class ClusterTaskSetManager(sched: ClusterScheduler, val taskSet:
  val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble

  // Maximum times a task is allowed to fail before failing the job
  val MAX_TASK_FAILURES = 4
  val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt

  // Quantile of tasks at which to start speculation
  val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble
@@ -74,7 +74,6 @@ private[spark] object UIUtils {
          </ul>
          <ul id="infolist">
            <li>Application: <strong>{sc.appName}</strong></li>
            <li>Master: <strong>{sc.master}</strong></li>
            <li>Executors: <strong>{sc.getExecutorStorageStatus.size}</strong></li>
          </ul>
        </div>
@@ -117,9 +116,9 @@ private[spark] object UIUtils {
            <img src="/static/spark_logo.png" />
          </div>
          <div class="span10">
            <h1 style="vertical-align: bottom; margin-top: 40px; display: inline-block;">
            <h3 style="vertical-align: bottom; margin-top: 40px; display: inline-block;">
              {title}
            </h1>
            </h3>
          </div>
        </div>
        {content}
@@ -310,6 +310,14 @@ Apart from these, the following properties are also available, and may be useful
    Duration (milliseconds) of how long to batch new objects coming from network receivers.
  </td>
</tr>
<tr>
  <td>spark.task.maxFailures</td>
  <td>4</td>
  <td>
    Number of individual task failures before giving up on the job.
    Should be greater than or equal to 1. Number of allowed retries = this value - 1.
  </td>
</tr>

</table>
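Like the other properties in this table, spark.task.maxFailures is read through a Java system property, so it has to be set before the SparkContext is created. A minimal sketch; the master URL and application name are placeholders:

import spark.SparkContext

object MaxFailuresExample {
  def main(args: Array[String]) {
    // Allow each task up to 8 failures (7 retries) before the job is given up on.
    System.setProperty("spark.task.maxFailures", "8")
    val sc = new SparkContext("spark://master:7077", "MaxFailuresExample")
    // ClusterTaskSetManager reads the value via System.getProperty, as shown in the hunk above.
  }
}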
@@ -315,14 +315,15 @@ object KMeans {
  }

  def main(args: Array[String]) {
    if (args.length != 4) {
      println("Usage: KMeans <master> <input_file> <k> <max_iterations>")
    if (args.length < 4) {
      println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]")
      System.exit(1)
    }
    val (master, inputFile, k, iters) = (args(0), args(1), args(2).toInt, args(3).toInt)
    val runs = if (args.length >= 5) args(4).toInt else 1
    val sc = new SparkContext(master, "KMeans")
    val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble))
    val model = KMeans.train(data, k, iters)
    val data = sc.textFile(inputFile).map(line => line.split(' ').map(_.toDouble)).cache()
    val model = KMeans.train(data, k, iters, runs)
    val cost = model.computeCost(data)
    println("Cluster centers:")
    for (c <- model.clusterCenters) {
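The new optional runs argument can also be passed when calling KMeans.train from user code. A minimal sketch, assuming KMeans lives in spark.mllib.clustering and that kmeans_data.txt is a hypothetical whitespace-separated numeric input file:

import spark.SparkContext
import spark.mllib.clustering.KMeans

object KMeansRunsExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "KMeansRunsExample")
    // Cache the parsed points, as the updated example does; KMeans makes several passes over them.
    val data = sc.textFile("kmeans_data.txt")
      .map(line => line.split(' ').map(_.toDouble))
      .cache()
    // k = 2 clusters, at most 20 iterations, 5 independent runs.
    val model = KMeans.train(data, 2, 20, 5)
    println("Within-set sum of squared errors: " + model.computeCost(data))
    sc.stop()
  }
}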
@@ -0,0 +1,80 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package spark.mllib.util

import scala.util.Random

import spark.{RDD, SparkContext}

object KMeansDataGenerator {

  /**
   * Generate an RDD containing test data for KMeans. This function chooses k cluster centers
   * from a d-dimensional Gaussian distribution scaled by factor r, then creates a Gaussian
   * cluster with scale 1 around each center.
   *
   * @param sc SparkContext to use for creating the RDD
   * @param numPoints Number of points that will be contained in the RDD
   * @param k Number of clusters
   * @param d Number of dimensions
   * @param r Scaling factor for the distribution of the initial centers
   * @param numPartitions Number of partitions of the generated RDD; default 2
   */
  def generateKMeansRDD(
      sc: SparkContext,
      numPoints: Int,
      k: Int,
      d: Int,
      r: Double,
      numPartitions: Int = 2)
    : RDD[Array[Double]] =
  {
    // First, generate some centers
    val rand = new Random(42)
    val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r))
    // Then generate points around each center
    sc.parallelize(0 until numPoints, numPartitions).map { idx =>
      val center = centers(idx % k)
      val rand2 = new Random(42 + idx)
      Array.tabulate(d)(i => center(i) + rand2.nextGaussian())
    }
  }

  def main(args: Array[String]) {
    if (args.length < 6) {
      println("Usage: KMeansGenerator " +
        "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]")
      System.exit(1)
    }

    val sparkMaster = args(0)
    val outputPath = args(1)
    val numPoints = args(2).toInt
    val k = args(3).toInt
    val d = args(4).toInt
    val r = args(5).toDouble
    val parts = if (args.length >= 7) args(6).toInt else 2

    val sc = new SparkContext(sparkMaster, "KMeansDataGenerator")
    val data = generateKMeansRDD(sc, numPoints, k, d, r, parts)
    data.map(_.mkString(" ")).saveAsTextFile(outputPath)

    System.exit(0)
  }
}
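For quick experiments the generator can also be used in-process instead of writing a text file first. A minimal sketch, again assuming KMeans is available as spark.mllib.clustering.KMeans:

import spark.SparkContext
import spark.mllib.clustering.KMeans
import spark.mllib.util.KMeansDataGenerator

object GeneratedKMeansExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "GeneratedKMeansExample")
    // 10,000 points around 5 centers in 3 dimensions, center spread r = 4.0.
    val points = KMeansDataGenerator.generateKMeansRDD(sc, 10000, 5, 3, 4.0).cache()
    // Try to recover the 5 clusters (20 iterations, single run).
    val model = KMeans.train(points, 5, 20)
    model.clusterCenters.foreach(center => println(center.mkString(" ")))
    sc.stop()
  }
}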