Merge branch 'master' into collection_accumulators
Conflicts: core/src/test/scala/spark/AccumulatorSuite.scala
This commit is contained in:
commit
1490d09b5d
|
@ -17,15 +17,14 @@ Spark requires Scala 2.9.1. This version has been tested with 2.9.1.final.
|
||||||
The project is built using Simple Build Tool (SBT), which is packaged with it.
|
The project is built using Simple Build Tool (SBT), which is packaged with it.
|
||||||
To build Spark and its example programs, run:
|
To build Spark and its example programs, run:
|
||||||
|
|
||||||
sbt/sbt compile
|
sbt/sbt package
|
||||||
|
|
||||||
To run Spark, you will need to have Scala's bin in your `PATH`, or you
|
To run Spark, you will need to have Scala's bin in your `PATH`, or you
|
||||||
will need to set the `SCALA_HOME` environment variable to point to where
|
will need to set the `SCALA_HOME` environment variable to point to where
|
||||||
you've installed Scala. Scala must be accessible through one of these
|
you've installed Scala. Scala must be accessible through one of these
|
||||||
methods on Mesos slave nodes as well as on the master.
|
methods on Mesos slave nodes as well as on the master.
|
||||||
|
|
||||||
To run one of the examples, first run `sbt/sbt package` to create a JAR with
|
To run one of the examples, first run `sbt/sbt package` to build them. Then use `./run <class> <params>`. For example:
|
||||||
the example classes. Then use `./run <class> <params>`. For example:
|
|
||||||
|
|
||||||
./run spark.examples.SparkLR local[2]
|
./run spark.examples.SparkLR local[2]
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,16 @@ class Accumulable[T,R] (
|
||||||
else throw new UnsupportedOperationException("Can't use read value in task")
|
else throw new UnsupportedOperationException("Can't use read value in task")
|
||||||
}
|
}
|
||||||
|
|
||||||
private[spark] def localValue = value_
|
/**
|
||||||
|
* Get the current value of this accumulator from within a task.
|
||||||
|
*
|
||||||
|
* This is NOT the global value of the accumulator. To get the global value after a
|
||||||
|
* completed operation on the dataset, call `value`.
|
||||||
|
*
|
||||||
|
* The typical use of this method is to directly mutate the local value, eg., to add
|
||||||
|
* an element to a Set.
|
||||||
|
*/
|
||||||
|
def localValue = value_
|
||||||
|
|
||||||
def value_= (t: T) {
|
def value_= (t: T) {
|
||||||
if (!deserialized) value_ = t
|
if (!deserialized) value_ = t
|
||||||
|
@ -156,4 +165,4 @@ private object Accumulators {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -309,6 +309,7 @@ private trait DAGScheduler extends Scheduler with Logging {
|
||||||
// outputs on the node as dead.
|
// outputs on the node as dead.
|
||||||
case _ =>
|
case _ =>
|
||||||
// Non-fetch failure -- probably a bug in the job, so bail out
|
// Non-fetch failure -- probably a bug in the job, so bail out
|
||||||
|
eventQueues -= runId
|
||||||
throw new SparkException("Task failed: " + evt.task + ", reason: " + evt.reason)
|
throw new SparkException("Task failed: " + evt.task + ", reason: " + evt.reason)
|
||||||
// TODO: Cancel all tasks that are still running
|
// TODO: Cancel all tasks that are still running
|
||||||
}
|
}
|
||||||
|
|
|
@ -293,6 +293,9 @@ class SimpleJob(
|
||||||
if (numFailures(index) > MAX_TASK_FAILURES) {
|
if (numFailures(index) > MAX_TASK_FAILURES) {
|
||||||
logError("Task %d:%d failed more than %d times; aborting job".format(
|
logError("Task %d:%d failed more than %d times; aborting job".format(
|
||||||
jobId, index, MAX_TASK_FAILURES))
|
jobId, index, MAX_TASK_FAILURES))
|
||||||
|
val taskEndReason = ser.deserialize[TaskEndReason](
|
||||||
|
status.getData.toByteArray, getClass.getClassLoader)
|
||||||
|
sched.taskEnded(tasks(index), taskEndReason, null, null) // To make DAGScheduler stop
|
||||||
abort("Task %d failed more than %d times".format(index, MAX_TASK_FAILURES))
|
abort("Task %d failed more than %d times".format(index, MAX_TASK_FAILURES))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,9 +81,9 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers {
|
||||||
for (nThreads <- List(1, 10)) {
|
for (nThreads <- List(1, 10)) {
|
||||||
//test single & multi-threaded
|
//test single & multi-threaded
|
||||||
val sc = new SparkContext("local[" + nThreads + "]", "test")
|
val sc = new SparkContext("local[" + nThreads + "]", "test")
|
||||||
val setAcc = sc.accumlableCollection(mutable.HashSet[Int]())
|
val setAcc = sc.accumulableCollection(mutable.HashSet[Int]())
|
||||||
val bufferAcc = sc.accumlableCollection(mutable.ArrayBuffer[Int]())
|
val bufferAcc = sc.accumulableCollection(mutable.ArrayBuffer[Int]())
|
||||||
val mapAcc = sc.accumlableCollection(mutable.HashMap[Int,String]())
|
val mapAcc = sc.accumulableCollection(mutable.HashMap[Int,String]())
|
||||||
val d = sc.parallelize( (1 to maxI) ++ (1 to maxI))
|
val d = sc.parallelize( (1 to maxI) ++ (1 to maxI))
|
||||||
d.foreach {
|
d.foreach {
|
||||||
x => {setAcc += x; bufferAcc += x; mapAcc += (x -> x.toString)}
|
x => {setAcc += x; bufferAcc += x; mapAcc += (x -> x.toString)}
|
||||||
|
@ -100,7 +100,21 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers {
|
||||||
}
|
}
|
||||||
sc.stop()
|
sc.stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
test ("localValue readable in tasks") {
|
||||||
|
import SetAccum._
|
||||||
|
val maxI = 1000
|
||||||
|
for (nThreads <- List(1, 10)) { //test single & multi-threaded
|
||||||
|
val sc = new SparkContext("local[" + nThreads + "]", "test")
|
||||||
|
val acc: Accumulable[mutable.Set[Any], Any] = sc.accumulable(new mutable.HashSet[Any]())
|
||||||
|
val groupedInts = (1 to (maxI/20)).map {x => (20 * (x - 1) to 20 * x).toSet}
|
||||||
|
val d = sc.parallelize(groupedInts)
|
||||||
|
d.foreach {
|
||||||
|
x => acc.localValue ++= x
|
||||||
|
}
|
||||||
|
acc.value should be ( (0 to maxI).toSet)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -58,7 +58,9 @@ def parse_args():
|
||||||
"WARNING: must be 64-bit; small instances won't work")
|
"WARNING: must be 64-bit; small instances won't work")
|
||||||
parser.add_option("-m", "--master-instance-type", default="",
|
parser.add_option("-m", "--master-instance-type", default="",
|
||||||
help="Master instance type (leave empty for same as instance-type)")
|
help="Master instance type (leave empty for same as instance-type)")
|
||||||
parser.add_option("-z", "--zone", default="us-east-1b",
|
parser.add_option("-r", "--region", default="us-east-1",
|
||||||
|
help="EC2 region zone to launch instances in")
|
||||||
|
parser.add_option("-z", "--zone", default="",
|
||||||
help="Availability zone to launch instances in")
|
help="Availability zone to launch instances in")
|
||||||
parser.add_option("-a", "--ami", default="latest",
|
parser.add_option("-a", "--ami", default="latest",
|
||||||
help="Amazon Machine Image ID to use, or 'latest' to use latest " +
|
help="Amazon Machine Image ID to use, or 'latest' to use latest " +
|
||||||
|
@ -438,7 +440,7 @@ def ssh(host, opts, command):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
(opts, action, cluster_name) = parse_args()
|
(opts, action, cluster_name) = parse_args()
|
||||||
conn = boto.connect_ec2()
|
conn = boto.ec2.connect_to_region(opts.region)
|
||||||
|
|
||||||
# Select an AZ at random if it was not specified.
|
# Select an AZ at random if it was not specified.
|
||||||
if opts.zone == "":
|
if opts.zone == "":
|
||||||
|
|
Loading…
Reference in a new issue